def _video_id(item):
    """Return the video id of *item*, whose "id" is a str or a mapping with "videoId"."""
    raw_id = item["id"]
    return raw_id if isinstance(raw_id, str) else raw_id["videoId"]


def process(items, languages, games, processed_ids: Set):
    """Run a batch of video items through the filtering pipeline.

    Stages, in order:
      1. drop items whose video id is already in ``processed_ids``;
      2. record the ids of the surviving items in ``processed_ids``;
      3. drop items rejected by ``has_snippet``;
      4. drop items rejected by ``isVideoInCorrectLanguage``;
      5. fetch a game title for every remaining item concurrently via
         ``getVideoGameTitle``;
      6. attach titles with ``add_title`` and keep the items accepted by
         ``has_correct_label(games)``.

    Args:
        items: Sized collection of item mappings. Each item's ``"id"`` is
            either a string or a mapping holding the id under ``"videoId"``.
        languages: Forwarded to ``isVideoInCorrectLanguage``.
        games: Forwarded to ``has_correct_label``.
        processed_ids: Set of already-seen video ids. Mutated in place: ids
            of items that pass the first stage are added to it.

    Returns:
        The first element of the last ``filter_with_logging`` result
        (presumably the list of surviving, titled items -- confirm against
        ``filter_with_logging``). An empty list when ``items`` is empty.

    Raises:
        RuntimeError: If the share of items surviving all stages is below
            ``MINIMUM_PROCESS_EFFECIENCY``.
    """
    item_count = len(items)
    logging.info("[PROCESS] processing items: %d", item_count)
    if item_count == 0:
        # Nothing to filter; this also avoids dividing by zero in the
        # pass-through ratio check at the end.
        return []

    # The membership test must apply to the extracted id in both shapes.
    # Written inline, "a if c else b not in s" parses as
    # "a if c else (b not in s)", which let every string-id item through.
    filtered_by_processed_items = filter_with_logging(
        lambda x: _video_id(x) not in processed_ids,
        "repeated items",
        (items, item_count))
    processed_ids.update(
        _video_id(x) for x in filtered_by_processed_items[0])

    filtered_by_having_snippet = filter_with_logging(
        has_snippet,
        "have no snippet",
        filtered_by_processed_items)
    filtered_by_language = filter_with_logging(
        lambda x: isVideoInCorrectLanguage(languages, x),
        "incorrect languages videos",
        filtered_by_having_snippet)

    # Resolve all titles concurrently; gather() keeps the results in the
    # same order as the items, so they can be zipped back together below.
    loop = asyncio.get_event_loop()
    titles = loop.run_until_complete(
        asyncio.gather(*(getVideoGameTitle(_video_id(item))
                         for item in filtered_by_language[0])))

    titled_items = list(map(add_title, zip(titles, filtered_by_language[0])))
    filtered_by_game_title = filter_with_logging(
        has_correct_label(games),
        "incorrect or undefined titles",
        (titled_items, filtered_by_language[1]))

    if filtered_by_game_title[1] / item_count < MINIMUM_PROCESS_EFFECIENCY:
        raise RuntimeError("Too low items go through filtering")
    return filtered_by_game_title[0]
def test_parse_language_whitespace_description(self):
    # The item stored in whitespaceDescription.json must not be reported
    # as matching the "en" language list.
    fixture_path = 'src//tests//resources//whitespaceDescription.json'
    with open(fixture_path, 'r', encoding='utf-8') as fixture:
        item = json.load(fixture)
    matches_english = isVideoInCorrectLanguage(["en"], item)
    self.assertFalse(matches_english)
def test_parse_language_dot_description(self):
    # Start from the basicItem.json fixture and overwrite its description
    # with a single dot; the result must not match the "en" language list.
    fixture_path = 'src//tests//resources//basicItem.json'
    with open(fixture_path, 'r', encoding='utf-8') as fixture:
        item = json.load(fixture)
    snippet = item["snippet"]
    snippet["description"] = '.'
    matches_english = isVideoInCorrectLanguage(["en"], item)
    self.assertFalse(matches_english)
def test_parse_language_for_english(self):
    # The item stored in basicEnglish.json must be reported as matching
    # the "en" language list.
    fixture_path = 'src//tests//resources//basicEnglish.json'
    with open(fixture_path, 'r', encoding='utf-8') as fixture:
        item = json.load(fixture)
    matches_english = isVideoInCorrectLanguage(["en"], item)
    self.assertTrue(matches_english)
def test_parse_language(self):
    # The item stored in noGameTitleItem.json must not be reported as
    # matching the "en" language list.
    fixture_path = 'src//tests//resources//noGameTitleItem.json'
    with open(fixture_path, 'r', encoding='utf-8') as fixture:
        item = json.load(fixture)
    matches_english = isVideoInCorrectLanguage(["en"], item)
    self.assertFalse(matches_english)