예제 #1
0
def _extract_video_id(item):
    """Return the video id stored in ``item["id"]``.

    ``item["id"]`` is either the id string itself or a mapping that carries
    the id under the ``"videoId"`` key.
    """
    item_id = item["id"]
    return item_id if isinstance(item_id, str) else item_id["videoId"]


def process(items, languages, games, processed_ids: Set):
    """Run *items* through the filtering pipeline and return the survivors.

    The stages are applied in order through ``filter_with_logging``:

    1. drop items whose video id is already in *processed_ids*;
    2. drop items for which ``has_snippet`` is false;
    3. drop items rejected by ``isVideoInCorrectLanguage(languages, item)``;
    4. fetch a game title per remaining item with ``getVideoGameTitle``,
       attach it with ``add_title`` and keep what ``has_correct_label(games)``
       accepts.

    Args:
        items: Sized iterable of items; each has an ``"id"`` entry that is
            either the id string or a mapping with a ``"videoId"`` key.
        languages: Passed through to ``isVideoInCorrectLanguage``.
        games: Passed through to ``has_correct_label``.
        processed_ids: Set of already seen video ids. It is updated in place
            with the ids of every item that passes the first stage.

    Returns:
        Element ``[0]`` of the last ``filter_with_logging`` result.

    Raises:
        RuntimeError: If element ``[1]`` of the last stage result divided by
            ``len(items)`` is below ``MINIMUM_PROCESS_EFFECIENCY``.
    """
    item_count = len(items)
    logging.info("[PROCESS] processing items: %d", item_count)

    # The id has to be extracted *before* the membership test.  Written
    # inline, ``a if cond else b not in s`` parses as
    # ``a if cond else (b not in s)``, so items with a plain string id were
    # never compared against ``processed_ids`` and repeats slipped through.
    filtered_by_processed_items = filter_with_logging(
        lambda x: _extract_video_id(x) not in processed_ids,
        "repeated items", (items, item_count))

    # Remember everything that got past the duplicate check so that later
    # calls sharing the same set skip these ids.
    processed_ids.update(
        _extract_video_id(x) for x in filtered_by_processed_items[0])
    filtered_by_having_snippet = filter_with_logging(
        has_snippet, "have no snippet", filtered_by_processed_items)

    filtered_by_language = filter_with_logging(
        lambda x: isVideoInCorrectLanguage(languages, x),
        "incorrect languages videos", filtered_by_having_snippet)

    # Resolve all titles concurrently; asyncio.gather returns the results in
    # the order of the awaitables, so they can be zipped back onto the items.
    loop = asyncio.get_event_loop()
    titles = loop.run_until_complete(
        asyncio.gather(*(getVideoGameTitle(_extract_video_id(item))
                         for item in filtered_by_language[0])))

    filtered_by_game_title = filter_with_logging(
        has_correct_label(games), "incorrect or undefined titles",
        (list(map(add_title, zip(
            titles, filtered_by_language[0]))), filtered_by_language[1]))
    # NOTE(review): element [1] is presumably the number of items still alive
    # after the stage - confirm against filter_with_logging.  An empty
    # ``items`` makes this division raise ZeroDivisionError; confirm whether
    # callers guarantee a non-empty batch.
    if filtered_by_game_title[1] / item_count < MINIMUM_PROCESS_EFFECIENCY:
        raise RuntimeError("Too low items go through filtering")
    return filtered_by_game_title[0]
예제 #2
0
 def test_parse_language_whitespace_description(self):
     # Fixture: the whitespaceDescription.json resource, decoded as UTF-8.
     fixture_path = 'src//tests//resources//whitespaceDescription.json'
     with open(fixture_path, mode='r', encoding='utf-8') as fixture:
         video_item = json.load(fixture)

     # The item must be rejected when "en" is the only requested language.
     accepted = isVideoInCorrectLanguage(["en"], video_item)
     self.assertFalse(accepted)
예제 #3
0
 def test_parse_language_dot_description(self):
     # Start from the basicItem.json resource, decoded as UTF-8.
     fixture_path = 'src//tests//resources//basicItem.json'
     with open(fixture_path, mode='r', encoding='utf-8') as fixture:
         video_item = json.load(fixture)

     # Overwrite the snippet description with a single dot before checking.
     snippet = video_item["snippet"]
     snippet["description"] = '.'

     # The item must be rejected when "en" is the only requested language.
     accepted = isVideoInCorrectLanguage(["en"], video_item)
     self.assertFalse(accepted)
예제 #4
0
 def test_parse_language_for_english(self):
     # Fixture: the basicEnglish.json resource, decoded as UTF-8.
     fixture_path = 'src//tests//resources//basicEnglish.json'
     with open(fixture_path, mode='r', encoding='utf-8') as fixture:
         video_item = json.load(fixture)

     # The item must be accepted when "en" is the only requested language.
     accepted = isVideoInCorrectLanguage(["en"], video_item)
     self.assertTrue(accepted)
예제 #5
0
 def test_parse_language(self):
     # Fixture: the noGameTitleItem.json resource, decoded as UTF-8.
     fixture_path = 'src//tests//resources//noGameTitleItem.json'
     with open(fixture_path, mode='r', encoding='utf-8') as fixture:
         video_item = json.load(fixture)

     # The item must be rejected when "en" is the only requested language.
     accepted = isVideoInCorrectLanguage(["en"], video_item)
     self.assertFalse(accepted)