Exemple #1
0
def build_collection_index(movies, tvshows):
    """Build a fuzzy-search index over the collection ('set') names.

    Args:
        movies: Iterable of movie dicts. Only entries carrying a non-empty
            ``'set'`` value contribute to the index.
        tvshows: Iterable of TV show dicts, handled the same way as
            ``movies``.

    Returns:
        A ``(index, mapped_entities)`` tuple. ``index`` is an ``NGram`` built
        from the distinct normalized collection names with a lower-casing
        key function; ``mapped_entities`` maps each normalized name to the
        list of entities that carry it.
    """
    start = time.time()

    # Normalize each collection name once and group the entities in the same
    # pass. One truthiness guard is used for everything, so a missing, empty
    # or None 'set' is skipped uniformly instead of failing on len(None).
    mapped_entities = {}
    for entity in itertools.chain(movies, tvshows):
        if 'set' not in entity or not entity['set']:
            continue

        value = parse_collection(strip_accents(entity['set']))
        mapped_entities.setdefault(value, []).append(entity)

    # The map keys are exactly the distinct names, in first-seen order.
    values = list(mapped_entities)

    logger.debug(u'Iterating collection took {} ms'.format(int((time.time() - start) * 1000)))

    start = time.time()
    index = NGram(items=values, key=lambda x: x.lower())
    logger.debug(u'Building collection index took {} ms'.format(int((time.time() - start) * 1000)))

    return index, mapped_entities
Exemple #2
0
def build_title_index(movies, tvshows):
    """Build a fuzzy-search index over the accent-stripped titles.

    Args:
        movies: Iterable of movie dicts, each with a ``'title'`` entry.
        tvshows: Iterable of TV show dicts, each with a ``'title'`` entry.

    Returns:
        A ``(index, mapped_entities)`` tuple: an ``NGram`` built from the
        accent-stripped titles with a lower-casing key function, and a dict
        mapping each accent-stripped title to the entities that carry it.
    """
    began = time.time()

    entities = list(itertools.chain(movies, tvshows))
    values = [strip_accents(item['title']) for item in entities]

    # Group the entities under their normalized title.
    mapped_entities = {}
    for item in entities:
        mapped_entities.setdefault(strip_accents(item['title']), []).append(item)

    elapsed_ms = int((time.time() - began) * 1000)
    logger.debug(u'Iterating title took {} ms'.format(elapsed_ms))

    began = time.time()
    index = NGram(items=values, key=lambda x: x.lower())
    elapsed_ms = int((time.time() - began) * 1000)
    logger.debug(u'Building title index took {} ms'.format(elapsed_ms))

    return index, mapped_entities
Exemple #3
0
def build_cast_index(movies, tvshows, key):
    """Build a fuzzy-search index over one field of the cast entries.

    Args:
        movies: Iterable of movie dicts, each with a ``'cast'`` list.
        tvshows: Iterable of TV show dicts, each with a ``'cast'`` list.
        key: Name of the cast-entry field to index; it is also used in the
            timing log messages.

    Returns:
        A ``(index, mapped_entities)`` tuple: an ``NGram`` built from the
        distinct accent-stripped field values with a lower-casing key
        function, and a dict mapping each such value to the entities whose
        cast contains it (one list entry per matching cast member).
    """
    began = time.time()

    entities = list(itertools.chain(movies, tvshows))

    # Distinct normalized values across every cast list.
    distinct = {
        strip_accents(member[key])
        for item in entities
        for member in item['cast']
    }
    values = list(distinct)

    mapped_entities = {}
    for item in entities:
        for member in item['cast']:
            mapped_entities.setdefault(strip_accents(member[key]), []).append(item)

    elapsed_ms = int((time.time() - began) * 1000)
    logger.debug(u'Iterating {} took {} ms'.format(key, elapsed_ms))

    began = time.time()
    index = NGram(items=values, key=lambda x: x.lower())
    elapsed_ms = int((time.time() - began) * 1000)
    logger.debug(u'Building {} index took {} ms'.format(key, elapsed_ms))

    return index, mapped_entities
Exemple #4
0
    def _find_by(self, filter_value, value_type):
        """Return the entities whose indexed value resembles ``filter_value``.

        Args:
            filter_value: Text to look up; it is accent-stripped and
                lower-cased before the index is searched.
            value_type: Key into ``self.compose_index`` selecting the index,
                the value-to-entities map and the score threshold to use.

        Returns:
            A flat list of ``(entity, score)`` tuples, one per entity mapped
            to each index hit whose score is strictly above the threshold.
        """
        settings = self.compose_index[value_type]
        index = settings['ix']
        value_map = settings['map']
        threshold = settings['threshold']

        query = strip_accents(filter_value).lower()
        similar_values = [
            (candidate, score)
            for candidate, score in index.search(query)
            if score > threshold
        ]
        logger.debug(similar_values)

        # Fan every hit out to its entities, all sharing the hit's score.
        matched_entities = []
        for candidate, score in similar_values:
            matched_entities.extend([(entity, score) for entity in value_map[candidate]])

        return matched_entities
    def test_library_index(self):
        """An exact accent-stripped, lower-cased title must score 1.0."""
        titles = [
            "Padre no hay más que uno",
            "Élite",
            "Pequeñas mentirosas",
            "Capitán América: El primer vengador",
            "Alita: Ángel del combate",
            "Animales fantásticos y dónde encontrarlos",
            "Animales fantásticos: Los crímenes de Grindelwald",
            "Cafarnaúm",
            "El Camino: Una película de Breaking Bad",
            "Cómo entrenar a tu dragón",
        ]
        movies = [{'title': title} for title in titles]

        index, _ = build_title_index(movies, [])

        query = strip_accents("Capitán América: El primer vengador").lower()
        best_match = index.search(query)[0]
        self.assertEqual(best_match[1], 1.0)
Exemple #6
0
def build_genre_index(movies, tvshows):
    """Build a fuzzy-search index over the accent-stripped genre names.

    Args:
        movies: Iterable of movie dicts, each with a ``'genre'`` list.
        tvshows: Iterable of TV show dicts, each with a ``'genre'`` list.

    Returns:
        A ``(index, mapped_entities)`` tuple. ``index`` is an ``NGram`` built
        from the distinct, non-empty accent-stripped genre names with a
        lower-casing key function; ``mapped_entities`` maps every
        accent-stripped genre name to the entities tagged with it.
    """
    start = time.time()

    mapped_entities = {}
    for entity in itertools.chain(movies, tvshows):
        for genre in entity['genre']:
            mapped_entities.setdefault(strip_accents(genre), []).append(entity)

    # Index the *normalized* names. The previous code passed strip_accents to
    # filter() as a predicate, so the index held the raw (accented) genres
    # while the map was keyed by the stripped ones, and a hit on an accented
    # genre could not be resolved through the map. Names that normalize to an
    # empty string stay out of the index, as before.
    values = [genre for genre in mapped_entities if genre]

    logger.debug(u'Iterating genre took {} ms'.format(int((time.time() - start) * 1000)))

    start = time.time()
    index = NGram(items=values, key=lambda x: x.lower())
    logger.debug(u'Building genre index took {} ms'.format(int((time.time() - start) * 1000)))

    return index, mapped_entities
 def test_strip_accent(self):
     """strip_accents must return the expected plain text for each sample."""
     # (expected plain text, accented input) pairs, checked in order.
     cases = [
         ("Padre no hay mas que uno", "Padre no hay más que uno"),
         ("Elite", "Élite"),
         ("Pequenas mentirosas", "Pequeñas mentirosas"),
         ("Capitan America: El primer vengador",
          "Capitán América: El primer vengador"),
         ("Aladdin", "Aladdín"),
         ("Alita: Angel del combate", "Alita: Ángel del combate"),
         ("Animales fantasticos y donde encontrarlos",
          "Animales fantásticos y dónde encontrarlos"),
         ("Animales fantasticos: Los crimenes de Grindelwald",
          "Animales fantásticos: Los crímenes de Grindelwald"),
         ("Cafarnaum", "Cafarnaúm"),
         ("El Camino: Una pelicula de Breaking Bad",
          "El Camino: Una película de Breaking Bad"),
         ("Como entrenar a tu dragon", "Cómo entrenar a tu dragón"),
         ("Erase una vez en Hollywood", "Érase una vez en… Hollywood"),
     ]
     for expected, accented in cases:
         self.assertEqual(expected, strip_accents(accented))