Exemple #1
0
def _resolve_movie(sapo_id, sapo_title, sapo_description):
    """Resolve a Sapo movie to an already-known movie, if there is one.

    The lookup proceeds in this order and stops at the first hit:

    1. An alias registered under ``sapo_id``.
    2. Every movie whose title, or whose title alias, equals
       ``sapo_title`` (duplicates by ``sapo_id`` are dropped).  A candidate
       is accepted when either
       * its own description is similar to ``sapo_description``, or
       * one of the alias records pointing at it has a matching title
         (equal to ``sapo_title`` or listed in the candidate's
         ``alias_titles``) and a similar description.

    "Similar" means ``difflib.SequenceMatcher(...).ratio() > 0.5``.

    Args:
        sapo_id: Sapo identifier of the movie being resolved.
        sapo_title: Sapo title of the movie being resolved.
        sapo_description: Sapo description used for the fuzzy comparison.

    Returns:
        The matching object produced by ``Movie.from_pymongo`` (the id alias
        or the accepted title candidate), or ``None`` when nothing matches.
    """
    min_ratio = 0.5  # Similarity threshold for two descriptions to "match".

    def _is_similar(description):
        # Argument order is kept as (stored description, incoming
        # description); SequenceMatcher is not guaranteed to be symmetric.
        matcher = SequenceMatcher(None, description, sapo_description)
        return matcher.ratio() > min_ratio

    # 1) Direct hit: an alias already registered for this id.
    id_alias = Movie.from_pymongo(ms.get_movie_alias_by_id(sapo_id))
    if id_alias is not None:
        return id_alias  # Match found based on id

    # 2) Fall back to title-based candidates.
    # NOTE(review): both results are assumed to be lists of Movie objects
    # (they are concatenated with `+` and iterated) -- confirm against
    # Movie.from_pymongo.
    same_titles = Movie.from_pymongo(
        ms.get_movie_in_db_by_title(sapo_title))  # Search by title
    title_aliases = Movie.from_pymongo(
        ms.get_movie_alias_by_title(sapo_title))  # Search by title aliases

    # Build the set of already-seen ids once instead of rebuilding a list
    # for every alias (the original did the latter, which is quadratic).
    # Assumes sapo_id values are hashable.
    seen_ids = {movie.sapo_id for movie in same_titles}
    alias_candidates = same_titles + [
        alias for alias in title_aliases if alias.sapo_id not in seen_ids
    ]

    for candidate in alias_candidates:
        if _is_similar(candidate.sapo_description):
            return candidate  # Match found based on title

        for alias_of in ms.get_alias_movie_by_aliasof(candidate.sapo_id):
            alias_title = alias_of['sapo_title']
            title_matches = (alias_title == sapo_title
                             or alias_title in candidate.alias_titles)
            # The description is only read when the title matched, exactly
            # as the original short-circuiting `and` did.
            if title_matches and _is_similar(alias_of['sapo_description']):
                return candidate  # Match found based on alias

    return None
import services.movie_service as ms
from models.movie import Movie
from configs.config import db
from bson.json_util import dumps
from difflib import SequenceMatcher
import json
import sys
import uuid

if __name__ == '__main__':
    movies = Movie.from_pymongo(db.movie.find({}))
    repeated = []
    for movie in movies:
        found = Movie.from_pymongo(
            db.movie.find_one({
                'imdb_id': movie.imdb_id,
                'sapo_id': {
                    '$ne': movie.sapo_id
                }
            }))
        if found is not None:
            if found.sapo_id not in repeated:
                repeated.append(movie.sapo_id)
                repeated.append(found.sapo_id)

                print('\n')
                print('*** IMDb movie ID: {} ***'.format(movie.imdb_id))
                print('[1] Sapo ID: {}'.format(movie.sapo_id))
                print('    Sapo title: {}. IMDb title: {}'.format(
                    movie.sapo_title, movie.imdb_title))
                print('    Sapo Description: {}'.format(