def _descriptions_match(left, right, threshold=0.5):
    """Return True when two descriptions are similar enough to match.

    Similarity is ``SequenceMatcher(None, left, right).ratio()``; the pair
    matches only when that ratio is strictly greater than ``threshold``.
    """
    return SequenceMatcher(None, left, right).ratio() > threshold


def _resolve_movie(sapo_id, sapo_title, sapo_description):
    """Resolve movie based on id, title and description.

    Resolution is attempted in this order:

    1. An alias looked up by ``sapo_id``; if one exists it is returned as is.
    2. Candidates sharing ``sapo_title`` (movies found by title, followed by
       title aliases whose ``sapo_id`` is not already among them). A candidate
       is returned when its own description is similar to
       ``sapo_description``.
    3. For each candidate, the records returned by
       ``ms.get_alias_movie_by_aliasof``: the candidate is returned when one
       of those records has a title equal to ``sapo_title`` (or listed in the
       candidate's ``alias_titles``) and a similar description.

    Args:
        sapo_id: Identifier used for the alias-by-id lookup.
        sapo_title: Title used for both title lookups and alias comparison.
        sapo_description: Description compared against candidate descriptions.

    Returns:
        The matching movie object, or ``None`` when nothing matches.
    """
    id_alias = Movie.from_pymongo(
        ms.get_movie_alias_by_id(sapo_id))  # Movie alias based on id
    if id_alias is not None:
        return id_alias  # Match found based on id

    same_titles = Movie.from_pymongo(
        ms.get_movie_in_db_by_title(sapo_title))  # Search by title
    title_aliases = Movie.from_pymongo(
        ms.get_movie_alias_by_title(sapo_title))  # Search by title aliases

    # Build the id lookup once instead of once per title alias.
    # NOTE(review): assumes sapo_id values are hashable (str/int) -- confirm.
    known_ids = {movie.sapo_id for movie in same_titles}
    alias_candidates = same_titles + [
        movie for movie in title_aliases if movie.sapo_id not in known_ids
    ]

    for alias_candidate in alias_candidates:
        if _descriptions_match(alias_candidate.sapo_description,
                               sapo_description):
            return alias_candidate  # Match found based on title

        for alias_of in ms.get_alias_movie_by_aliasof(
                alias_candidate.sapo_id):
            alias_title = alias_of['sapo_title']
            title_matches = (alias_title == sapo_title
                             or alias_title in alias_candidate.alias_titles)
            # Only compare descriptions once the title already agrees.
            if title_matches and _descriptions_match(
                    alias_of['sapo_description'], sapo_description):
                return alias_candidate  # Match found based on alias

    return None
import services.movie_service as ms from models.movie import Movie from configs.config import db from bson.json_util import dumps from difflib import SequenceMatcher import json import sys import uuid if __name__ == '__main__': movies = Movie.from_pymongo(db.movie.find({})) repeated = [] for movie in movies: found = Movie.from_pymongo( db.movie.find_one({ 'imdb_id': movie.imdb_id, 'sapo_id': { '$ne': movie.sapo_id } })) if found is not None: if found.sapo_id not in repeated: repeated.append(movie.sapo_id) repeated.append(found.sapo_id) print('\n') print('*** IMDb movie ID: {} ***'.format(movie.imdb_id)) print('[1] Sapo ID: {}'.format(movie.sapo_id)) print(' Sapo title: {}. IMDb title: {}'.format( movie.sapo_title, movie.imdb_title)) print(' Sapo Description: {}'.format(