def retrieve_movie_from_id(movie_id):
    """
    Fetch a movie from MYAPIFILMS by its IMDB id and store it.

    The JSON answer is mapped onto a ``Movie`` entity (plot, poster, rating,
    genres, trailer, titles, running time and year). The entity is saved with
    ``put()`` and afterwards its actors, directors and writers are handed to
    ``retrieve_artists``.

    :param movie_id: IMDB id of the film, concatenated as-is into the request URL
    :type movie_id: string
    :return: Movie's key, i.e. the value returned by ``Movie.put()``
    :rtype: ndb.Key
    :raise RetrieverError: if there is an error from MYAPIFILMS (not raised
        directly in this function; presumably propagated from ``get``)
    """
    logging.info('Retrieving %s', movie_id)

    # Fixed part of the query string; only the IMDB id changes between calls.
    fixed_query = (
        '&format=JSON&aka=1&business=0&seasons=0&seasonYear=0&technical=0'
        '&filter=N&exactFilter=0&limit=1&lang=en-us&actors=S&biography=0'
        '&trailer=1&uniqueName=0&filmography=0&bornDied=0&starSign=0'
        '&actorActress=0&actorTrivia=0&movieTrivia=0&awards=0'
        '&token=307cccfe-d20b-4b69-b976-d6a024538864'
    )
    url = BASE_URL_MYAPIFILMS + 'imdb?idIMDB=' + movie_id + fixed_query

    payload = json.loads(get(url).encode('utf-8'))

    movie = Movie(
        id=payload['idIMDB'],
        plot=payload['plot'],
        # A missing or empty poster URL is stored as None instead of being cleaned.
        poster=(None
                if ('urlPoster' not in payload or payload['urlPoster'] == "")
                else clear_url(payload['urlPoster'])),
        rated=payload['rated'],
        simple_plot=payload['simplePlot'],
        genres=payload['genres'],
    )

    # The trailer is optional: a missing key simply means "no trailer".
    try:
        movie.trailer = payload['trailer']['videoURL']
    except KeyError:
        movie.trailer = None

    movie.original_title = payload['title']

    # The localized title is the Italian "also known as" entry; when several
    # entries are Italian the last one wins, when none is the title is untouched.
    italian_titles = [alias['title']
                      for alias in payload['akas']
                      if alias['country'] == 'Italy']
    if italian_titles:
        movie.title = italian_titles[-1]

    durations = payload['runtime']
    movie.run_times = None if len(durations) == 0 else durations[0]

    # Only the last four characters are kept when the year field is longer.
    release_year = payload['year']
    movie.year = release_year[-4:] if len(release_year) > 4 else release_year

    # The movie is saved before its artists are processed.
    key = movie.put()
    retrieve_artists(movie,
                     payload['actors'],
                     payload['directors'],
                     payload['writers'])

    logging.info('Retrieved %s', movie_id)
    return key
# Offset at which the movie id starts inside a FilmTV URL.
# NOTE(review): 26 is presumably len('http://www.filmtv.it/film/') - confirm.
_FILMTV_ID_OFFSET = 26


def _filmtv_id_from_url(movie_url):
    """Return the part of ``movie_url`` between offset 26 and the next "/"."""
    slash = movie_url.find("/", _FILMTV_ID_OFFSET)
    if slash == -1:
        # No trailing slash: use the rest of the URL instead of failing on an
        # unbound variable as the hand-written scan did.
        return movie_url[_FILMTV_ID_OFFSET:]
    return movie_url[_FILMTV_ID_OFFSET:slash]


def _split_cast(cast_string):
    """Split a comma separated cast string into names (original offsets kept)."""
    # NOTE(review): the last name is cut when the running count reaches
    # len(cast_string) - 1 and the slice stops one character earlier, so the
    # final two characters of the string are dropped. This looks like it relies
    # on trailing characters in the FilmTV cast string - confirm before changing.
    names = []
    begin = 0
    final_cut = len(cast_string) - 1
    for count, char in enumerate(cast_string, 1):
        if char == "," or count == final_cut:
            names.append(cast_string[begin:count - 1])
            # Skip the separator and the character that follows it.
            begin = count + 1
    return names


def _movie_from_filmtv(movie_original_title, movie_director, movie_cast,
                       movie_title, movie_url, movie_year, movie_genre):
    """Build a ``Movie`` from the FilmTV data supplied by the caller."""
    movie = Movie(id=_filmtv_id_from_url(movie_url),
                  year=movie_year,
                  original_title=movie_original_title,
                  title=movie_title,
                  genres=[movie_genre])

    actors_list = [] if movie_cast is None else _split_cast(movie_cast)
    for actor_name in actors_list:
        search_artist_from_name(actor_name, movie)

    # NOTE(review): as in the original code, the director value is iterated
    # element by element (character by character for a string) and every call
    # receives the last parsed actor as first argument - verify against the
    # signature of search_artist_from_name.
    if movie_director is not None and actors_list:
        last_actor = actors_list[-1]
        for director_name in movie_director:
            search_artist_from_name(last_actor, movie, director_name)

    return movie


def _movie_from_imdb(imdb_data, directors_list, movie_title,
                     movie_original_title):
    """Build a ``Movie`` from one MYAPIFILMS result and retrieve its artists."""
    # Same guard as the by-id lookup: a missing or empty poster becomes None.
    poster_url = imdb_data.get('urlPoster')
    movie = Movie(id=imdb_data['idIMDB'],
                  plot=imdb_data['plot'],
                  poster=clear_url(poster_url) if poster_url else None,
                  rated=imdb_data['rated'],
                  simple_plot=imdb_data['simplePlot'],
                  genres=imdb_data['genres'])

    # The trailer is optional; the field may be missing or null.
    try:
        movie.trailer = imdb_data['trailer']['videoURL']
    except (KeyError, TypeError):
        movie.trailer = None

    movie.title = movie_title
    movie.original_title = movie_original_title

    run_times = imdb_data['runtime']
    movie.run_times = run_times[0] if run_times else None

    # Only the last four characters are kept when the year field is longer.
    year = imdb_data['year']
    movie.year = year[-4:] if len(year) > 4 else year

    retrieve_artists(movie, imdb_data['actors'], directors_list,
                     imdb_data['writers'])
    return movie


def _add_filmtv_plot(movie, movie_url):
    """Download the FilmTV page and store its first description paragraph."""
    html_page_plot = get(movie_url).encode('utf-8')
    tree = lxml.html.fromstring(html_page_plot)
    paragraphs = tree.xpath('//article[@class="scheda-desc"]/p/text()')
    if paragraphs:
        movie.plot_it = paragraphs[0]
    else:
        # Best effort: the movie is still stored without the Italian plot.
        logging.error('Impossible to retrieve info from FilmTV')


def retrieve_movie_from_title(movie_original_title, movie_director, movie_cast, movie_title=None, movie_url=None,
                              movie_year=None, movie_genre=None):
    """
    Retrieve movie info from IMDB by movie title.

    MYAPIFILMS is queried with the original title. The first result is used
    when its first director and ``movie_director`` contain one another;
    otherwise (no result, or a different director) the movie is built from the
    FilmTV data passed in by the caller. In every case the Italian plot is
    read from the FilmTV page at ``movie_url`` and the movie is stored once.

    :param movie_original_title: original title of the film to retrieve info
    :type movie_original_title: string
    :param movie_director: director of the film to retrieve info
    :type movie_director: string
    :param movie_cast: comma separated cast of the film, used only when the
        movie is built from FilmTV data
    :type movie_cast: string
    :param movie_title: title of the film to retrieve info
    :type movie_title: string
    :param movie_url: FilmTV page of the film; the FilmTV id and the Italian
        plot are taken from it, so it is needed in practice although optional
    :type movie_url: string
    :param movie_year: year of the film, sent as ``seasonYear`` in the query
    :type movie_year: string
    :param movie_genre: genre of the film to retrieve info
    :type movie_genre: string
    :return: Movie's key, i.e. the value returned by ``Movie.put()``
    :rtype: ndb.Key
    :raise RetrieverError: if there is an error from MYAPIFILMS (not raised
        directly in this function; presumably propagated from ``get``)
    """
    logging.info('Retrieving %s', movie_original_title)

    # movie_year defaults to None, which cannot be concatenated into the URL;
    # '0' is the value the by-id lookup in this file sends for seasonYear.
    season_year = '0' if movie_year is None else movie_year
    # NOTE(review): the title is inserted without URL encoding and the API
    # token is hard-coded - confirm that ``get`` encodes the URL and consider
    # moving the token to configuration.
    url = (BASE_URL_MYAPIFILMS + 'imdb?title=' + movie_original_title +
           '&format=JSON&aka=0&business=0&seasons=0&seasonYear=' + season_year +
           '&technical=0&filter=M&exactFilter=0&limit=1&lang=en-us&actors=S'
           '&biography=0&trailer=1&uniqueName=0&filmography=0&bornDied=0'
           '&starSign=0&actorActress=0&actorTrivia=0&movieTrivia=0&awards=0'
           '&token=307cccfe-d20b-4b69-b976-d6a024538864')
    logging.info('Url My API Films: %s', url)

    json_page = get(url).encode('utf-8')
    json_data = json.loads(json_page)

    movie = None
    # Anything but a non-empty list means that no movie was found.
    if isinstance(json_data, list) and json_data:
        imdb_data = json_data[0]
        directors_list = imdb_data['directors']

        same_director = False
        if movie_director is not None and directors_list:
            imdb_director = directors_list[0]['name'].encode('utf-8')
            same_director = (movie_director in imdb_director or
                             imdb_director in movie_director)

        if same_director:
            movie = _movie_from_imdb(imdb_data, directors_list, movie_title,
                                     movie_original_title)
            logging.info('Url FilmTV: %s', movie_url)
        else:
            logging.info("FilmTV movie is not the same with retrieved movie in IMDB!")
    else:
        logging.info('Movie not found in IMDB.')

    if movie is None:
        movie = _movie_from_filmtv(movie_original_title, movie_director,
                                   movie_cast, movie_title, movie_url,
                                   movie_year, movie_genre)

    _add_filmtv_plot(movie, movie_url)

    # Stored exactly once, whichever source the data came from.
    key = movie.put()

    logging.info('Retrieved %s', movie_original_title)
    return key