def test_no_year(self):
    """Parsing a title that has no year must not crash the parser."""
    imdb = ImdbParser()
    imdb.parse('tt3303790')

    assert imdb.name == 'Master of None'
    # This title has no year, so the parsed value is expected to be falsy.
    assert not imdb.year
def test_no_plot(self):
    """Parsing a title that has no plot must not crash the parser."""
    imdb = ImdbParser()
    imdb.parse('tt1300562')

    assert imdb.name == 'Goodbye Mothers'
    # This title has no plot, so the parsed outline is expected to be falsy.
    assert not imdb.plot_outline
def _parse_new_movie(self, imdb_url, session):
    """
    Parse an IMDB page into a new Movie object and add it to the session.

    Genres, languages, actors, directors, writers and plot keywords are each
    looked up through the session first; a new row object is only constructed
    when the lookup finds nothing.

    :param imdb_url: IMDB url
    :param session: Session to be used
    :return: Newly added Movie
    """

    def fetch_or_build(model, field, value, *init_args):
        # First stored row whose `field` equals `value`, or else a fresh
        # `model` instance constructed from `init_args`.
        found = session.query(model).filter(getattr(model, field) == value).first()
        return found or model(*init_args)

    imdb = ImdbParser()
    imdb.parse(imdb_url)

    # Copy the plain attributes from the parser onto a new database object.
    movie = db.Movie()
    movie.photo = imdb.photo
    movie.title = imdb.name
    movie.original_title = imdb.original_name
    movie.score = imdb.score
    movie.votes = imdb.votes
    movie.meta_score = imdb.meta_score
    movie.year = imdb.year
    movie.mpaa_rating = imdb.mpaa_rating
    movie.plot_outline = imdb.plot_outline
    movie.url = imdb_url

    for genre_name in imdb.genres:
        genre = fetch_or_build(db.Genre, 'name', genre_name, genre_name)
        movie.genres.append(genre)  # pylint:disable=E1101

    # The position of a language in the parsed list is stored as its prominence.
    for position, language_name in enumerate(imdb.languages):
        language = fetch_or_build(db.Language, 'name', language_name, language_name)
        movie.languages.append(db.MovieLanguage(language, prominence=position))

    for person_id, person_name in imdb.actors.items():
        actor = fetch_or_build(db.Actor, 'imdb_id', person_id, person_id, person_name)
        movie.actors.append(actor)  # pylint:disable=E1101

    for person_id, person_name in imdb.directors.items():
        director = fetch_or_build(db.Director, 'imdb_id', person_id, person_id, person_name)
        movie.directors.append(director)  # pylint:disable=E1101

    for person_id, person_name in imdb.writers.items():
        writer = fetch_or_build(db.Writer, 'imdb_id', person_id, person_id, person_name)
        movie.writers.append(writer)  # pylint:disable=E1101

    for keyword in imdb.plot_keywords:
        plot_keyword = fetch_or_build(db.PlotKeyword, 'name', keyword, keyword)
        movie.plot_keywords.append(plot_keyword)  # pylint:disable=E1101

    # Record the time of this update so the age of the info can be tracked later.
    movie.updated = datetime.now()
    session.add(movie)
    return movie
def test_parsed_data(self):
    """Parse 'tt0114814' and check every parsed attribute, plot keywords included."""
    expected_actors = {
        'nm0000592': 'Pete Postlethwaite',
        'nm0261452': 'Christine Estabrook',
        'nm0000751': 'Suzy Amis',
        'nm0000286': 'Stephen Baldwin',
        'nm0000445': 'Dan Hedaya',
        'nm0800339': 'Phillipe Simon',
        'nm0002064': 'Giancarlo Esposito',
        'nm0001590': 'Chazz Palminteri',
        'nm0000321': 'Gabriel Byrne',
        'nm0790436': 'Jack Shearer',
        'nm0000228': 'Kevin Spacey',
        'nm0001629': 'Kevin Pollak',
        'nm0107808': 'Carl Bressler',
        'nm0001125': 'Benicio Del Toro',
        'nm0000860': 'Paul Bartel',
    }
    expected_directors = {'nm0001741': 'Bryan Singer'}
    expected_genres = {u'crime', u'mystery', u'thriller'}
    expected_languages = {'english', 'hungarian', 'spanish', 'french'}
    expected_plot = (
        'Following a truck hijack in New York, five conmen are arrested and brought together for questioning. '
        'As none of them are guilty, they plan a revenge operation against the police. The operation goes well, '
        'but then the influence of a legendary mastermind criminal called Keyser S\xf6ze is felt. It becomes '
        'clear that each one of them has wronged S\xf6ze at some point and must pay back now. The payback job '
        'leaves 27 men dead in a boat explosion, but the real question arises now: Who actually is Keyser S\xf6ze?'
    )
    expected_keywords = {
        u'criminal',
        u'suspect',
        u'criminal mastermind',
        u'dirty cop',
        u'burying a body',
    }

    imdb = ImdbParser()
    imdb.parse('tt0114814')

    assert imdb.actors == expected_actors, 'Actors not parsed correctly'
    assert imdb.directors == expected_directors, 'Directors not parsed correctly'
    print(imdb.genres)
    # Only the listed genres are required; extra parsed genres are tolerated.
    assert expected_genres.issubset(imdb.genres), 'Genres not parsed correctly'
    assert imdb.imdb_id == 'tt0114814', 'ID not parsed correctly'
    # Likewise, extra parsed languages are tolerated.
    assert expected_languages.issubset(imdb.languages), 'Languages not parsed correctly'
    assert imdb.mpaa_rating == 'R', 'Rating not parsed correctly'
    assert imdb.name == 'The Usual Suspects', 'Name not parsed correctly'
    assert imdb.photo, 'Photo not parsed correctly'
    assert imdb.plot_outline == expected_plot, 'Plot outline not parsed correctly'
    # Score and votes are checked against ranges rather than exact values.
    assert 8.0 < imdb.score < 9.0, 'Score not parsed correctly'
    assert imdb.url == 'https://www.imdb.com/title/tt0114814/', 'URL not parsed correctly'
    assert 400000 < imdb.votes < 1000000, 'Votes not parsed correctly'
    assert imdb.year == 1995, 'Year not parsed correctly'
    # Every expected keyword must be present, and the count must match as well.
    assert expected_keywords.issubset(
        imdb.plot_keywords
    ), 'Parsed plot keywords missing items from the expected result'
    assert len(expected_keywords) == len(
        imdb.plot_keywords
    ), 'Parsed plot keyword count does not match expected.'
def test_plot_with_links(self):
    """The parsed plot must not be cut off at the first link. GitHub #756"""
    expected_plot = (
        "Chef Adam Jones (Bradley Cooper) had it all - and lost it. A two-star Michelin "
        "rockstar with the bad habits to match, the former enfant terrible of the Paris "
        "restaurant scene did everything different every time out, and only ever cared "
        "about the thrill of creating explosions of taste. To land his own kitchen and "
        "that third elusive Michelin star though, he'll need the best of the best on "
        "his side, including the beautiful Helene (Sienna Miller)."
    )

    imdb = ImdbParser()
    imdb.parse('tt2503944')

    assert imdb.plot_outline == expected_plot
def test_plot_with_links(self):
    """Check that the full plot is parsed rather than stopping at the first link. GitHub #756"""
    imdb = ImdbParser()
    imdb.parse('tt2503944')

    full_plot = (
        "Chef Adam Jones (Bradley Cooper) had it all - and lost it. A two-star Michelin "
        "rockstar with the bad habits to match, the former enfant terrible of the Paris "
        "restaurant scene did everything different every time out, and only ever cared "
        "about the thrill of creating explosions of taste. To land his own kitchen and "
        "that third elusive Michelin star though, he'll need the best of the best on "
        "his side, including the beautiful Helene (Sienna Miller)."
    )
    assert imdb.plot_outline == full_plot
def _parse_new_movie(self, imdb_url, session):
    """
    Parse an IMDB page into a new Movie object and add it to the session.

    Genres, languages, actors, directors and writers are each looked up
    through the session first; a new row object is only constructed when the
    lookup finds nothing.

    :param imdb_url: IMDB url
    :param session: Session to be used
    :return: Newly added Movie
    """

    def fetch_or_build(model, field, value, *init_args):
        # First stored row whose `field` equals `value`, or else a fresh
        # `model` instance constructed from `init_args`.
        found = session.query(model).filter(getattr(model, field) == value).first()
        return found or model(*init_args)

    imdb = ImdbParser()
    imdb.parse(imdb_url)

    # Copy the plain attributes from the parser onto a new database object.
    movie = db.Movie()
    movie.photo = imdb.photo
    movie.title = imdb.name
    movie.original_title = imdb.original_name
    movie.score = imdb.score
    movie.votes = imdb.votes
    movie.meta_score = imdb.meta_score
    movie.year = imdb.year
    movie.mpaa_rating = imdb.mpaa_rating
    movie.plot_outline = imdb.plot_outline
    movie.url = imdb_url

    for genre_name in imdb.genres:
        genre = fetch_or_build(db.Genre, 'name', genre_name, genre_name)
        movie.genres.append(genre)  # pylint:disable=E1101

    # The position of a language in the parsed list is stored as its prominence.
    for position, language_name in enumerate(imdb.languages):
        language = fetch_or_build(db.Language, 'name', language_name, language_name)
        movie.languages.append(db.MovieLanguage(language, prominence=position))

    for person_id, person_name in imdb.actors.items():
        actor = fetch_or_build(db.Actor, 'imdb_id', person_id, person_id, person_name)
        movie.actors.append(actor)  # pylint:disable=E1101

    for person_id, person_name in imdb.directors.items():
        director = fetch_or_build(db.Director, 'imdb_id', person_id, person_id, person_name)
        movie.directors.append(director)  # pylint:disable=E1101

    for person_id, person_name in imdb.writers.items():
        writer = fetch_or_build(db.Writer, 'imdb_id', person_id, person_id, person_name)
        movie.writers.append(writer)  # pylint:disable=E1101

    # Record the time of this update so the age of the info can be tracked later.
    movie.updated = datetime.now()
    session.add(movie)
    return movie
def test_parsed_data(self):
    """Parse 'tt0114814' and check every parsed attribute against known values."""
    expected_actors = {
        'nm0000592': 'Pete Postlethwaite',
        'nm0261452': 'Christine Estabrook',
        'nm0000751': 'Suzy Amis',
        'nm0000286': 'Stephen Baldwin',
        'nm0000445': 'Dan Hedaya',
        'nm0800339': 'Phillipe Simon',
        'nm0002064': 'Giancarlo Esposito',
        'nm0001590': 'Chazz Palminteri',
        'nm0000321': 'Gabriel Byrne',
        'nm0790436': 'Jack Shearer',
        'nm0000228': 'Kevin Spacey',
        'nm0001629': 'Kevin Pollak',
        'nm0107808': 'Carl Bressler',
        'nm0001125': 'Benicio Del Toro',
        'nm0000860': 'Paul Bartel',
    }
    expected_directors = {'nm0001741': 'Bryan Singer'}
    expected_genres = {u'crime', u'mystery', u'thriller'}
    expected_languages = {'english', 'hungarian', 'spanish', 'french'}
    expected_plot = (
        'Following a truck hijack in New York, five conmen are arrested and brought together for questioning. '
        'As none of them are guilty, they plan a revenge operation against the police. The operation goes well, '
        'but then the influence of a legendary mastermind criminal called Keyser S\xf6ze is felt. It becomes '
        'clear that each one of them has wronged S\xf6ze at some point and must pay back now. The payback job '
        'leaves 27 men dead in a boat explosion, but the real question arises now: Who actually is Keyser S\xf6ze?'
    )

    imdb = ImdbParser()
    imdb.parse('tt0114814')

    assert imdb.actors == expected_actors, 'Actors not parsed correctly'
    assert imdb.directors == expected_directors, 'Directors not parsed correctly'
    print(imdb.genres)
    # Only the listed genres are required; extra parsed genres are tolerated.
    assert expected_genres.issubset(imdb.genres), 'Genres not parsed correctly'
    assert imdb.imdb_id == 'tt0114814', 'ID not parsed correctly'
    # Likewise, extra parsed languages are tolerated.
    assert expected_languages.issubset(imdb.languages), 'Languages not parsed correctly'
    assert imdb.mpaa_rating == 'R', 'Rating not parsed correctly'
    assert imdb.name == 'The Usual Suspects', 'Name not parsed correctly'
    assert imdb.photo, 'Photo not parsed correctly'
    assert imdb.plot_outline == expected_plot, 'Plot outline not parsed correctly'
    # Score and votes are checked against ranges rather than exact values.
    assert 8.0 < imdb.score < 9.0, 'Score not parsed correctly'
    assert imdb.url == 'https://www.imdb.com/title/tt0114814/', 'URL not parsed correctly'
    assert 400000 < imdb.votes < 1000000, 'Votes not parsed correctly'
    assert imdb.year == 1995, 'Year not parsed correctly'