예제 #1
0
 def test_no_year(self):
     """Parsing a title that has no year must not crash and must leave ``year`` falsy."""
     title_id = 'tt3303790'
     imdb = ImdbParser()
     imdb.parse(title_id)
     # The name is still extracted even though the year is missing.
     assert imdb.name == 'Master of None'
     assert not imdb.year
예제 #2
0
 def test_no_year(self):
     """Parsing a title that has no year must not crash and must leave ``year`` falsy."""
     title_id = 'tt3303790'
     imdb = ImdbParser()
     imdb.parse(title_id)
     # The name is still extracted even though the year is missing.
     assert imdb.name == 'Master of None'
     assert not imdb.year
예제 #3
0
 def test_no_plot(self):
     """Parsing a title that has no plot must not crash and must leave ``plot_outline`` falsy."""
     title_id = 'tt1300562'
     imdb = ImdbParser()
     imdb.parse(title_id)
     # The name is still extracted even though the plot is missing.
     assert imdb.name == 'Goodbye Mothers'
     assert not imdb.plot_outline
예제 #4
0
 def test_no_plot(self):
     """Parsing a title that has no plot must not crash and must leave ``plot_outline`` falsy."""
     title_id = 'tt1300562'
     imdb = ImdbParser()
     imdb.parse(title_id)
     # The name is still extracted even though the plot is missing.
     assert imdb.name == 'Goodbye Mothers'
     assert not imdb.plot_outline
예제 #5
0
    def _parse_new_movie(self, imdb_url, session):
        """
        Parse an IMDB page, build a new Movie from the result and add it to the session.

        Genres, languages, actors, directors, writers and plot keywords are looked up
        through the session first; a new row object is only created when the lookup
        returns nothing.

        :param imdb_url: IMDB url
        :param session: Session to be used
        :return: Newly added Movie
        """

        def first_match(model, column, value):
            """Return the first `model` row whose `column` equals `value`, or None."""
            return session.query(model).filter(column == value).first()

        imdb = ImdbParser()
        imdb.parse(imdb_url)

        # Copy the scalar fields straight from the parser onto a fresh Movie.
        movie = db.Movie()
        movie.photo = imdb.photo
        movie.title = imdb.name
        movie.original_title = imdb.original_name
        movie.score = imdb.score
        movie.votes = imdb.votes
        movie.meta_score = imdb.meta_score
        movie.year = imdb.year
        movie.mpaa_rating = imdb.mpaa_rating
        movie.plot_outline = imdb.plot_outline
        movie.url = imdb_url

        for genre_name in imdb.genres:
            genre = first_match(db.Genre, db.Genre.name, genre_name) or db.Genre(genre_name)
            movie.genres.append(genre)  # pylint:disable=E1101

        # The position in the parsed language list is stored as the prominence.
        for position, language_name in enumerate(imdb.languages):
            language = first_match(db.Language, db.Language.name, language_name) or db.Language(
                language_name
            )
            movie.languages.append(db.MovieLanguage(language, prominence=position))

        for person_id, person_name in imdb.actors.items():
            actor = first_match(db.Actor, db.Actor.imdb_id, person_id) or db.Actor(
                person_id, person_name
            )
            movie.actors.append(actor)  # pylint:disable=E1101

        for person_id, person_name in imdb.directors.items():
            director = first_match(db.Director, db.Director.imdb_id, person_id) or db.Director(
                person_id, person_name
            )
            movie.directors.append(director)  # pylint:disable=E1101

        for person_id, person_name in imdb.writers.items():
            writer = first_match(db.Writer, db.Writer.imdb_id, person_id) or db.Writer(
                person_id, person_name
            )
            movie.writers.append(writer)  # pylint:disable=E1101

        for keyword in imdb.plot_keywords:
            plot_keyword = first_match(
                db.PlotKeyword, db.PlotKeyword.name, keyword
            ) or db.PlotKeyword(keyword)
            movie.plot_keywords.append(plot_keyword)  # pylint:disable=E1101

        # Record when the info was fetched so its age can be checked later.
        movie.updated = datetime.now()
        session.add(movie)
        return movie
예제 #6
0
 def test_parsed_data(self):
     """Parse title 'tt0114814' and check each field the parser exposes."""
     imdb = ImdbParser()
     imdb.parse('tt0114814')

     expected_actors = {
         'nm0000592': 'Pete Postlethwaite',
         'nm0261452': 'Christine Estabrook',
         'nm0000751': 'Suzy Amis',
         'nm0000286': 'Stephen Baldwin',
         'nm0000445': 'Dan Hedaya',
         'nm0800339': 'Phillipe Simon',
         'nm0002064': 'Giancarlo Esposito',
         'nm0001590': 'Chazz Palminteri',
         'nm0000321': 'Gabriel Byrne',
         'nm0790436': 'Jack Shearer',
         'nm0000228': 'Kevin Spacey',
         'nm0001629': 'Kevin Pollak',
         'nm0107808': 'Carl Bressler',
         'nm0001125': 'Benicio Del Toro',
         'nm0000860': 'Paul Bartel',
     }
     assert imdb.actors == expected_actors, 'Actors not parsed correctly'

     expected_directors = {'nm0001741': 'Bryan Singer'}
     assert imdb.directors == expected_directors, 'Directors not parsed correctly'

     print(imdb.genres)
     # Only the listed genres are required; any additional parsed genres are tolerated.
     expected_genres = {u'crime', u'mystery', u'thriller'}
     assert expected_genres.issubset(imdb.genres), 'Genres not parsed correctly'

     assert imdb.imdb_id == 'tt0114814', 'ID not parsed correctly'

     # Same subset semantics as for genres: all four must be present.
     expected_languages = {'english', 'hungarian', 'spanish', 'french'}
     assert expected_languages.issubset(imdb.languages), 'Languages not parsed correctly'

     assert imdb.mpaa_rating == 'R', 'Rating not parsed correctly'
     assert imdb.name == 'The Usual Suspects', 'Name not parsed correctly'
     assert imdb.photo, 'Photo not parsed correctly'

     expected_plot = (
         'Following a truck hijack in New York, five conmen are arrested and brought together for questioning. '
         'As none of them are guilty, they plan a revenge operation against the police. The operation goes well, '
         'but then the influence of a legendary mastermind criminal called Keyser S\xf6ze is felt. It becomes '
         'clear that each one of them has wronged S\xf6ze at some point and must pay back now. The payback job '
         'leaves 27 men dead in a boat explosion, but the real question arises now: Who actually is Keyser S\xf6ze?'
     )
     assert imdb.plot_outline == expected_plot, 'Plot outline not parsed correctly'

     # Score and votes are checked against ranges rather than exact values.
     assert 8.0 < imdb.score < 9.0, 'Score not parsed correctly'
     assert imdb.url == 'https://www.imdb.com/title/tt0114814/', 'URL not parsed correctly'
     assert 400000 < imdb.votes < 1000000, 'Votes not parsed correctly'
     assert imdb.year == 1995, 'Year not parsed correctly'

     wanted_keywords = {
         u'criminal',
         u'suspect',
         u'criminal mastermind',
         u'dirty cop',
         u'burying a body',
     }
     # First every expected keyword must be present, then the counts must match.
     assert wanted_keywords.issubset(
         imdb.plot_keywords
     ), 'Parsed plot keywords missing items from the expected result'
     assert len(wanted_keywords) == len(
         imdb.plot_keywords
     ), 'Parsed plot keyword count does not match expected.'
예제 #7
0
 def test_plot_with_links(self):
     """The parsed plot must not be cut off at the first link. GitHub #756"""
     expected_plot = (
         "Chef Adam Jones (Bradley Cooper) had it all - and lost it. A two-star Michelin "
         "rockstar with the bad habits to match, the former enfant terrible of the Paris "
         "restaurant scene did everything different every time out, and only ever cared "
         "about the thrill of creating explosions of taste. To land his own kitchen and "
         "that third elusive Michelin star though, he'll need the best of the best on "
         "his side, including the beautiful Helene (Sienna Miller)."
     )
     imdb = ImdbParser()
     imdb.parse('tt2503944')
     # The whole outline is compared, so a truncated plot fails the test.
     assert imdb.plot_outline == expected_plot
예제 #8
0
 def test_plot_with_links(self):
     """The parsed plot must not be cut off at the first link. GitHub #756"""
     expected_plot = (
         "Chef Adam Jones (Bradley Cooper) had it all - and lost it. A two-star Michelin "
         "rockstar with the bad habits to match, the former enfant terrible of the Paris "
         "restaurant scene did everything different every time out, and only ever cared "
         "about the thrill of creating explosions of taste. To land his own kitchen and "
         "that third elusive Michelin star though, he'll need the best of the best on "
         "his side, including the beautiful Helene (Sienna Miller)."
     )
     imdb = ImdbParser()
     imdb.parse('tt2503944')
     # The whole outline is compared, so a truncated plot fails the test.
     assert imdb.plot_outline == expected_plot
예제 #9
0
    def _parse_new_movie(self, imdb_url, session):
        """
        Parse an IMDB page, build a new Movie from the result and add it to the session.

        Genres, languages, actors, directors and writers are looked up through the
        session first; a new row object is only created when the lookup returns nothing.

        :param imdb_url: IMDB url
        :param session: Session to be used
        :return: Newly added Movie
        """

        def first_match(model, column, value):
            """Return the first `model` row whose `column` equals `value`, or None."""
            return session.query(model).filter(column == value).first()

        imdb = ImdbParser()
        imdb.parse(imdb_url)

        # Copy the scalar fields straight from the parser onto a fresh Movie.
        movie = db.Movie()
        movie.photo = imdb.photo
        movie.title = imdb.name
        movie.original_title = imdb.original_name
        movie.score = imdb.score
        movie.votes = imdb.votes
        movie.meta_score = imdb.meta_score
        movie.year = imdb.year
        movie.mpaa_rating = imdb.mpaa_rating
        movie.plot_outline = imdb.plot_outline
        movie.url = imdb_url

        for genre_name in imdb.genres:
            genre = first_match(db.Genre, db.Genre.name, genre_name) or db.Genre(genre_name)
            movie.genres.append(genre)  # pylint:disable=E1101

        # The position in the parsed language list is stored as the prominence.
        for position, language_name in enumerate(imdb.languages):
            language = first_match(db.Language, db.Language.name, language_name) or db.Language(
                language_name
            )
            movie.languages.append(db.MovieLanguage(language, prominence=position))

        for person_id, person_name in imdb.actors.items():
            actor = first_match(db.Actor, db.Actor.imdb_id, person_id) or db.Actor(
                person_id, person_name
            )
            movie.actors.append(actor)  # pylint:disable=E1101

        for person_id, person_name in imdb.directors.items():
            director = first_match(db.Director, db.Director.imdb_id, person_id) or db.Director(
                person_id, person_name
            )
            movie.directors.append(director)  # pylint:disable=E1101

        for person_id, person_name in imdb.writers.items():
            writer = first_match(db.Writer, db.Writer.imdb_id, person_id) or db.Writer(
                person_id, person_name
            )
            movie.writers.append(writer)  # pylint:disable=E1101

        # Record when the info was fetched so its age can be checked later.
        movie.updated = datetime.now()
        session.add(movie)
        return movie
예제 #10
0
 def test_parsed_data(self):
     """Parse title 'tt0114814' and check each field the parser exposes."""
     imdb = ImdbParser()
     imdb.parse('tt0114814')

     expected_actors = {
         'nm0000592': 'Pete Postlethwaite',
         'nm0261452': 'Christine Estabrook',
         'nm0000751': 'Suzy Amis',
         'nm0000286': 'Stephen Baldwin',
         'nm0000445': 'Dan Hedaya',
         'nm0800339': 'Phillipe Simon',
         'nm0002064': 'Giancarlo Esposito',
         'nm0001590': 'Chazz Palminteri',
         'nm0000321': 'Gabriel Byrne',
         'nm0790436': 'Jack Shearer',
         'nm0000228': 'Kevin Spacey',
         'nm0001629': 'Kevin Pollak',
         'nm0107808': 'Carl Bressler',
         'nm0001125': 'Benicio Del Toro',
         'nm0000860': 'Paul Bartel',
     }
     assert imdb.actors == expected_actors, 'Actors not parsed correctly'

     expected_directors = {'nm0001741': 'Bryan Singer'}
     assert imdb.directors == expected_directors, 'Directors not parsed correctly'

     print(imdb.genres)
     # Only the listed genres are required; any additional parsed genres are tolerated.
     expected_genres = set([u'crime', u'mystery', u'thriller'])
     assert expected_genres.issubset(imdb.genres), 'Genres not parsed correctly'

     assert imdb.imdb_id == 'tt0114814', 'ID not parsed correctly'

     # Same subset semantics as for genres: all four must be present.
     expected_languages = set(['english', 'hungarian', 'spanish', 'french'])
     assert expected_languages.issubset(imdb.languages), 'Languages not parsed correctly'

     assert imdb.mpaa_rating == 'R', 'Rating not parsed correctly'
     assert imdb.name == 'The Usual Suspects', 'Name not parsed correctly'
     assert imdb.photo, 'Photo not parsed correctly'

     expected_plot = (
         'Following a truck hijack in New York, five conmen are arrested and brought together for questioning. '
         'As none of them are guilty, they plan a revenge operation against the police. The operation goes well, '
         'but then the influence of a legendary mastermind criminal called Keyser S\xf6ze is felt. It becomes '
         'clear that each one of them has wronged S\xf6ze at some point and must pay back now. The payback job '
         'leaves 27 men dead in a boat explosion, but the real question arises now: Who actually is Keyser S\xf6ze?'
     )
     assert imdb.plot_outline == expected_plot, 'Plot outline not parsed correctly'

     # Score and votes are checked against ranges rather than exact values.
     assert 8.0 < imdb.score < 9.0, 'Score not parsed correctly'
     assert imdb.url == 'https://www.imdb.com/title/tt0114814/', 'URL not parsed correctly'
     assert 400000 < imdb.votes < 1000000, 'Votes not parsed correctly'
     assert imdb.year == 1995, 'Year not parsed correctly'