Example #1
    def test_get_title_by_id_excludes_episodes(self):
        assert self.imdb.get_title_by_id("tt3181538") is not None

        imdb = Imdb(exclude_episodes=True)
        title = imdb.get_title_by_id("tt3181538")

        assert title is None
Example #2
 def save(self, *args, **kwargs):
     imdb = Imdb()
     movie = imdb.get_title_by_id(self.imdb_id)
     self.title = movie.title
     self.year = movie.year
     self.plot = movie.plot_outline
     super(Film, self).save(*args, **kwargs)
Example #3
class TestCache(unittest.TestCase):

    def setUp(self):
        self.imdb = Imdb()

    def tearDown(self):
        shutil.rmtree(self.imdb.cache_dir, ignore_errors=True)

    def _get_cache_size(self):
        """ Returns a count of the items in the cache """
        cache = os.path.exists(self.imdb.cache_dir)
        if not cache:
            return 0
        _, _, cache_files = next(os.walk(self.imdb.cache_dir))
        return len(cache_files)

    def test_cache_populated(self):
        """ Tests the cache is populated correctly """
        self.imdb = Imdb({'cache': True, 'cache_dir': '/tmp/imdbpie-test'})

        self.assertEqual(self._get_cache_size(), 0)
        movie = self.imdb.find_movie_by_id("tt0382932")
        # Make a 2nd call to ensure no duplicate cache items created
        self.imdb.find_movie_by_id("tt0382932")

        # find makes 2 api calls
        self.assertEqual(self._get_cache_size(), 2)
        self.assertEqual(movie.title, 'Ratatouille')

    def test_cache_not_populated_when_disabled(self):
        """ Tests the cache is not populated when disabled (default) """
        self.assertEqual(self._get_cache_size(), 0)
        self.imdb.find_movie_by_id("tt0382932")
        self.assertEqual(self._get_cache_size(), 0)
Example #4
    def imdb(self):
        """Get IMDb data for release, set self.imdb_id to override auto match
        from movie title + year"""
        from imdbpie import Imdb

        imdb = Imdb()

        if not self.movie_release:
            return False

        imdb_id = None
        if hasattr(self, "imdb_id"):
            imdb_id = self.imdb_id

        # Find IMDb match by title and check if year is a match
        if not imdb_id:
            for imdb_match in imdb.find_by_title(self.movie_title):
                if int(imdb_match.get("year")) == self.movie_year:
                    imdb_id = imdb_match.get("imdb_id")
                    break

        # No IMDb match could be found from title + year
        if not imdb_id:
            return False

        return imdb.find_movie_by_id(imdb_id)
Example #5
    def test_build_url_proxied(self):
        imdb_fr = Imdb(locale="en_FR", cache=False, anonymize=True, proxy_uri="http://someproxywebsite.co.uk?url={0}")
        imdb_fr.timestamp = time.mktime(datetime.date.today().timetuple())

        url = imdb_fr._build_url(path="/title/maindetails", params={"tconst": "tt1111111"})

        expected_url = "http://someproxywebsite.co.uk?url=" + quote("https://app.imdb.com/title/maindetails")
        assert url.startswith(expected_url) is True
Example #6
 def __init__(self, cache=True, cache_dir=None):
     # open connection to imdb
     if cache is not None:
         if cache_dir is not None:
             self.imdb = Imdb(cache=True, cache_dir=cache_dir)
         else:
             self.imdb = Imdb(cache=True)
     else:
         self.imdb = Imdb()
Example #7
def updateart():
    import urllib
    from imdbpie import Imdb
    imdb = Imdb()
    print 'updating art for movies(imdb cover)'
    for movie in Movie.query.all():
        print 'processing %s' % movie.c00
        imdbid = movie.c09
        if Posters.query.filter_by(apiid=imdbid).first() is not None:
            print 'skipping %s as it is already in the database' % movie.c00
            continue
        try:
            title = imdb.get_title_by_id(imdbid)
            if title.cover_url is None:
                continue
            poster = Posters()
            poster.apiid = imdbid
            poster.type = 'movie'
            response = urllib.urlopen(title.cover_url)
            data = response.read()
            data64 = data.encode('base64').replace('\n', '')
            poster.imgdata = 'data:image/jpeg;base64,%s' % data64
            # print poster.imgdata
            db.session.add(poster)
            db.session.commit()
        except:
            continue
    print 'updating art for tv'
    from tvdb_api import Tvdb
    t = Tvdb(banners=True)
    for show in Tvshow.query.all():
        print 'processing %s' % show.c00
        tvdbid = show.c12
        if Posters.query.filter_by(apiid=tvdbid).first() is not None:
            print 'skipping %s as it is already in the database' % show.c00
            continue
        try:
            tvdbshow = t[int(tvdbid)]
            bannerkeys = tvdbshow['_banners']['season']['season'].keys()
            banner_url = tvdbshow['_banners']['season']['season'][bannerkeys[0]]['_bannerpath']
            poster = Posters()
            poster.apiid = tvdbid
            poster.type = 'tv'
            response = urllib.urlopen(banner_url)
            data = response.read()
            data64 = data.encode('base64').replace('\n', '')
            poster.imgdata = 'data:image/jpeg;base64,%s' % data64
            # print poster.imgdata
            db.session.add(poster)
            db.session.commit()
        except:
            continue
Example #8
def imdb_import(number):
    """
    Helper method to import large quantities of movies from IMDB
    as sample data.
    """

    reset_database()

    imdb = Imdb(cache=True)
    top = imdb.top_250()

    movies = []
    count = 0
    for x in top:
        if count >= int(number):
            break

        m = Movie()

        im = imdb.get_title_by_id(x['tconst'])

        m.name = im.title
        m.year = im.year
        m.imdb_id = im.imdb_id
        m.save()
        movies.append(m)

        # adding director and actors
        for person in im.credits:
            if person.token == "directors":
                m.director = Person.objects.create_or_find_imdb(person)

            elif person.token == "cast":
                m.actors.add(Person.objects.create_or_find_imdb(person))

        m.save()
        for i in range(random.randrange(3)):
            mc = MovieCopy()
            mc.movie = m
            mc.save()

        count = count+1

    # imdb.get_title_images("tt0468569")
    # imdb.get_person_images("nm0000033")

    return {
        'number_imported': number,
        'kind': 'movies',
        'movies': movies,
    }
Example #9
    def test_get_episodes(self):
        assert self.imdb.get_title_by_id('tt0303461') is not None

        imdb = Imdb()
        episodes = imdb.get_episodes('tt0303461')
        assert episodes is not None

        assert len(episodes) == 14
        episode_1 = episodes[0]
        assert episode_1.imdb_id == "tt0579539"
        assert episode_1.type == "tv_episode"
        assert episode_1.title == u'The Train Job'
        assert episode_1.series_name == 'Firefly'
        assert episode_1.release_date == "2002-09-20"
        assert episode_1.year == 2002
Example #10
    def test_get_title_by_id_using_proxy(self):
        imdb = Imdb(locale="en_US", cache=False, anonymize=True)
        title = imdb.get_title_by_id("tt0111161")

        assert title.title == "The Shawshank Redemption"
        assert title.year == 1994
        assert title.type == "feature"
        assert title.tagline == ("Fear can hold you prisoner. " "Hope can set you free.")
        assert isinstance(title.plots, list) is True
        assert len(title.plots) == 5
        assert isinstance(title.rating, float) is True
        assert sorted(title.genres) == sorted(["Crime", "Drama"])
        assert isinstance(title.votes, int) is True
        assert title.runtime == 8520
        assert len(title.trailers) == 3
Example #11
	def __init__(self):
		self._imdb = Imdb()
		self._oldTop = self._get_stored_data()
		self._oldTopList = self._generate_oldTop_id_list()
		self._newTop = self._fetch_data()
		self._newTopList = self._generate_newTop_id_list()
		self._newlyAdded = self._find_newly_added()
Example #12
    def test_get_title_by_id_using_proxy(self):
        imdb = Imdb(locale='en_US', cache=False, anonymize=True)
        title = imdb.get_title_by_id('tt0111161')

        assert title.title == 'The Shawshank Redemption'
        assert title.year == 1994
        assert title.type == 'feature'
        assert title.tagline == ('Fear can hold you prisoner. '
                                 'Hope can set you free.')
        assert isinstance(title.plots, list) is True
        assert len(title.plots) == 6
        assert isinstance(title.rating, float) is True
        assert sorted(title.genres) == sorted(['Crime', 'Drama'])
        assert isinstance(title.votes, int) is True
        assert title.runtime == 8520
        assert len(title.trailers) == 3
Example #13
class IMDB(Miner):

    def __init__(self):

        self.handle = Imdb()
        super(IMDB, self).__init__()

    def top_list(self, number):
        pop_movies = self.handle.top_250()
        return pop_movies

    def get_movie_id(self, index):
        return "tt" + index  # formatting to IMDB_ID

    def get_movie_by_id(self, movie_id):
        return self.handle.get_title_images(movie_id), self.handle.get_title(movie_id)
Example #14
def test_review(set_up):
    imdb = Imdb(locale='en_US', cache=False)

    reviews_data = imdb._get_reviews_data('tt0111161')
    review = Review(data=reviews_data[0])

    assert 'carflo' == review.username
    assert review.text.startswith('Why do I want to write the 234th ') is True
    assert review.text.endswith('Redemption to touch the soul.') is True
    assert '2003-11-26' == review.date
    assert 10 == review.rating
    assert 'Tied for the best movie I have ever seen' == review.summary
    assert 'G' == review.status
    assert 'Texas' == review.user_location
    assert 1902 <= review.user_score
    assert 2207 <= review.user_score_count
Example #15
    def test_build_url_proxied(self):
        imdb_fr = Imdb(
            locale='en_FR',
            cache=False,
            anonymize=True,
            proxy_uri='http://someproxywebsite.co.uk?url={0}'
        )
        imdb_fr.timestamp = time.mktime(datetime.date.today().timetuple())

        url = imdb_fr._build_url(
            path='/title/maindetails', params={'tconst': 'tt1111111'})

        expected_url = (
            'http://someproxywebsite.co.uk?url=' +
            quote('https://app.imdb.com/title/maindetails')
        )
        assert url.startswith(expected_url) is True
Example #16
    def test_build_url(self):
        imdb_fr = Imdb(locale="en_FR", cache=False)
        imdb_fr.timestamp = time.mktime(datetime.date.today().timetuple())

        url = imdb_fr._build_url(path="/title/maindetails", params={"tconst": "tt1111111"})

        expected_url = (
            "https://app.imdb.com/"
            "title/maindetails"
            "?apiKey=d2bb34ec6f6d4ef3703c9b0c36c4791ef8b9ca9b"
            "&apiPolicy=app1_1"
            "&locale=en_FR"
            "&timestamp={timestamp}"
            "&tconst=tt1111111&api=v1&appid=iphone1_1"
        ).format(timestamp=imdb_fr.timestamp)

        assert_urls_match(expected_url, url)
Example #17
 def __init__(self):
     self.parser = Parser()
     self.directory = ''
     self.imdb = Imdb()
     self.cover_size = 214, 317
     self.square_size = 317, 317
     self.current = MOVIE_DICT
     self.all_files = []
     self.display = Display()
Example #18
    def test_build_url(self):
        imdb_fr = Imdb(locale='en_FR', cache=False)
        imdb_fr.timestamp = time.mktime(datetime.date.today().timetuple())

        url = imdb_fr._build_url(
            path='/title/maindetails', params={'tconst': 'tt1111111'})

        expected_url = (
            'https://app.imdb.com/'
            'title/maindetails'
            '?apiKey=d2bb34ec6f6d4ef3703c9b0c36c4791ef8b9ca9b'
            '&apiPolicy=app1_1'
            '&locale=en_FR'
            '&timestamp={timestamp}'
            '&tconst=tt1111111&api=v1&appid=iphone1_1'
        ).format(timestamp=imdb_fr.timestamp)

        assert_urls_match(expected_url, url)
Example #19
class CommonMetadataIMDB(object):
    """
    Class for interfacing with imdb
    """

    def __init__(self, cache=True, cache_dir=None):
        # open connection to imdb
        if cache is not None:
            if cache_dir is not None:
                self.imdb = Imdb(cache=True, cache_dir=cache_dir)
            else:
                self.imdb = Imdb(cache=True)
        else:
            self.imdb = Imdb()

    def com_imdb_title_search(self, media_title):
        """
        # fetch info from title
        """
        return self.imdb.search_for_title(media_title)

    def com_imdb_id_search(self, media_id):
        """
        # fetch info by ttid
        """
        return self.imdb.get_title_by_id(media_id)

    def com_imdb_person_by_id(self, person_id):
        """
        # fetch person info by id
        """
        return self.imdb.get_person_by_id(person_id)

    def com_imdb_person_images_by_id(self, person_id):
        """
        # fetch person images by id
        """
        return self.imdb.get_person_images(person_id)

    def com_imdb_title_review_by_id(self, media_id):
        """
        # fetch the title review
        """
        return self.imdb.get_title_reviews(media_id)
Example #20
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-k", "--clusters", required=True, type=int, help="Number of cluters")
    args = vars(parser.parse_args())

    k = args["clusters"]
    make_output_dirs(k)

    if os.listdir("posters") == []:
        imdb = Imdb(anonymize=True)
        top = imdb.top_250()
        write_posters(top)

    qp_dir = "quantized_posters_" + str(k) + "/"
    cb_dir = "color_bars_" + str(k) + "/"
    if (os.listdir(qp_dir) == []) and (os.listdir(cb_dir) == []):
        posters = os.listdir("posters")
        for poster in posters:
            process_poster("posters/" + poster, k)
Example #21
def net_search(name):
	#input=movie name
	#output=movie id from imdb
	
	imdb = Imdb({'anonymize': False})
	#~ movie = imdb.find_movie_by_id("tt0382932")
	#~ print movie.title

	if len(name)>0:
		details=imdb.find_by_title(name)
		if len(details)>0:
			#~ return imdb.find_movie_by_id(details[0]['imdb_id'])
			imdb_id=details[0]['imdb_id']
			#~ print imdb_id
			movie=imdb.find_movie_by_id(imdb_id)
			return movie
		else:
			#~ return NULL
			return -1
Example #22
    def test_cache_populated(self):
        """ Tests the cache is populated correctly """
        self.imdb = Imdb({'cache': True, 'cache_dir': '/tmp/imdbpie-test'})

        self.assertEqual(self._get_cache_size(), 0)
        movie = self.imdb.find_movie_by_id("tt0382932")
        # Make a 2nd call to ensure no duplicate cache items created
        self.imdb.find_movie_by_id("tt0382932")

        # find makes 2 api calls
        self.assertEqual(self._get_cache_size(), 2)
        self.assertEqual(movie.title, 'Ratatouille')
Example #23
 def __init__(self, app_dir, logger):
     self.app_dir_data = app_dir.user_data_dir
     self.cur_cache = {}
     self.cache = {}
     self.cache_file = os.path.join(app_dir.user_data_dir, CACHING_FILE)
     if os.path.exists(self.cache_file):
         with open(self.cache_file) as f:
             self.cache = yaml.load(f)
             if not self.cache:
                 self.cache = {}
     self.in_cache = []
     self.imdb = Imdb()
     self.logger = logger
Example #24
class ImdbCommand(Command):
    name = 'imdb'
    aliases = ['movie']
    description = 'Searches IMDB for movie titles.'

    def __init__(self, bot, config):
        super().__init__(bot, config)
        self._imdb = Imdb(cache=True, exclude_episodes=True)

    def run(self, message, args):
        if not args:
            self.reply(message, 'Please supply some search terms!')
            return

        self.bot.telegram.send_chat_action(message.chat.id, 'typing')
        results = self._imdb.search_for_title(' '.join(args))
        if not results:
            self.reply(message, 'No results found!')
            return

        result = self._imdb.get_title_by_id(results[0]['imdb_id'])
        reply = '<b>URL:</b> http://www.imdb.com/title/{0}\n'.format(telegram_escape(result.imdb_id))
        reply += '<b>Title:</b> {0}\n'.format(telegram_escape(result.title))
        reply += '<b>Year:</b> {0}\n'.format(result.year)
        reply += '<b>Genre:</b> {0}\n'.format(telegram_escape(', '.join(result.genres[:3])))
        reply += '<b>Rating:</b> {0}\n'.format(result.rating)
        runtime, _ = divmod(result.runtime, 60)
        reply += '<b>Runtime:</b> {0} minutes\n'.format(runtime)
        reply += '<b>Certification:</b> {0}\n'.format(result.certification)
        reply += '<b>Cast:</b> {0}\n'.format(
            telegram_escape(', '.join([person.name for person in result.cast_summary[:5]])))
        reply += '<b>Director(s):</b> {0}\n\n'.format(
            telegram_escape(', '.join([person.name for person in result.directors_summary[:5]])))
        reply += telegram_escape(result.plots[0])

        self.reply(message, reply, parse_mode='HTML')
Example #25
def lookup_movie(movie_name):
    movie_matches = Imdb().search_for_title(movie_name)
    if not movie_matches:
        raise MovieLookUpFailed("No movies matching this name!")
    else:
        return MovieList(movie_matches)
Example #26
print('The most used "significant" words & their freq. are:')
print('film: 18, soldiers: 5, stress: 4, soldier: 4')
print('story: 3, lives: 3, weapon: 2, violent: 2, trauma: 2')
print('training: 2, killing: 2, hard: 2, fighting: 2')

print('\n')

print('Words used pertaining to the emotional aspects of the movies:')
print('violent: 2, emotions: 2, emotional: 2, angry: 2')


#NLTK
#nltk.download() and click Models and download vader_lexicon
from imdbpie import Imdb

imdb = Imdb()
print(imdb.search_for_title("Lone Survivor")[0])
print(imdb._get_reviews_data("tt1091191")[0]['summary'])
print(imdb._get_reviews_data("tt1091191")[0]['user_name'])
print(imdb._get_reviews_data("tt1091191")[0]['date'])
print(imdb._get_reviews_data("tt1091191")[0]['text'])

from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk

sentence = open('lone_survivor.txt', 'r', encoding='utf8').read()

score = SentimentIntensityAnalyzer().polarity_scores(sentence)
print(score)

Example #27
from imdbpie import Imdb
import unittest

imdb = Imdb({'anonymize': False})
movie = imdb.find_movie_by_id("tt0382932")


class TestTrailer(unittest.TestCase):

    def test_trailer_url(self):
        self.assertIsNotNone(movie.trailers)


if __name__ == '__main__':
    unittest.main()
Example #28
    suite = unittest.TestSuite()
    for all_tests in unittest.defaultTestLoader.discover('./', pattern='*_test.py'):
        for test in all_tests:
            suite.addTests(test)
    return suite


if __name__ == '__main__':
    unittest.main()

########NEW FILE########
__FILENAME__ = image_test
from imdbpie import Imdb
import unittest

imdb = Imdb({'anonymize': False})
images = imdb.title_images("tt0468569")


class TestImage(unittest.TestCase):

    def test_results(self):
        self.assertGreaterEqual(len(images), 107)

    def test_caption(self):
        self.assertEqual(images[0].caption, 'Still of Gary Oldman in The Dark Knight')

    def test_url(self):
        self.assertEqual(
            images[0].url,
            'http://ia.media-imdb.com/images/M/MV5BOTAxNzI0ND'
Example #29
        else:
            return s
    except Exception as ex:
        print('Exception at single_quote 22', ex)
        return s


movie_list = []
mng = MngIMDB()
movie_list = mng.load_data()
mngDb = MovieDB()
moviesid = mngDb.get_all_old_movies_by_field('movieid')
moviesnames = mngDb.get_all_old_movies_by_field('title')
actorsnames = mngDb.get_all_old_actors_by_field('name')
mng.setUpClass()
imdb = Imdb()
for movie in movie_list:
    if int(movie.year) < 2017:
        continue
    # for movie.title in moviesnames:
    #     continue
    try:
        if type(movie.title) is tuple:
            movie.title = movie.title[0]
        datam = None
        try:
            datam = imdb.search_for_title(movie.title)
        except:
            print("error")
            continue
        movie_id = ''
Example #30
#! /usr/bin/python

from imdbpie import Imdb
imdb = Imdb()

s_result = imdb.search_for_title("Logan")

max_values = len(s_result)

if max_values > 5:
    max_values = 5

for i in xrange(0, max_values, 1):
    print s_result[i]

#print s_result[0]

#print(type(dknight))

#print dknight

#print(dknight['imdb_id'])
Example #31
class Movie:
    def __init__(self):
        self.imdb = Imdb()
        self.reviews = None
        self.chartdata = None
        self.director = None
        self.commentbasedrating = 0
        self.title = None
        self.poster = None
        self.durationMin = None
        self.rating = 0
        self.id = None
        self.summary = None
        self.outline = None
        self.cast = None
        self.directors = None

    def SetAfterInit(self, dict):
        self.dict = dict
        self.id = dict['base']['id'].split('/')[2]

        if 'base' in self.dict:
            if 'title' in self.dict['base']:
                self.title = self.dict['base']['title']
            if 'runningTimeInMinutes' in self.dict['base']:
                self.durationMin = self.dict['base']['runningTimeInMinutes']
            if 'image' in self.dict['base']:
                if 'url' in self.dict['base']['image']:
                    self.poster = self.dict['base']['image']['url']
        if 'ratings' in self.dict:
            if 'rating' in self.dict['ratings']:
                self.rating = float(self.dict['ratings']['rating'])
        if 'plot' in self.dict:
            if 'outline' in self.dict['plot']:
                if 'text' in self.dict['plot']['outline']:
                    self.outline = self.dict['plot']['outline']['text']
            if 'summaries' in self.dict['plot']:
                if len(self.dict['plot']['summaries']) > 0:
                    self.summary = self.dict['plot']['summaries'][0]['text']

    def SetAfterSearch(self, dict):
        self.id = dict['id']
        self.title = dict['title']
        self.poster = dict['poster']
        self.durationMin = dict['durationMin']
        self.rating = dict['rating']
        self.outline = dict['outline']
        self.summary = dict['summary']

    def AnalyzeReviews(self):
        self.GetReviews()
        if self.reviews != None:
            self.SetCommentbasedrating()
            self.GenerateChartData()

    def GetReviews(self):

        reviews_temp_load = self.imdb.get_title_user_reviews(self.id)
        if 'reviews' in reviews_temp_load:
            reviews_temp = reviews_temp_load['reviews']
            self.reviews = []
            for review in reviews_temp:
                review_temp = UserReview(review['helpfulnessScore'],
                                         review['reviewText'])
                self.reviews.append(review_temp)

    def SetCommentbasedrating(self):
        scoreSum = 0
        for review in self.reviews:
            scoreSum += review.commentbasedrating
        self.commentbasedrating = scoreSum / len(self.reviews)

    def GenerateChartData(self):
        cd = ChartData(self.reviews)
        cd.SetDataHelpfulness(self.reviews)
        cd.SetDataCommentbasedrating(self.reviews)
        self.chartdata = cd

    def SetCast(self):
        dict = self.imdb.get_title_credits(self.id)
        if 'credits' in dict:
            if 'cast' in dict['credits']:
                self.cast = dict['credits']['cast']
            if 'director' in dict['credits']:
                self.directors = dict['credits']['director']

    def GetJSONSearch(self):

        listReviews = []
        if self.reviews != None:
            for review in self.reviews:
                listReviews.append(review.GetJSON())

        cd = {}
        if self.chartdata != None:
            cd = self.chartdata.GetJSON()

        dict = {
            "id": self.id,
            "title": self.title,
            "poster": self.poster,
            "durationMin": self.durationMin,
            "rating": self.rating,
            "outline": self.outline,
            "summary": self.summary,
            "cast": self.cast,
            "directors": self.directors,
            "reviews": listReviews,
            "commentbasedrating": self.commentbasedrating,
            "chartdata": cd
        }
        return dict
Example #32
import matplotlib.pyplot as plt
import plotly.plotly as py
from imdbpie import Imdb
imdb = Imdb()
imdb = Imdb(anonymize=True) # to proxy requests

import pickle
import ast
import json
movie=open('movie review score_0-50','r')
movie1=open('movie review score_51-100','r')
movie2=open('movie review score_101-150','r')
movie3=open('movie review score_151-200','r')
movie4=open('movie review score_201-250','r')
#dictionary=dict(x.split(':') for x in movie.read().split('],'))
#print dictionary
Scores= eval(movie.read())
Scores.update(eval(movie1.read()))
Scores.update(eval(movie2.read()))
Scores.update(eval(movie3.read()))
Scores.update(eval(movie4.read()))

with open('catergory', 'rb') as handle:
  Category = pickle.load(handle)

#print Category


CategoryLen={}
for i in Category:
    temp=[]
Example #33
from sklearn.metrics import accuracy_score
from sklearn.grid_search import GridSearchCV

import psycopg2
from sqlalchemy import create_engine
import requests
from imdbpie import Imdb
import nltk

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
#########################################
# part 1
# importing top 250 movies from imdb database using api thang into a dataframe
imdb = Imdb()
imdb = Imdb(anonymize=True)
top_250 = pd.DataFrame(imdb.top_250())

# sorting values by rating and selecting only the top 100 movies
top_250 = top_250.sort_values(by='rating', ascending=False)
top_100 = top_250[0:100]

# limiting columns according to starter code
mask = ['num_votes', 'rating', 'tconst', 'title', 'year']
top_100 = top_100[mask]

# getting genre/runtime from OMDB
top_100
movie_list = top_100['tconst']
Example #34
        return s


movie_list = []
movie_genres = {}
actor_set = {}

with open('data.csv') as f:
    for row in f.readlines()[1:]:
        columns = row.split(',')
        movie_id = columns[0].split('/')[4]
        genres = columns[1][:-1]
        movie_list.append(movie_id)
        movie_genres[movie_id] = genres

imdb = Imdb()
movie_count = 0
for movie_id in movie_list:
    try:
        title = imdb.get_title(movie_id)
        sql = (
            '''INSERT INTO movie_movie VALUES (\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\')'''
            .format(
                movie_id, single_quote(str(title['base']['title'])),
                title['base']['year'], title['base']['runningTimeInMinutes'],
                movie_genres[movie_id], title['ratings']['rating'],
                single_quote(title['base']['image']['url']),
                single_quote(str(title['plot']['outline']['text'])),
                single_quote(
                    str(
                        imdb.get_title_videos(movie_id)['videos'][0]
Example #35
from sklearn.metrics import mean_squared_error, r2_score
import psycopg2
import requests
import nltk

import urllib
from bs4 import BeautifulSoup
import nltk

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


#1. Connect to the imdbpie API
imdb = Imdb()
imdb = Imdb(anonymize = True)

#2. Query the top 250 rated movies in the database
imdb.top_250()

#3. Put the information into a dataframe, then keep only relevant columns
data = pd.DataFrame(imdb.top_250())
data.head()

data.drop('can_rate', axis=1, inplace=True)
data.drop('image', axis=1, inplace=True)
data.drop('type', axis=1, inplace=True)

#4. Select only the top 100 movies
data = data.iloc[0:100]
Example #36
# The python package imdbpie was used in this program
# and the repository for this can be found at:
# https://github.com/richardasaurus/imdb-pie

from imdbpie import Imdb
import fresh_tomatoes
import media

# Creating an instance of Imdb to be used to collect data via API
imdb = Imdb()
imdb = Imdb(anonymize=True)

# IMDB Id's to be used to receive info from IMDB using API's
interstellar_id = "tt0816692"
gravity_id = "tt1454468"
passengers_id = "tt1355644"
arrival_id = "tt2543164"
pulpfiction_id = "tt0110912"
contact_id = "tt0118884"

# Creation of movie objects to include required information
interstellar = media.Movie(imdb.get_title_by_id(interstellar_id).title,
                           imdb.get_title_by_id(interstellar_id).plot_outline,
                           "https://upload.wikimedia.org/wikipedia/en/b/bc/"
                           "Interstellar_film_poster.jpg",
                           "https://www.youtube.com/watch?v=3WzHXI5HizQ",
                           imdb.get_title_by_id(interstellar_id).certification,
                           imdb.get_title_by_id(interstellar_id).rating)

gravity = media.Movie(imdb.get_title_by_id(gravity_id).title,
                      imdb.get_title_by_id(gravity_id).plot_outline,
Example #37
import os
import imdb
import time
import winshell
from imdbpie import Imdb
from win32com.client import Dispatch

# https://github.com/richardasaurus/imdb-pie
# https://imdbpy.sourceforge.io/support.html#documentation
t_start = time.time()

ib = imdb.IMDb()

# Import the imdb package.
ia = Imdb()

del_ext = ["txt", "nfo", "png", "jpg", "url"]
ign_ext = [
    "exe", "zip", "part", "srt", "pdf", "iso", "txt", "nfo", "png", "jpg",
    "url", "ini"
]
ign_key = []
repl_key = dict()
repl_key["Æ"] = 'ae'
error_list = list()

library = "Z:\Downloaded\Video(s)"
# library = "Z:\Ripped\Movies"
os.chdir(library)
with open("movie_management.log", 'w') as log:
    for root, subdirs, files in os.walk(os.path.join(library, 'Library')):
Example #38
def get_client():
    client = Imdb()
    return client
Example #39
def setup():
    global imdb
    imdb = Imdb(anonymize=True)
Example #40
import sys
reload(sys)
sys.setdefaultencoding('UTF8')
import json, requests
from pymongo import MongoClient
#----------- using imdb -----------------
from imdbpie import Imdb
imdb = Imdb()
imdb = Imdb(anonymize=True)
import re

#Connect to MongoDB
cliente = MongoClient()  #Initialize the client object
cliente = MongoClient('127.0.0.1', 27017)  #Specify the server parameters
bd = cliente.taller4  #Select the schema
coleccion = bd.peliculas  #Select the collection

movie = str(raw_input('Movie Name: '))
movie_search = '+'.join(movie.split())

base_url = 'http://www.imdb.com/find?q='
url = base_url + movie_search + '&s=all'

title_search = re.compile('/title/tt\d+')
print base_url

#coleccion.insert_one(top)
Example #41
from imdbpie import Imdb
import fresh_tomatoes
import media

# Creating an instance of Imdb to be used to collect data via API
imdb = Imdb()
imdb = Imdb(anonymize=True)

# IMDB Id's to be used to receive info from IMDB using API's
interstellar_id = "tt0816692"
gravity_id = "tt1454468"
passengers_id = "tt1355644"
arrival_id = "tt2543164"
pulpfiction_id = "tt0110912"
contact_id = "tt0118884"

# Creation of movie objects to include required information
interstellar = media.Movie(
    imdb.get_title_by_id(interstellar_id).title,
    imdb.get_title_by_id(interstellar_id).plot_outline,
    "https://upload.wikimedia.org/wikipedia/en/b/bc/Interstellar_film_poster.jpg",
    "https://www.youtube.com/watch?v=3WzHXI5HizQ",
    imdb.get_title_by_id(interstellar_id).certification,
    imdb.get_title_by_id(interstellar_id).rating)

gravity = media.Movie(
    imdb.get_title_by_id(gravity_id).title,
    imdb.get_title_by_id(gravity_id).plot_outline,
    "https://upload.wikimedia.org/wikipedia/en/f/f6/Gravity_Poster.jpg",
    "https://www.youtube.com/watch?v=OiTiKOy59o4",
    imdb.get_title_by_id(gravity_id).certification,
Example #42
class ImdbExtractor(object):

    def __init__(self, data_path=None):
        super(ImdbExtractor, self).__init__()
        self.search_api = IMDBPy()
        self.info_api = IMDBPie(anonymize=True)
        self.movie_lens = MovieLens(data_path)
        # self.data_path = "data/movies_data"
        self.data_path = data_path + ".out" if data_path \
            else "data/movies_data"
        self.errors = []

    def retrieve_objects(self):
        movies = self.movie_lens.movies()
        with open(self.data_path, "w", 1, encoding="utf-8") as file:
            for movie in movies:
                print("\n")
                print(movie.id)
                print(movie.data["name"])
                while True:
                    try:
                        m = self.find_movie(movie.data["name"])
                    except IMDbDataAccessError as e:
                        print("========== CONNECTION ERROR ==========")
                        print(e)
                        sleep(5)
                    else:
                        break

                data = str(movie.id)
                if m:
                    plots, genres = self.movie_info(m.movieID)
                    reviews = self.movie_reviews(m.movieID)
                    if plots or genres or reviews:
                        movie.data["genres"].extend(genres)
                        data += u'::' + movie.data["name"]
                        data += u'::' + u' '.join(filter(None, plots))
                        data += u'::' + u' '.join(filter(None,
                                                         movie.data["genres"]))
                        data += u'::' + u' '.join(filter(None, reviews))
                        data = data.replace('\r', ' ').replace('\n', ' ')
                    else:
                        data += u"::ERROR"
                else:
                    data += u"::ERROR"
                file.write(data + u"\n")

    def movie_reviews(self, movie_id):
        try:
            reviews = self.info_api.get_title_reviews("tt" + movie_id,
                                                      max_results=20)
        except ValueError as e:
            return []

        reviews_arr = []
        if reviews:
            for r in reviews:
                review = r.summary if r.summary else ""
                review += " " + r.text if r.text else ""
                reviews_arr.append(review)
        return reviews_arr

    def movie_info(self, movie_id):
        try:
            movie = self.info_api.get_title_by_id("tt" + movie_id)
        except ValueError as e:
            return [], []
        plots = movie.plots if movie.plots else []
        genres = movie.genres if movie.genres else []
        return plots, genres

    def find_movie(self, name):
        movies = self.search_api.search_movie(name)
        if not movies:
            name = re.sub("\((\D*)\)", "", name)
            print("---------- SEARCHING AGAIN: ----------")
            print(name)
            movies = self.search_api.search_movie(name)
            print(movies)
            if not movies:
                print("########## NO MOVIE FOUND ##########")
                return None

        def sanitize_name(_str):
            new_str = _str.strip().lower()
            for char in string.punctuation:
                new_str = new_str.replace(char, "")
            return new_str

        name_split = name.split("(")
        title = sanitize_name(name_split[0])
        year = int(name_split[-1][:-1].strip())

        movie = None
        for i in movies:
            if "year" in i.keys() and int(i["year"]) == year:
                movie = i
                break
        if not movie:
            print("########## NO MOVIE FROM SAME YEAR ##########")
            return None

        self.search_api.update(movie)

        eng_title = ""
        if "akas" in movie.keys():
            print("tem akas")
            for aka in movie["akas"]:
                aka_split = aka.split("::")
                if len(aka_split) > 1                                   \
                        and (aka_split[1].find("(English title)") != -1 \
                             or aka_split[1].find("USA") != -1):
                    eng_title = aka_split[0].strip().lower()
                    break

        imdb_title = sanitize_name(movie["title"])
        original_title = name_split[1].strip()[:-1].lower()
        print("imdb title: " + imdb_title)
        print("english title: " + eng_title)
        print("year: " + str(movie["year"]))
        if imdb_title == title or eng_title == title                    \
                or (len(name_split) == 3                                \
                    and imdb_title == original_title):
            return movie
        else:
            print("########## FOUND DIFFERENT MOVIE ##########")
            print(movie["title"] + " (" + str(movie["year"]) + ")")
            return None
Example #43
#based off details.csv from getAllMovieDetails.py

# Only some movies will be written into f2. Copy them into tempMovieRatings. Keep doing this.

from imdbpie import Imdb
imdb = Imdb()
imdb = Imdb(anonymize=True)
imdb = Imdb(cache=True)

f1 = open('Details.csv', 'r')
f2 = open('movieRatings.csv', 'w')
movieID = []
year = []
count = 1  #just to show how much is written in new file

for line in f1:
    attribute = line.split(",")  #splitting each item
    attribute[-1] = attribute[-1].strip()  #removing \n from the last attribute
    #year.append(attribute[0]) #storing only years
    movieID.append(attribute[1])  #storing only movie ids

for item in movieID:
    if (imdb.get_title_by_id(item)):  #checking if movie exists
        print(count)
        title = imdb.get_title_by_id(item)
        rating = str(title.rating)
        if (rating is "None"):
            f2.write("None")
        else:
            f2.write(rating)
        f2.write(',')  #so that it's in csv format

Example #44
def single_quote(s):
    if len(s) == 0:
        return 'None'
    if s.find('\'') != -1:
        ss = s.split("\'")
        new = ''
        for x in ss:
            new = new + "\'" + "\'" + x
        return new[2:]
    else:
        return s


imdb = Imdb()
imdb = Imdb(anonymize=True)  # to proxy requests

top250 = []
top250 = imdb.top_250()
for item in top250:
    try:
        title = imdb.get_title_by_id(item['tconst'])
        if len(title.trailers) > 0:
            trailer_url = title.trailers[0]['url']
        else:
            trailer_url = 'None'
        new_movie = (
            '''INSERT INTO movie_movie VALUES (\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\')'''.format(
                item['tconst'],
                single_quote(str(item['title'])),
Example #45
 def test_get_title_by_id_returns_none_when_is_episode(self):
     imdb = Imdb(exclude_episodes=True)
     assert imdb.get_title_by_id('tt0615090') is None
Example #46
class PosterBot():
    def __init__(self):
        socket.setdefaulttimeout(30)
        self.plex_url = os.environ.get('PLEX_URL')
        self.plex_token = os.environ.get('PLEX_TOKEN')
        self.dir = os.environ.get('POSTER_DIR')
        if not os.path.exists(self.dir):
            os.makedirs(self.dir)
        self.imdb = Imdb()

    def run(self):
        self.getMoviePosters()
        self.getTVPosters()

    def getMoviePosters(self):
        movie_xml = ET.fromstring(urllib.request.urlopen(''.join((self.plex_url, '/library/sections/1/all?X-Plex-Token=', self.plex_token))).read())
    
        for child in movie_xml:
            title = child.attrib.get('title')
            ratingKey = child.attrib.get('ratingKey')
            thumb = child.attrib.get('thumb')
            updatedAt = int(child.attrib.get('updatedAt'))
            oldfile, exists = Posters.objects.search_entry(ratingKey, updatedAt)
            if not exists:    # couldn't find in database
                newfile = self.downloadPoster(thumb, ratingKey, updatedAt)
                if newfile:
                    imdb_url = self.getImdbLink(title)
                    Posters.objects.create_entry(ratingKey, newfile, imdb_url, title, updatedAt)
            elif oldfile: # entry needs to be updated
                newfile = self.downloadPoster(thumb, ratingKey, updatedAt)
                if newfile:
                    os.remove(os.path.join(self.dir, oldfile))
                    Posters.objects.update_entry(ratingKey, newfile, updatedAt)

    def getTVPosters(self):
        tv_xml = ET.fromstring(urllib.request.urlopen(''.join((self.plex_url, '/library/sections/2/all?X-Plex-Token=', self.plex_token))).read())

        for child in tv_xml:
            title = child.attrib.get('title')
            ratingKey = child.attrib.get('ratingKey')
            thumb = child.attrib.get('thumb')
            updatedAt = int(thumb.rsplit('/', 1)[-1])
            oldfile, exists = Posters.objects.search_entry(ratingKey, updatedAt)
            if exists:
                imdb_url = self.getImdbLink(title, ratingKey=ratingKey)
            else: # couldn't find in database
                newfile = self.downloadPoster(thumb, ratingKey, updatedAt)
                if newfile:
                    imdb_url = self.getImdbLink(title, 'TV')
                    Posters.objects.create_entry(ratingKey, newfile, imdb_url, title, updatedAt)
            if oldfile: # entry needs to be updated
                newfile = self.downloadPoster(thumb, ratingKey, updatedAt)
                if newfile:
                    os.remove(os.path.join(self.dir, oldfile))
                    Posters.objects.update_entry(ratingKey, newfile, updatedAt)

            show_xml = ET.fromstring(urllib.request.urlopen(''.join((self.plex_url, '/library/metadata/', ratingKey, '/children?X-Plex-Token=', self.plex_token))).read())
            for season in show_xml: #loop for seasons in show
                ratingKey = season.get('ratingKey')
                if ratingKey is None:
                    continue
                thumb = season.get('thumb')
                updatedAt = int(season.get('updatedAt'))
                oldfile, exists = Posters.objects.search_entry(ratingKey, updatedAt)

                if not exists:  #couldn't find entry in database
                    newfile = self.downloadPoster(thumb, ratingKey, updatedAt)
                    if newfile:
                        Posters.objects.create_entry(ratingKey, newfile, imdb_url, title, updatedAt)
                elif oldfile: #entry needs to be updated
                    newfile = self.downloadPoster(thumb, ratingKey, updatedAt)
                    if newfile:
                        os.remove(os.path.join(self.dir, oldfile))
                        Posters.objects.update_entry(ratingKey, newfile, updatedAt)

    # save directory stored in environment variable for now (could switch to db)
    def downloadPoster(self, thumb, ratingKey, updatedAt):    
        url = ''.join((self.plex_url, thumb, '?X-Plex-Token=', self.plex_token))
        filename = ''.join((ratingKey, '-', str(updatedAt), '.jpg'))
        path = os.path.join(self.dir, filename)
        try:
            urllib.request.urlretrieve(url, path)
            return filename
        except socket.timeout:
            if os.path.exists(path):
                os.remove(path)
            print("timeout error: " + filename)
        except FileNotFoundError:
            print("File or folder doesn't exist: " + path)
        except socket.error:
            if os.path.exists(path):
                os.remove(path)
            print("socket error occured: ")
        except:
            if os.path.exists(path):
                os.remove(path)
            print("Unexpected error:", sys.exc_info()[0])
        return ""

    # specify type if TV show
    def getImdbLink(self, title, type='Movie', ratingKey=''):
        if ratingKey:
            poster = Posters.objects.get(ratingKey=ratingKey)
            return poster.imdb_url
        search = self.imdb.search_for_title(title)
        if len(search) > 0:
            imdb_id = search[0].get('imdb_id')
            return "http://imdb.com/title/" + imdb_id
        title = title.replace(' ', '+')
        if type == 'Movie':
            return ''.join(('http://www.imdb.com/find?q=', title, '&s=tt&ttype=ft'))
        return ''.join(('http://www.imdb.com/find?q=', title, '&s=tt&ttype=tv'))
Example #47
from imdbpie import Imdb
import json
imdb = Imdb(anonymize=True) # to proxy requests

reviews = imdb.get_title_reviews("tt0120338", max_results=2500)

classified_reviews = []

positive_reviews = [x for x in reviews if x.rating > 7]
negative_reviews = [x for x in reviews if x.rating < 5]

for i in range(0, 550):
  classified_reviews.append({
    'text': positive_reviews[i].text,
    'class': 'POSITIVE'
  })
  classified_reviews.append({
    'text': negative_reviews[i].text,
    'class': 'NEGATIVE'
  })

with open('result.json', 'w') as fp:
    json.dump(classified_reviews, fp)
Example #48
 def test_get_episodes_raises_when_exclude_episodes_enabled(self):
     imdb = Imdb(locale='en_US', cache=False, exclude_episodes=True)
     with pytest.raises(ValueError):
         imdb.get_episodes('tt0303461')
Example #49
    def _search(self, title, year=None, fallback_search=False):
        """ Search the api for a movie.

        :param title: the title to search for
        :type title: str
        :param year: the year
        :type year: int or None
        :return: the search result or None if not found
        :rtype: imdbpie.objects.TitleSearchResult or None
        """
        name = title
        if year:
            name += ' (' + text_type(year) + ')'

        if fallback_search:
            log.info('Searching imdb api again with year included for %s',
                     name)
            search_results = ImdbFacade().search_for_title(
                re.sub('[()]', '', name))
        else:
            log.info('Searching imdb api for %s', name)
            search_results = ImdbFacade().search_for_title(title)

        # Find the first movie that matches the title (and year if present)
        for search_result in search_results:  # type: TitleSearchResult
            if self.sanitize_imdb_title(
                    search_result.title) == self.sanitize_imdb_title(title):
                # If a year is present, it should also be the same
                if year:
                    if search_result.year == int(year):
                        return search_result
                    else:
                        continue
                # If no year is present, take the first match
                else:
                    return search_result

        # If no match is found, try to search for alternative titles of the first (most relevant) result
        if len(search_results) > 0:
            best_match = search_results[0]  # type: TitleSearchResult
            best_match_title_versions = Imdb().get_title_versions(
                best_match.imdb_id)  # Not available in ImdbFacade
            if best_match_title_versions and 'alternateTitles' in best_match_title_versions:
                for alternate_title in best_match_title_versions[
                        'alternateTitles']:
                    if self.sanitize_imdb_title(
                            alternate_title['title']
                    ) == self.sanitize_imdb_title(title):
                        # If a year is present, it should also be the same
                        if year:
                            if best_match.year == int(year):
                                return TitleSearchResult(
                                    imdb_id=best_match.imdb_id,
                                    title=best_match.title,
                                    type=best_match.type,
                                    year=best_match.year)
                            else:
                                continue
                        # If no year is present, take the first match
                        else:
                            return TitleSearchResult(
                                imdb_id=best_match.imdb_id,
                                title=best_match.title,
                                type=best_match.type,
                                year=best_match.year)

        # Fallback search in case nothing could be found
        if not fallback_search:
            return self._search(title, year=year, fallback_search=True)

        return None
Example #50
from imdbpie import Imdb

imdb = Imdb({'anonymize': False,
             'locale': 'en_US',
             'exclude_episodes': False})


def run_tests():
    """
    Overall tests not using unittests
    for a simple visual results overview
    """
    print((movie.title))
    print(('year', movie.year))
    print(('type', movie.type))
    print(('tagline', movie.tagline))
    print(('rating', movie.rating))
    print(('certification', movie.certification))
    print(('genres', movie.genres))
    print(('plot', movie.plot))
    print(('runtime', movie.runtime))
    print(('writers', movie.writers))
    print(('directors', movie.directors))
    print(('creators', movie.creators))
    print(('cast summary', movie.cast_summary))
    print(('full credits', movie.credits))

if __name__ == '__main__':
    movie = imdb.find_movie_by_id('tt0705926')
    run_tests()
Example #51
from imdbpie import Imdb
import re

imdb = Imdb({'anonymize' : False})
movie = imdb.find_movie_by_id("tt1210166")

def run_tests():
    global imdb

    print('have a trailer_url:')
    match = re.findall(r'http://ia.media-imdb.com/images/.*/', movie.trailer_url)[0]
    if match:
        print('passed')



if __name__ == '__main__':
    run_tests()
Example #52
import csv
from urllib.parse import urlencode
from datetime import datetime
import json

import requests
import pandas as pd
from imdbpie import Imdb

TMDB_API_KEY = "df11d86cc7da3a00faaeafc354b858de"

OMDB_API_KEY = "83930f10"

DATA_URL = "https://pkgstore.datahub.io/36661def37f62e4130670ab75e06465a/oscars-nominees-and-winners/data_json/data/d3c23178ad964c76c8ce0ed81762ed7b/data_json.json"

imdb = Imdb()


def get_json(url):
    res = requests.get(url)
    if res:
        return res.json()
    return None


def tmdb_get(endpoint, params: dict = None):
    url = F'https://api.themoviedb.org/3{endpoint}?api_key={TMDB_API_KEY}'
    if params:
        url += f'&{urlencode(params)}'
    return get_json(url=url)
Example #53
from imdbpie import Imdb
import unittest

imdb = Imdb({'anonymize': False})
images = imdb.title_images("tt0468569")


class TestImage(unittest.TestCase):

    def test_results(self):
        self.assertGreaterEqual(len(images), 107)

    def test_caption(self):
        self.assertEqual(images[0].caption, 'Still of Gary Oldman in The Dark Knight')

    def test_url(self):
        self.assertEqual(images[0].url, 'http://ia.media-imdb.com/images/M/MV5BOTAxNzI0NDE1NF5BMl5BanBnXkFtZTcwNjczMTk2Mw@@._V1_.jpg')

if __name__ == '__main__':
    unittest.main()
Example #54
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 23 16:02:41 2017

@author: huaqingxie
"""

# import modules
from imdbpie import Imdb
import pandas as pd

#  to proxy requests
imdb = Imdb()
imdb = Imdb(anonymize=True)

mydf = pd.read_csv("movie_data_filter.csv")
print mydf.head(20)

imdbid = mydf['imdbId']

director = []
actor = []
year = []
imdb_score = []
certification = []
n = 0
for line in imdbid:
    n += 1
    print "...", n
    if len(str(int(line))) == 6:
Example #55
 def test_get_title_by_id_returns_none_when_is_episode(self):
     imdb = Imdb(exclude_episodes=True)
     assert imdb.get_title_by_id("tt0615090") is None
Example #56
async def imdb(query, api: Imdb, localize):
    """
    Send an api request to imdb using the search query
    :param query: the search query
    :param api: the imdb api object
    :param localize: the localization strings
    :return: the result
    """
    # FIXME: Use Aiohttp instead of this api wrapper
    try:
        names = lambda x: ', '.join((p.name for p in x)) if x else 'N/A'
        null_check = lambda x: x if x and not isinstance(x, int) else 'N/A'
        id_ = api.search_for_title(query)[0]['imdb_id']
        res = api.get_title_by_id(id_)
        eps = api.get_episodes(id_) if res.type == 'tv_series' else None
        ep_count = len(eps) if eps is not None else None
        season_count = eps[-1].season if eps is not None else None
        title = null_check(res.title)
        release = null_check(res.release_date)
        runtime = res.runtime
        if runtime is not None:
            hours, seconds = divmod(runtime, 3600)
            minutes = seconds / 60
            runtime_str = '{} {} {} {}'.format(round(hours), localize['hours'],
                                               round(minutes),
                                               localize['minutes'])
        else:
            runtime_str = 'N/A'
        rated = null_check(res.certification)
        genre = ', '.join(res.genres) if res.genres else 'N/A'
        director = names(res.directors_summary)
        writer = names(res.writers_summary)
        cast = names(res.cast_summary)
        plot = null_check(res.plot_outline)
        poster = res.poster_url
        score = f'{res.rating}/10' if res.rating is not None else 'N/A'

        embed = Embed(colour=0xE5BC26)
        embed.set_author(name=title)
        if poster:
            embed.set_image(url=poster)
        if season_count is not None:
            embed.add_field(name=localize['seasons'], value=season_count)
        if ep_count is not None:
            embed.add_field(name=localize['episodes'], value=str(ep_count))

        embed.add_field(name=localize['release_date'], value=release)
        embed.add_field(name=localize['rated'], value=rated)
        embed.add_field(name=localize['runtime'], value=runtime_str)
        embed.add_field(name=localize['genre'], value=genre)
        embed.add_field(name=localize['director'], value=director)
        embed.add_field(name=localize['writer'], value=writer)
        embed.add_field(name=localize['cast'], value=cast)
        embed.add_field(name=localize['score'], value=score)
        embed.add_field(name=localize['plot_outline'],
                        value=plot,
                        inline=False)

        return embed

    except (JSONDecodeError, IndexError):
        return localize['title_not_found']
Example #57
import os
import requests
import urllib3
import PIL
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
from imdbpie import Imdb
from io import StringIO
import textwrap

imdb = Imdb()
imdb = Imdb(anonymize=True)  # to proxy requests

pathname = 'D:\Eyes\Eng'


def GetCleanName(shortname):
    shortname = shortname.replace('480P', '')
    shortname = shortname.replace('720P', '')
    shortname = shortname.replace('1080P', '')
    shortname = shortname.replace('X264', '')
    shortname = shortname.replace('YIFY', '')
    shortname = shortname.replace('BRRIP', '')
    shortname = shortname.replace('AAC', '')
    shortname = shortname.replace('ETRG', '')
    shortname = shortname.replace('BLURAY', '')
    shortname = shortname.replace('YTS AG', '')
    shortname = shortname.replace('[YTS.AG]', '')
    shortname = shortname.replace('AC3', '')
    shortname = shortname.replace('WEB DL', '')
Example #58
# 12 Nov 2017 | Checking out imdbpie

from imdbpie import Imdb

imdb = Imdb()
lst = imdb.search_for_title("The Dark Knight")
print(lst)
Example #59
class TestImdb(object):

    imdb = Imdb(locale='en_US', cache=False)

    def test_build_url(self):
        imdb_fr = Imdb(locale='en_FR', cache=False)
        imdb_fr.timestamp = time.mktime(datetime.date.today().timetuple())

        url = imdb_fr._build_url(path='/title/maindetails',
                                 params={'tconst': 'tt1111111'})

        expected_url = ('https://app.imdb.com/'
                        'title/maindetails'
                        '?apiKey=d2bb34ec6f6d4ef3703c9b0c36c4791ef8b9ca9b'
                        '&apiPolicy=app1_1'
                        '&locale=en_FR'
                        '&timestamp={timestamp}'
                        '&tconst=tt1111111&api=v1&appid=iphone1_1').format(
                            timestamp=imdb_fr.timestamp)

        assert_urls_match(expected_url, url)

    def test_build_url_proxied(self):
        imdb_fr = Imdb(locale='en_FR',
                       cache=False,
                       anonymize=True,
                       proxy_uri='http://someproxywebsite.co.uk?url={0}')
        imdb_fr.timestamp = time.mktime(datetime.date.today().timetuple())

        url = imdb_fr._build_url(path='/title/maindetails',
                                 params={'tconst': 'tt1111111'})

        expected_url = ('http://someproxywebsite.co.uk?url=' +
                        quote('https://app.imdb.com/title/maindetails'))
        assert url.startswith(expected_url) is True

    def test_get_title_plots(self):
        plots = self.imdb.get_title_plots('tt0111161')

        expected_plot3 = ('Andy Dufresne is sent to Shawshank Prison for the '
                          'murder of his wife and her secret lover. He is very'
                          ' isolated and lonely at first, but realizes there '
                          'is something deep inside your body that people '
                          'can\'t touch or get to....\'HOPE\'. Andy becomes '
                          'friends with prison \'fixer\' Red, and Andy '
                          'epitomizes why it is crucial to have dreams. His '
                          'spirit and determination lead us into a world full '
                          'of imagination, one filled with courage and desire.'
                          ' Will Andy ever realize his dreams?')

        assert len(plots) >= 5
        assert expected_plot3 in plots

    def test_get_credits_data(self):
        credits = self.imdb._get_credits_data('tt0111161')
        expected_credits = load_test_data('get_credits_tt0111161.json')

        assert len(expected_credits) <= len(credits)
        for index, credit_item in enumerate(expected_credits):
            assert (sorted(credit_item, key=itemgetter(1)) ==
                    sorted(credits[index], key=itemgetter(1)))

    def test_get_credits_non_existant_title(self):

        with pytest.raises(HTTPError):
            self.imdb._get_credits_data('tt-non-existant-id')

    def test_get_reviews_data(self):
        reviews = self.imdb._get_reviews_data('tt0111161')
        assert len(reviews) == 10

        expected_review_keys = [
            'status', 'user_score', 'text', 'summary', 'user_score_count',
            'date', 'user_name'
        ]
        # other optional keys: user_rating, user_location

        # live results change over time, so only the data structure is checked
        for review in reviews:
            for key in expected_review_keys:
                assert key in review.keys()

    def test_get_title_reviews(self):
        reviews = self.imdb.get_title_reviews('tt0111161')
        assert 10 == len(reviews)

        assert reviews[0].username == 'carflo'
        assert reviews[0].date == '2003-11-26'
        assert reviews[0].summary == 'Tied for the best movie I have ever seen'

    def test_get_title_reviews_limit(self):
        reviews = self.imdb.get_title_reviews('tt2294629', max_results=20)
        assert 20 == len(reviews)

        reviews = self.imdb.get_title_reviews('tt2294629', max_results=31)
        assert 31 == len(reviews)

    def test_title_reviews_non_existant_title(self):

        with pytest.raises(HTTPError):
            self.imdb.get_title_reviews('tt-non-existant-id')

    def test_title_exists(self):
        result = self.imdb.title_exists('tt2322441')
        assert True is result

    def test_title_exists_non_existant_title(self):
        result = self.imdb.title_exists('tt0000000')
        assert False is result

    def test_search_for_title_searching_title(self):
        results = self.imdb.search_for_title('Shawshank redemption')
        expected_top_results = [
            {
                'imdb_id': 'tt0111161',
                'title': 'The Shawshank Redemption',
                'year': '1994'
            },
            {
                'imdb_id': 'tt0265738',
                'title': 'The SharkTank Redemption',
                'year': '2000'
            },
        ]

        assert 14 == len(results)
        assert expected_top_results == results[:2]

    def test_search_for_person(self):
        results = self.imdb.search_for_person('Andrew Lloyd Webber')

        assert 12 == len(results)
        expected_results = [{
            'name': 'Andrew Lloyd Webber',
            'imdb_id': 'nm0515908'
        }, {
            'name': 'Andrew Lloyd Walker',
            'imdb_id': 'nm3530714'
        }, {
            'name': 'Robert Lloyd',
            'imdb_id': 'nm0516115'
        }, {
            'name': 'Madeleine Gurdon',
            'imdb_id': 'nm2967056'
        }, {
            'name': 'Andrew Webberley',
            'imdb_id': 'nm1422165'
        }, {
            'name': 'Imogen Lloyd Webber',
            'imdb_id': 'nm2622250'
        }, {
            'name': 'Robert Floyd',
            'imdb_id': 'nm0283292'
        }, {
            'name': 'Andrew Webber',
            'imdb_id': 'nm0916341'
        }, {
            'name': 'Andrew Webber',
            'imdb_id': 'nm1267376'
        }, {
            'name': 'Andrew Webber',
            'imdb_id': 'nm3404464'
        }, {
            'name': 'Mark Webber',
            'imdb_id': 'nm1902514'
        }, {
            'name': 'Andrew Webber',
            'imdb_id': 'nm5409221'
        }]
        assert (sorted(expected_results, key=itemgetter('imdb_id')) == sorted(
            results, key=itemgetter('imdb_id')))

    def test_search_for_title_no_results(self):
        results = self.imdb.search_for_title('898582da396c93d5589e0')
        assert [] == results

    def test_top_250(self):
        results = self.imdb.top_250()

        assert 250 == len(results)

        expected_keys = [
            'rating', 'tconst', 'title', 'image', 'num_votes', 'year',
            'can_rate', 'type'
        ]
        # live results change over time, so only the data structure is checked
        for result in results:
            assert sorted(expected_keys) == sorted(result.keys())

    def test_popular_shows(self):
        results = self.imdb.popular_shows()

        assert 50 == len(results)

        expected_keys = [
            'tconst',
            'title',
            # 'image',  # optional key
            'year',
            'principals',
            'type'
        ]
        # live results change over time, so only the data structure is checked
        for index, result in enumerate(results):
            assert set(expected_keys).issubset(set(result.keys())) is True

    def test_get_title_by_id_returns_none_when_is_episode(self):
        imdb = Imdb(exclude_episodes=True)
        assert imdb.get_title_by_id('tt0615090') is None

    @patch('imdbpie.imdbpie.Imdb._get')
    def test_get_title_by_id_returns_none_when_no_resp(self, mock_get):
        mock_get.return_value = None
        assert self.imdb.get_title_by_id('tt0111161') is None

    def test_get_person_by_id(self):
        person = self.imdb.get_person_by_id('nm0000151')

        assert person.name == 'Morgan Freeman'
        assert person.imdb_id == 'nm0000151'
        assert is_valid_url(person.photo_url) is True

    @patch('imdbpie.imdbpie.Imdb._get')
    def test_get_person_by_id_returns_none_when_no_resp(self, mock_get):
        mock_get.return_value = None
        assert self.imdb.get_person_by_id('nm0000151') is None

    def test_get_title_by_id(self):
        title = self.imdb.get_title_by_id('tt0111161')

        assert title.title == 'The Shawshank Redemption'
        assert title.year == 1994
        assert title.type == 'feature'
        assert title.tagline == ('Fear can hold you prisoner. '
                                 'Hope can set you free.')
        assert isinstance(title.plots, list) is True
        assert len(title.plots) >= 5
        assert isinstance(title.rating, float) is True
        assert sorted(title.genres) == sorted(['Crime', 'Drama'])
        assert isinstance(title.votes, int) is True
        assert title.runtime == 8520

        assert is_valid_url(title.poster_url) is True
        assert is_valid_url(title.cover_url) is True
        assert title.release_date == '1994-10-14'
        assert title.certification == 'R'

        for trailer_url in title.trailer_image_urls:
            assert is_valid_url(trailer_url) is True

        expected_plot_outline = (
            'Two imprisoned men bond over a number '
            'of years, finding solace and eventual redemption through acts '
            'of common decency.')
        assert title.plot_outline == expected_plot_outline

        assert isinstance(title.directors_summary[0], Person)
        assert len(title.directors_summary) == 1

        assert len(title.creators) == 0
        assert len(title.cast_summary) == 4

        expected_cast_names = [
            'Tim Robbins', 'Morgan Freeman', 'Bob Gunton', 'William Sadler'
        ]
        for name in expected_cast_names:
            assert name in [p.name for p in title.cast_summary]

        expected_writers = ['Stephen King', 'Frank Darabont']
        for name in expected_writers:
            assert name in [p.name for p in title.writers_summary]

        assert len(title.credits) >= 327
        assert (sorted(load_test_data('expected_credits.json')) == sorted(
            [p.imdb_id for p in title.credits]))
        assert isinstance(title.credits[10], Person)

        assert len(title.trailers) == 3

    def test_get_title_by_id_using_proxy(self):
        imdb = Imdb(locale='en_US', cache=False, anonymize=True)
        title = imdb.get_title_by_id('tt0111161')

        assert title.title == 'The Shawshank Redemption'
        assert title.year == 1994
        assert title.type == 'feature'
        assert title.tagline == ('Fear can hold you prisoner. '
                                 'Hope can set you free.')
        assert isinstance(title.plots, list) is True
        assert len(title.plots) >= 5
        assert isinstance(title.rating, float) is True
        assert sorted(title.genres) == sorted(['Crime', 'Drama'])
        assert isinstance(title.votes, int) is True
        assert title.runtime == 8520
        assert len(title.trailers) == 3

    def test_get_title_by_id_redirection_result(self):
        assert self.imdb.get_title_by_id('tt0000021') is None

    def test_get_title_by_id_excludes_episodes(self):
        assert self.imdb.get_title_by_id('tt3181538') is not None

        imdb = Imdb(exclude_episodes=True)
        title = imdb.get_title_by_id('tt3181538')

        assert title is None

    def test_get_episodes(self):
        assert self.imdb.get_title_by_id('tt0303461') is not None

        imdb = Imdb()
        episodes = imdb.get_episodes('tt0303461')
        assert episodes is not None

        assert len(episodes) == 14
        episode_1 = episodes[0]
        assert episode_1.imdb_id == "tt0579539"
        assert episode_1.type == "tv_episode"
        assert episode_1.title == u'The Train Job'
        assert episode_1.series_name == 'Firefly'
        assert episode_1.release_date == "2002-09-20"
        assert episode_1.year == 2002

    def test_get_episodes_raises_when_exclude_episodes_enabled(self):
        imdb = Imdb(locale='en_US', cache=False, exclude_episodes=True)
        with pytest.raises(ValueError):
            imdb.get_episodes('tt0303461')

    def test_get_person_images(self):
        person_images = self.imdb.get_person_images('nm0000032')

        assert len(person_images) >= 200
        for person_image in person_images[:10]:
            assert person_image.caption is not None
            assert is_valid_url(person_image.url) is True
            assert isinstance(person_image.width, int)
            assert isinstance(person_image.height, int)

    def test_get_title_images(self):
        title_images = self.imdb.get_title_images('tt0111161')

        assert len(title_images) >= 38

        for image in title_images:
            assert isinstance(image, Image) is True

    def test_get_title_by_id_raises_not_found(self):

        with pytest.raises(HTTPError):
            self.imdb.get_title_by_id('tt9999999')
Exemple #60
0
def main(title):
    reviews = []

    # Search tweets
    ts = TwitterSearch(
        consumer_key=os.environ.get('TWITTER_CONSUMER_KEY'),
        consumer_secret=os.environ.get('TWITTER_CONSUMER_SECRET'),
        access_token=os.environ.get('TWITTER_ACCESS_TOKEN'),
        access_token_secret=os.environ.get('TWITTER_TOKEN_SECRET'))
    try:
        ts.connect()

        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.setKeywords(['#' + title + 'Movie'])  # define the hashtag keyword to search for
        tso.setLanguage('en')  # we want to see English tweets only
        tso.setIncludeEntities(False)  # and don't give us all that entity information

        # run the search; matching tweets are added to the reviews list below
        results = ts.getSearchResults(tso)

    except TwitterSearchException as e:  # handle any errors raised by the Twitter search
        logging.exception(str(e))
        ts.cleanUp()
    else:
        for offset in range(results.getSize()):
            if offset > 9:
                break
            tweet = results.getTweetByIndex(offset)
            reviews.append({
                'author': tweet.getUserName(),
                'summary': tweet.getText(),
                'text': tweet.getText(),
                'date': parser.parse(tweet.getCreatedDate(), ignoretz=True),
                'source': 'Twitter'
            })
    finally:
        ts.disconnect()

    # Search Imdb
    imdb = Imdb()
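    # use the first search result's imdb_id to fetch up to 10 reviews; an IndexError below means no title matched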
    try:
        response = imdb.search_for_title(title)[0]
        title_id = response['imdb_id']
        response = imdb.get_title_reviews(title_id, max_results=10)
    except IndexError as e:
        logging.exception(str(e))
    else:
        for review in response:
            reviews.append({
                'author': review.username,
                'summary': review.summary,
                'text': review.text,
                'date': parser.parse(review.date, ignoretz=True),
                'source': 'IMDB'
            })

    # Search NYTimes
    url = "https://api.nytimes.com/svc/movies/v2/reviews/search.json"
    data = {'query': title, 'api-key': os.environ.get('NY_TIMES_API_KEY')}
    response = requests.get(url, data)
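    # requests.get() treats its second positional argument as params, so 'query' and 'api-key' go into the query string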
    count = 0
    for review in response.json()['results']:
        if count > 9:
            break
        reviews.append({
            'author': review['byline'],
            'summary': review['headline'],
            'text': review['summary_short'],
            'date': parser.parse(review['date_updated'], ignoretz=True),
            'source': 'NYTimes'
        })
        count += 1

    # Sort reviews by date
    reviews.sort(key=lambda review: review['date'])  # Python 3's list.sort() has no cmp=; assumes the original _cmprev helper ordered reviews by date

    # Print reviews
    for review in reviews:
        print('(%s) @%s: %s [Source: %s]' %
              (review['date'].strftime('%Y-%m-%d'), review['author'],
               review['summary'], review['source']))