Ejemplo n.º 1
0
class IMDBApi(object):
    """Callable that looks a movie up through the Rotten Tomatoes client.

    The title and an optional release year are stored at construction
    time; calling the instance runs the search and returns the detailed
    record of every hit that survives the year filter.
    """

    def __init__(self, title, year=0):
        """Store the search criteria and build the RT client.

        ``year`` is coerced with ``int``; a value of 0 (the default)
        disables year filtering in ``__call__``.
        """
        self.query = {'title': title, 'year': int(year)}
        # `rotten` is a module-level name defined outside this class --
        # presumably the Rotten Tomatoes API key; confirm at the import site.
        self.rt = RT(rotten)

    def __call__(self):
        """Search by title and return ``rt.info`` for each matching hit.

        A shallow copy of the unfiltered search results is kept on
        ``self.partial`` before any year filtering is applied.
        """
        wanted_year = self.query['year']
        hits = self.rt.search(self.query['title'])
        self.partial = copy.copy(hits)
        if wanted_year:
            # Keep only hits released in the requested year.
            hits = [hit for hit in hits if hit['year'] == wanted_year]
        return [self.rt.info(hit['id']) for hit in hits]
from rottentomatoes import RT
import time

rt = RT()
#movies = ['fight club','gravity','toy story 3','american hustle','skyfall','jack and jill','basic instinct 2','white out','lost souls','babylon','argo','bears','her','up','a beautiful mind','braveheart','the hurt locker','gambit','paranoia','getaway']
# One movie title per line; the context manager closes the input file.
with open('movies.txt') as movie_file:
    movies = [line.strip('\n') for line in movie_file]
count = 0  # running total of review rows written across all movies
# `with` guarantees the output is flushed and closed even if a request
# raises part-way through the run.
with open('reviews-date.txt', 'w') as f:
    for u in movies:
        movlst = rt.search(u, page_limit=1)
        rlt = []
        if movlst and movlst[0][u'id'] != '':
            review = rt.info(movlst[0][u'id'], 'reviews')
            # review is a dict, value of each key is list, each list element is a dict
            rlt = review[u'reviews']
            for a in rlt:
                # One tab-separated row per review: quote, then date.
                # NOTE(review): the date used to be written a second time to
                # `f2`, a handle that is never opened in this script
                # (NameError on the first review). The date is already the
                # second column here, so that write was dropped.
                f.write(a[u'quote'] + '\t' + a[u'date'] + '\n')
                count = count + 1
        if rlt:
            # Progress: total number of reviews written so far.
            print(count)
        else:
            # No search hit, empty id, or no reviews: report the title.
            print(u)
        # Pause between movies (presumably to respect the API rate limit).
        time.sleep(5)
from rottentomatoes import RT
import time

rt = RT()
# movies = ['fight club','gravity','toy story 3','american hustle','skyfall','jack and jill','basic instinct 2','white out','lost souls','babylon','argo','bears','her','up','a beautiful mind','braveheart','the hurt locker','gambit','paranoia','getaway']
# One movie title per line; the context manager closes the input file.
with open("movies.txt") as movie_file:
    movies = [line.strip("\n") for line in movie_file]
count = 0  # running total of review rows written across all movies
# `with` guarantees the output is flushed and closed even if a request
# raises part-way through the run.
with open("reviews-date.txt", "w") as f:
    for u in movies:
        movlst = rt.search(u, page_limit=1)
        rlt = []
        if movlst and movlst[0][u"id"] != "":
            review = rt.info(movlst[0][u"id"], "reviews")
            # review is a dict, value of each key is list, each list element is a dict
            rlt = review[u"reviews"]
            for a in rlt:
                # One tab-separated row per review: quote, then date.
                # NOTE(review): the date used to be written a second time to
                # `f2`, a handle that is never opened in this script
                # (NameError on the first review). The date is already the
                # second column here, so that write was dropped.
                f.write(a[u"quote"] + "\t" + a[u"date"] + "\n")
                count = count + 1
        if rlt:
            # Progress: total number of reviews written so far.
            print(count)
        else:
            # No search hit, empty id, or no reviews: report the title.
            print(u)
        # Pause between movies (presumably to respect the API rate limit).
        time.sleep(5)
Ejemplo n.º 4
0
class MovieInfo(object):
    '''
    Facade over several movie data sources for a single movie.

    Rotten Tomatoes (``RT``) and TMDB (``tmdb``) records are fetched once
    in the constructor; the properties below read from those records and
    fall back from one source to the other where the code says so.
    Amazon and YouTube are only queried on demand.
    '''

    def __init__(self, movie, rotten_tomatoe_api_key, tmdb_api_key,
        aws_access_key, aws_secret_key, affiliate_key, rt_id=None, tmdb_id=None):
        '''
        Fetches the Rotten Tomatoes and TMDB records for ``movie``.

        movie: title used for every search.
        rt_id: when given, the RT record is loaded by id instead of
            taking the first search hit.
        tmdb_id: accepted for interface compatibility; not used here.

        Raises IndexError when the RT search returns no results.
        '''
        self._movie = movie
        # amazon
        self._amazon_product_search = AmazonProductSearch(aws_access_key,
                                        aws_secret_key, affiliate_key)
        # rotten tomatoes
        self._rt = RT(rotten_tomatoe_api_key)
        if rt_id:
            self._rt_data = self._rt.info(rt_id)
        else:
            results = self._rt.search(movie)
            if not results:
                # Same exception type as indexing an empty list, but with
                # a message that says what actually went wrong.
                raise IndexError(
                    "no Rotten Tomatoes results for %r" % (movie,))
            self._rt_data = results[0]
        # tmdb
        self._tmdb = tmdb
        self._tmdb.configure(tmdb_api_key)
        # NOTE(review): this lookup raises KeyError when RT has no theater
        # release date, although `release_date` tolerates that case.
        best_match = self._tmdb.Movies(movie, limit=True,
                    expected_release_date=self._rt_data['release_dates']['theater']).get_best_match()
        # Element [1] of the best match carries the TMDB record --
        # presumably a (score, movie) pair; confirm against the tmdb wrapper.
        self._tmdb_data = self._tmdb.Movie(best_match[1]['id'])
        # youtube
        self._yt_service = gdata.youtube.service.YouTubeService()

    def get_amazon_purchase_links(self, top_cast, runtime):
        '''
        Returns the result of the Amazon item search for this movie's
        title, the given top cast and the given runtime.
        '''
        return self._amazon_product_search.item_search(self._movie,
                    top_cast, runtime)

    @property
    def cast(self):
        '''
        Returns the names of the full cast for this movie
        '''
        full_cast = self._rt.info(self._rt_data['id'], 'cast')
        return [member['name'] for member in full_cast['cast']]

    @property
    def critic_reviews(self):
        '''
        Returns a list of critic reviews for this movie, as delivered
        under the 'reviews' key of the RT reviews response.
        '''
        reviews = self._rt.info(self._rt_data['id'], 'reviews')
        return reviews['reviews']

    @property
    def critics_score(self):
        '''
        Returns the rotten tomatoes critic score for this movie
        '''
        return self._rt_data['ratings']['critics_score']

    @property
    def director(self):
        '''
        Returns the director information TMDB holds for this movie
        '''
        return self._tmdb_data.get_director()

    @property
    def genres(self):
        '''
        Returns the lower-cased genre names of this movie, supplied by tmdb
        '''
        return [genre['name'].lower()
                for genre in self._tmdb_data.get_genres()]

    @property
    def imdb_id(self):
        '''
        Returns the imdb id of this movie.

        RT's alternate id is used with a "tt" prefix; TMDB's imdb id is
        the fallback when RT has none.
        '''
        try:
            return "tt" + self._rt_data['alternate_ids']['imdb']
        except (KeyError, TypeError):
            # Missing key, or a non-string/None value in the RT record.
            return self._tmdb_data.get_imdb_id()

    @property
    def poster(self):
        '''
        Returns the poster of the movie as supplied by tmdb
        '''
        return self._tmdb_data.get_poster()

    @property
    def runtime(self):
        '''
        Returns the runtime of this movie as an int, from RT when it
        has a usable value and from TMDB otherwise.
        '''
        try:
            return int(self._rt_data['runtime'])
        except (KeyError, TypeError, ValueError):
            # Missing, None or non-numeric (e.g. empty string) in RT.
            return int(self._tmdb_data.get_runtime())

    @property
    def release_date(self):
        '''
        Returns this movie's release date as parsed by ``parser.parse``
        (presumably a dateutil datetime -- confirm at the import site).

        RT's theater release date is preferred; TMDB's release date is
        the fallback.
        '''
        try:
            return parser.parse(self._rt_data['release_dates']['theater'])
        except (KeyError, TypeError, ValueError, OverflowError,
                AttributeError):
            # Missing key or a value the parser cannot handle.
            return parser.parse(self._tmdb_data.get_release_date())

    @property
    def similar_movies(self):
        '''
        Returns the 'movies' list from RT's 'similar' response for
        this movie.
        '''
        movies = self._rt.info(self._rt_data['id'], 'similar')['movies']

        # TODO: if movies is None or empty, fall back to a web search
        # for the current movie title and parse the result page.

        return movies

    @property
    def synopsis(self):
        '''
        Returns this movie's synopsis, falling back to TMDB's overview
        when RT's synopsis is empty or missing a value.
        '''
        synopsis = self._rt_data['synopsis']
        if not synopsis:
            synopsis = self._tmdb_data.get_overview()
        return synopsis

    @property
    def title(self):
        '''
        Returns this movie's title
        '''
        return self._rt_data['title']

    @property
    def trailers(self, limit=3):
        '''
        Returns a list of youtube ids of trailers for this movie.

        TMDB's youtube trailers are returned (all of them) when TMDB
        lists more than ``limit`` entries. Otherwise youtube is queried
        with "{movie_name} trailer {release_year} " and the first three
        hits are filtered for length, relevance and duplicates.

        Because this is a property, ``limit`` always takes its default
        on normal attribute access.

        NOTE(review): an earlier description said "3 or more" while the
        comparison is strictly greater-than -- confirm which is intended.
        '''
        trailers = self._tmdb_data.get_trailers()['youtube']
        if len(trailers) > limit:
            return [trailer['source'] for trailer in trailers]

        # The year is the text before the first '-' of the printed date.
        release_year = str(self.release_date).split('-')[0]
        query = gdata.youtube.service.YouTubeVideoQuery()
        query.vq = "{title} trailer {release_year} ".format(
                        title=self._movie, release_year=release_year)
        query.orderby = 'relevance'

        feed = self._yt_service.YouTubeQuery(query)
        entries = self._remove_long_youtube_videos(feed.entry[:3])
        entries = self._remove_unrelated_videos(entries)
        return self._remove_duplicate_youtube_videos(entries)

    def _remove_duplicate_youtube_videos(self, entries, threshold=5):
        '''
        This method removes duplicate videos by measuring the runtime
        of the youtube videos.

        If two videos are within ``threshold`` seconds (5 by default) of
        each other in runtime, we assume that one of the videos is a
        duplicate of the other; the first one seen is kept.

        Returns the list of youtube ids of the kept videos, in order.
        '''
        videos = []
        for entry in entries:
            runtime = int(entry.media.duration.seconds)
            is_duplicate = any(
                abs(runtime - video["runtime"]) <= threshold
                for video in videos)
            if not is_duplicate:
                video_id = self._extract_youtube_id(entry.media.player.url)
                videos.append({"yt_id": video_id, "runtime": runtime})
        return [video['yt_id'] for video in videos]

    def _remove_long_youtube_videos(self, entries, max_seconds=600):
        '''
        Returns the entries whose duration is strictly below
        ``max_seconds``, as a list.
        '''
        # A list (not `filter`) so the result is the same concrete type
        # on Python 2 and Python 3.
        return [entry for entry in entries
                if int(entry.media.duration.seconds) < max_seconds]

    def _remove_unrelated_videos(self, entries):
        '''
        Returns, as a list, the entries whose title has a fuzzy-match
        ratio above 20 against this movie's title (case-insensitive).
        '''
        wanted = self._movie.lower()
        related = []
        for entry in entries:
            title = entry.media.title.text
            # Only decode raw bytes; text that is already unicode must
            # not be decoded again.
            if isinstance(title, bytes):
                title = title.decode('utf-8')
            if fuzzywuzzy.fuzz.ratio(title.lower(), wanted) > 20:
                related.append(entry)
        return related

    def _extract_youtube_id(self, youtube_url):
        '''
        Returns the value of the ``v=`` parameter of a youtube watch url,
        without any parameters that follow it.

        Raises IndexError when the url contains no ``v=``.
        '''
        video_id = youtube_url.split('v=')[1]
        # Drop everything from the next query parameter onwards.
        return video_id.split('&', 1)[0]
Ejemplo n.º 5
0
class MovieInfo(object):
    '''
    Facade over several movie data sources for a single movie.

    Rotten Tomatoes (``RT``) and TMDB (``tmdb``) records are fetched once
    in the constructor; the properties below read from those records and
    fall back from one source to the other where the code says so.
    Amazon and YouTube are only queried on demand.
    '''

    def __init__(self,
                 movie,
                 rotten_tomatoe_api_key,
                 tmdb_api_key,
                 aws_access_key,
                 aws_secret_key,
                 affiliate_key,
                 rt_id=None,
                 tmdb_id=None):
        '''
        Fetches the Rotten Tomatoes and TMDB records for ``movie``.

        movie: title used for every search.
        rt_id: when given, the RT record is loaded by id instead of
            taking the first search hit.
        tmdb_id: accepted for interface compatibility; not used here.

        Raises IndexError when the RT search returns no results.
        '''
        self._movie = movie
        # amazon
        self._amazon_product_search = AmazonProductSearch(
            aws_access_key, aws_secret_key, affiliate_key)
        # rotten tomatoes
        self._rt = RT(rotten_tomatoe_api_key)
        if rt_id:
            self._rt_data = self._rt.info(rt_id)
        else:
            results = self._rt.search(movie)
            if not results:
                # Same exception type as indexing an empty list, but with
                # a message that says what actually went wrong.
                raise IndexError(
                    "no Rotten Tomatoes results for %r" % (movie, ))
            self._rt_data = results[0]
        # tmdb
        self._tmdb = tmdb
        self._tmdb.configure(tmdb_api_key)
        # NOTE(review): this lookup raises KeyError when RT has no theater
        # release date, although `release_date` tolerates that case.
        expected_release = self._rt_data['release_dates']['theater']
        best_match = self._tmdb.Movies(
            movie, limit=True,
            expected_release_date=expected_release).get_best_match()
        # Element [1] of the best match carries the TMDB record --
        # presumably a (score, movie) pair; confirm against the tmdb wrapper.
        self._tmdb_data = self._tmdb.Movie(best_match[1]['id'])
        # youtube
        self._yt_service = gdata.youtube.service.YouTubeService()

    def get_amazon_purchase_links(self, top_cast, runtime):
        '''
        Returns the result of the Amazon item search for this movie's
        title, the given top cast and the given runtime.
        '''
        return self._amazon_product_search.item_search(
            self._movie, top_cast, runtime)

    @property
    def cast(self):
        '''
        Returns the names of the full cast for this movie
        '''
        full_cast = self._rt.info(self._rt_data['id'], 'cast')
        return [member['name'] for member in full_cast['cast']]

    @property
    def critic_reviews(self):
        '''
        Returns a list of critic reviews for this movie, as delivered
        under the 'reviews' key of the RT reviews response.
        '''
        reviews = self._rt.info(self._rt_data['id'], 'reviews')
        return reviews['reviews']

    @property
    def critics_score(self):
        '''
        Returns the rotten tomatoes critic score for this movie
        '''
        return self._rt_data['ratings']['critics_score']

    @property
    def director(self):
        '''
        Returns the director information TMDB holds for this movie
        '''
        return self._tmdb_data.get_director()

    @property
    def genres(self):
        '''
        Returns the lower-cased genre names of this movie, supplied by tmdb
        '''
        return [
            genre['name'].lower() for genre in self._tmdb_data.get_genres()
        ]

    @property
    def imdb_id(self):
        '''
        Returns the imdb id of this movie.

        RT's alternate id is used with a "tt" prefix; TMDB's imdb id is
        the fallback when RT has none.
        '''
        try:
            return "tt" + self._rt_data['alternate_ids']['imdb']
        except (KeyError, TypeError):
            # Missing key, or a non-string/None value in the RT record.
            return self._tmdb_data.get_imdb_id()

    @property
    def poster(self):
        '''
        Returns the poster of the movie as supplied by tmdb
        '''
        return self._tmdb_data.get_poster()

    @property
    def runtime(self):
        '''
        Returns the runtime of this movie as an int, from RT when it
        has a usable value and from TMDB otherwise.
        '''
        try:
            return int(self._rt_data['runtime'])
        except (KeyError, TypeError, ValueError):
            # Missing, None or non-numeric (e.g. empty string) in RT.
            return int(self._tmdb_data.get_runtime())

    @property
    def release_date(self):
        '''
        Returns this movie's release date as parsed by ``parser.parse``
        (presumably a dateutil datetime -- confirm at the import site).

        RT's theater release date is preferred; TMDB's release date is
        the fallback.
        '''
        try:
            return parser.parse(self._rt_data['release_dates']['theater'])
        except (KeyError, TypeError, ValueError, OverflowError,
                AttributeError):
            # Missing key or a value the parser cannot handle.
            return parser.parse(self._tmdb_data.get_release_date())

    @property
    def similar_movies(self):
        '''
        Returns the 'movies' list from RT's 'similar' response for
        this movie.
        '''
        movies = self._rt.info(self._rt_data['id'], 'similar')['movies']

        # TODO: if movies is None or empty, fall back to a web search
        # for the current movie title and parse the result page.

        return movies

    @property
    def synopsis(self):
        '''
        Returns this movie's synopsis, falling back to TMDB's overview
        when RT's synopsis is empty or missing a value.
        '''
        synopsis = self._rt_data['synopsis']
        if not synopsis:
            synopsis = self._tmdb_data.get_overview()
        return synopsis

    @property
    def title(self):
        '''
        Returns this movie's title
        '''
        return self._rt_data['title']

    @property
    def trailers(self, limit=3):
        '''
        Returns a list of youtube ids of trailers for this movie.

        TMDB's youtube trailers are returned (all of them) when TMDB
        lists more than ``limit`` entries. Otherwise youtube is queried
        with "{movie_name} trailer {release_year} " and the first three
        hits are filtered for length, relevance and duplicates.

        Because this is a property, ``limit`` always takes its default
        on normal attribute access.

        NOTE(review): an earlier description said "3 or more" while the
        comparison is strictly greater-than -- confirm which is intended.
        '''
        trailers = self._tmdb_data.get_trailers()['youtube']
        if len(trailers) > limit:
            return [trailer['source'] for trailer in trailers]

        # The year is the text before the first '-' of the printed date.
        release_year = str(self.release_date).split('-')[0]
        query = gdata.youtube.service.YouTubeVideoQuery()
        query.vq = "{title} trailer {release_year} ".format(
            title=self._movie, release_year=release_year)
        query.orderby = 'relevance'

        feed = self._yt_service.YouTubeQuery(query)
        entries = self._remove_long_youtube_videos(feed.entry[:3])
        entries = self._remove_unrelated_videos(entries)
        return self._remove_duplicate_youtube_videos(entries)

    def _remove_duplicate_youtube_videos(self, entries, threshold=5):
        '''
        This method removes duplicate videos by measuring the runtime
        of the youtube videos.

        If two videos are within ``threshold`` seconds (5 by default) of
        each other in runtime, we assume that one of the videos is a
        duplicate of the other; the first one seen is kept.

        Returns the list of youtube ids of the kept videos, in order.
        '''
        videos = []
        for entry in entries:
            runtime = int(entry.media.duration.seconds)
            is_duplicate = any(
                abs(runtime - video["runtime"]) <= threshold
                for video in videos)
            if not is_duplicate:
                video_id = self._extract_youtube_id(entry.media.player.url)
                videos.append({"yt_id": video_id, "runtime": runtime})
        return [video['yt_id'] for video in videos]

    def _remove_long_youtube_videos(self, entries, max_seconds=600):
        '''
        Returns the entries whose duration is strictly below
        ``max_seconds``, as a list.
        '''
        # A list (not `filter`) so the result is the same concrete type
        # on Python 2 and Python 3.
        return [
            entry for entry in entries
            if int(entry.media.duration.seconds) < max_seconds
        ]

    def _remove_unrelated_videos(self, entries):
        '''
        Returns, as a list, the entries whose title has a fuzzy-match
        ratio above 20 against this movie's title (case-insensitive).
        '''
        wanted = self._movie.lower()
        related = []
        for entry in entries:
            title = entry.media.title.text
            # Only decode raw bytes; text that is already unicode must
            # not be decoded again.
            if isinstance(title, bytes):
                title = title.decode('utf-8')
            if fuzzywuzzy.fuzz.ratio(title.lower(), wanted) > 20:
                related.append(entry)
        return related

    def _extract_youtube_id(self, youtube_url):
        '''
        Returns the value of the ``v=`` parameter of a youtube watch url,
        without any parameters that follow it.

        Raises IndexError when the url contains no ``v=``.
        '''
        video_id = youtube_url.split('v=')[1]
        # Drop everything from the next query parameter onwards.
        return video_id.split('&', 1)[0]