Exemple #1
0
class Watcher:
    """
    Tracks the shows IMDb currently lists as popular.

    The list is fetched once, when the object is built, and kept in
    ``tracked_shows`` as a list of dicts with the keys ``'id'``,
    ``'title'`` and ``'poster'``. Every lookup method below works on
    that cached list.
    """

    # Characters stripped from titles before they are used as file names.
    _FORBIDDEN_CHARS = str.maketrans('', '', '<>:"/\\|?*')

    def __init__(self):
        self.imdb = Imdb(anonymize=True)
        self.tracked_shows = self.get_shows()
        self.static_dir = os.path.join(os.path.dirname(__file__),
                                       '../static/images')

    def get_shows(self):
        """
        Gets all current popular shows from imdb

        args:

        None

        returns:

        list of dicts, one per show, with keys 'id' (the show's tconst),
        'title' and 'poster' (image url, or None when the show has no
        image record)
        """
        return [
            {
                'id': show['tconst'],
                'title': show['title'],
                # One show without an image must not abort the whole fetch.
                'poster': (show.get('image') or {}).get('url'),
            }
            for show in self.imdb.popular_shows()
        ]

    def get_show_id(self, show_title):
        """
        Gets show title id

        args:

        show_title: name of show to be queried

        returns:

        show_id: id of the first tracked show whose title matches exactly,
        or None when no tracked show has that title
        """
        for show in self.tracked_shows:
            if show_title == show['title']:
                return show['id']
        return None

    def get_episodes(self, show_id):
        """
        Gets all episodes of a given show

        args:

        show_id: tconst id from imdb

        returns:

        list of episodes
        """
        return self.imdb.get_episodes(show_id)

    def get_all_episodes(self):
        """
        Gets all episodes

        args:

        None

        returns:

        dictionary mapping each tracked show title to the result of
        get_episodes for that show
        """
        return {
            show['title']: self.get_episodes(show['id'])
            for show in self.tracked_shows
        }

    def get_poster(self, show_title):
        """
        Gets the img url for the poster of a show

        args:

        show_title: title of show

        returns:

        dictionary with {show_title: poster_url}, or None when no tracked
        show has that title
        """
        for show in self.tracked_shows:
            if show['title'] == show_title:
                return {show_title: show['poster']}
        return None

    def save_posters(self, urls, title):
        """
        Downloads a poster into static_dir as '<sanitized title>.jpg'

        args:

        urls: the poster url, or a {title: poster_url} dictionary such as
        the one get_poster returns

        title: title of show; used to pick the url out of a dictionary
        and, once sanitized, as the file name

        returns:

        None

        raises:

        KeyError when urls is a dictionary that has no entry for title
        """
        # The original passed an undefined name `url` to urlretrieve, so
        # every call failed with NameError.
        url = urls[title] if isinstance(urls, dict) else urls
        dest = '{}/{}.jpg'.format(self.static_dir, self.sanitize_title(title))
        urllib.request.urlretrieve(url, dest)

    def sanitize_title(self, title):
        """
        Removes characters that are not allowed in file names

        args:

        title: title of show

        returns:

        title without any of < > : " / \\ | ? *
        """
        return title.translate(self._FORBIDDEN_CHARS)

    def get_show_titles(self):
        """
        Gets show titles

        args:

        None

        returns:

        list of show titles
        """
        return [show['title'] for show in self.tracked_shows]
Exemple #2
0
class IMDBGraph:
    ''' Undirected networkx graph built from IMDb titles and people.

    Nodes are keyed by display value (movie title, person name, genre,
    year) and carry a ``node`` attribute naming their type.
    '''

    def __init__(self, anonymize=True):
        self._imdb = Imdb(anonymize=anonymize, cache=True)
        self._graph = nx.Graph()

    def _add_node(self, name, nodetype):
        ''' Add node `name` tagged with `nodetype` unless it already exists.

        An existing node is left untouched, so its type is never
        overwritten by a later insert under the same name.
        '''
        # Test membership on the graph itself: it is a dict lookup on every
        # networkx version, whereas nodes() builds a full list on 1.x.
        if name not in self._graph:
            self._graph.add_node(name, node=nodetype)

    def addPerson(self, idname):
        ''' Add a new actor/actress to the graph by IMDb person id
        '''
        actor = self._imdb.get_person_by_id(idname)
        # Go through _add_node so the person gets the same 'actor' type tag
        # as people added by addMovieAndConnect.
        self._add_node(actor.name, 'actor')

    def addMovie(self, idname):
        ''' Add a single movie node (no edges) by IMDb title id
        '''
        movie = self._imdb.get_title_by_id(idname)
        self._add_node(movie.title, 'movie')
        logging.info("Loading {0}".format(idname))

    def addMovieAndConnect(self, idname):
        ''' Add a movie plus its year, genres and people, and link them.

        Edges to people from ``movie.credits`` carry ``rating``, ``votes``
        and ``weight`` (their sum); edges to genres carry no attributes.
        '''
        movie = self._imdb.get_title_by_id(idname)
        title = movie.title
        self._add_node(title, 'movie')
        # NOTE(review): the year node is created but never linked to the
        # movie - confirm whether an edge was intended.
        self._add_node(movie.year, 'year')
        logging.info("Loading {0}".format(idname))
        for genre in movie.genres:
            self._add_node(genre, 'genre')
            self._graph.add_edge(title, genre)
        for person in movie.credits:
            self._add_node(person.name, 'actor')
            # NOTE(review): assumes rating and votes are both numbers;
            # verify what the IMDb client returns for unrated titles.
            self._graph.add_edge(title,
                                 person.name,
                                 weight=movie.rating + movie.votes,
                                 rating=movie.rating,
                                 votes=movie.votes)
        for person in movie.cast_summary:
            self._add_node(person.name, "actor")
            # Called without attributes, so an edge already created from
            # movie.credits keeps its rating/votes/weight.
            self._graph.add_edge(title, person.name)

    def addPopular(self, limit=2):
        ''' Add the first `limit` popular shows as movie nodes
        '''
        shows = self._imdb.popular_shows()
        # Slicing already stops at the end of the list, so no explicit
        # clamp of `limit` is needed.
        for show in shows[:limit]:
            self.addMovie(show['tconst'])

    def removeNode(self, nodename):
        ''' Remove `nodename` from the graph
        '''
        self._graph.remove_node(nodename)

    def addEdge(self, innode, outnode, prop=None):
        ''' Link two existing nodes, storing `prop` on the edge.

        Raises Exception when either endpoint is not in the graph.
        '''
        for endpoint in (innode, outnode):
            if endpoint not in self._graph:
                raise Exception("{0} not in graph".format(endpoint))
        self._graph.add_edge(innode, outnode, prop=prop)

    def components(self):
        ''' Return connected components and node degrees.

        Returns a tuple ``(components, degree)``: the connected components
        as a list, and a dict mapping each node to its degree. Earlier
        versions computed both values and discarded them.
        '''
        comp = list(nx.connected_components(self._graph))
        degree = dict(nx.degree(self._graph))
        return comp, degree

    def avg_degree(self):
        ''' Return, for each node, the average degree of its neighbors
        '''
        return nx.average_neighbor_degree(self._graph)

    def avg_degree_connectivity(self):
        ''' Return the average degree connectivity of the graph
        '''
        return nx.average_degree_connectivity(self._graph)

    def clustering(self):
        ''' Compute the clustering coefficient for nodes.
        '''
        return nx.clustering(self._graph)

    def get_item(self, item):
        ''' Return the attribute dict of node `item`
        '''
        nodes = self._graph.nodes
        # networkx >= 2.0 exposes a subscriptable node view; Graph.node
        # was removed in 2.4. On 1.x `nodes` is a plain method, so fall
        # back to the old attribute there.
        if hasattr(nodes, '__getitem__'):
            return nodes[item]
        return self._graph.node[item]

    def filter_edges(self, param, func):
        ''' Yield ``(node, neighbor, value)`` for edges whose attribute
        `param` satisfies `func`.

        Edges without `param` are skipped. The graph is undirected, so
        each matching edge is reported once from each endpoint.
        '''
        # Graph.adj is available on every networkx version;
        # adjacency_iter() was removed in 2.0.
        for n, nbrs in self._graph.adj.items():
            for nbr, attr in nbrs.items():
                if param not in attr:
                    continue
                data = attr[param]
                if func(data):
                    yield (n, nbr, data)

    def cliques(self):
        ''' Return all maximal cliques from the graph
        '''
        return nx.find_cliques(self._graph)

    def stat(self):
        ''' Return basic statistics of the graph
        '''
        return {
            'nodes': self._graph.number_of_nodes(),
            'edges': self._graph.number_of_edges(),
            'density': nx.density(self._graph),
        }

    def save(self, outpath):
        ''' Save graph to the file (not implemented: currently a no-op)
        '''
        pass
Exemple #3
0
class IMDBGraph:
    ''' Undirected networkx graph built from IMDb titles and people.

    Nodes are keyed by display value (movie title, person name, genre,
    year) and carry a ``node`` attribute naming their type.
    '''

    def __init__(self, anonymize=True):
        self._imdb = Imdb(anonymize=anonymize, cache=True)
        self._graph = nx.Graph()

    def _add_node(self, name, nodetype):
        ''' Add node `name` tagged with `nodetype` unless it already exists.

        An existing node is left untouched, so its type is never
        overwritten by a later insert under the same name.
        '''
        # Test membership on the graph itself: it is a dict lookup on every
        # networkx version, whereas nodes() builds a full list on 1.x.
        if name not in self._graph:
            self._graph.add_node(name, node=nodetype)

    def addPerson(self, idname):
        ''' Add a new actor/actress to the graph by IMDb person id
        '''
        actor = self._imdb.get_person_by_id(idname)
        # Go through _add_node so the person gets the same 'actor' type tag
        # as people added by addMovieAndConnect.
        self._add_node(actor.name, 'actor')

    def addMovie(self, idname):
        ''' Add a single movie node (no edges) by IMDb title id
        '''
        movie = self._imdb.get_title_by_id(idname)
        self._add_node(movie.title, 'movie')
        logging.info("Loading {0}".format(idname))

    def addMovieAndConnect(self, idname):
        ''' Add a movie plus its year, genres and people, and link them.

        Edges to people from ``movie.credits`` carry ``rating``, ``votes``
        and ``weight`` (their sum); edges to genres carry no attributes.
        '''
        movie = self._imdb.get_title_by_id(idname)
        title = movie.title
        self._add_node(title, 'movie')
        # NOTE(review): the year node is created but never linked to the
        # movie - confirm whether an edge was intended.
        self._add_node(movie.year, 'year')
        logging.info("Loading {0}".format(idname))
        for genre in movie.genres:
            self._add_node(genre, 'genre')
            self._graph.add_edge(title, genre)
        for person in movie.credits:
            self._add_node(person.name, 'actor')
            # NOTE(review): assumes rating and votes are both numbers;
            # verify what the IMDb client returns for unrated titles.
            self._graph.add_edge(title,
                                 person.name,
                                 weight=movie.rating + movie.votes,
                                 rating=movie.rating,
                                 votes=movie.votes)
        for person in movie.cast_summary:
            self._add_node(person.name, "actor")
            # Called without attributes, so an edge already created from
            # movie.credits keeps its rating/votes/weight.
            self._graph.add_edge(title, person.name)

    def addPopular(self, limit=2):
        ''' Add the first `limit` popular shows as movie nodes
        '''
        shows = self._imdb.popular_shows()
        # Slicing already stops at the end of the list, so no explicit
        # clamp of `limit` is needed.
        for show in shows[:limit]:
            self.addMovie(show['tconst'])

    def removeNode(self, nodename):
        ''' Remove `nodename` from the graph
        '''
        self._graph.remove_node(nodename)

    def addEdge(self, innode, outnode, prop=None):
        ''' Link two existing nodes, storing `prop` on the edge.

        Raises Exception when either endpoint is not in the graph.
        '''
        for endpoint in (innode, outnode):
            if endpoint not in self._graph:
                raise Exception("{0} not in graph".format(endpoint))
        self._graph.add_edge(innode, outnode, prop=prop)

    def components(self):
        ''' Return connected components and node degrees.

        Returns a tuple ``(components, degree)``: the connected components
        as a list, and a dict mapping each node to its degree. Earlier
        versions computed both values and discarded them.
        '''
        comp = list(nx.connected_components(self._graph))
        degree = dict(nx.degree(self._graph))
        return comp, degree

    def avg_degree(self):
        ''' Return, for each node, the average degree of its neighbors
        '''
        return nx.average_neighbor_degree(self._graph)

    def avg_degree_connectivity(self):
        ''' Return the average degree connectivity of the graph
        '''
        return nx.average_degree_connectivity(self._graph)

    def clustering(self):
        ''' Compute the clustering coefficient for nodes.
        '''
        return nx.clustering(self._graph)

    def get_item(self, item):
        ''' Return the attribute dict of node `item`
        '''
        nodes = self._graph.nodes
        # networkx >= 2.0 exposes a subscriptable node view; Graph.node
        # was removed in 2.4. On 1.x `nodes` is a plain method, so fall
        # back to the old attribute there.
        if hasattr(nodes, '__getitem__'):
            return nodes[item]
        return self._graph.node[item]

    def filter_edges(self, param, func):
        ''' Yield ``(node, neighbor, value)`` for edges whose attribute
        `param` satisfies `func`.

        Edges without `param` are skipped. The graph is undirected, so
        each matching edge is reported once from each endpoint.
        '''
        # Graph.adj is available on every networkx version;
        # adjacency_iter() was removed in 2.0.
        for n, nbrs in self._graph.adj.items():
            for nbr, attr in nbrs.items():
                if param not in attr:
                    continue
                data = attr[param]
                if func(data):
                    yield (n, nbr, data)

    def cliques(self):
        ''' Return all maximal cliques from the graph
        '''
        return nx.find_cliques(self._graph)

    def stat(self):
        ''' Return basic statistics of the graph
        '''
        return {
            'nodes': self._graph.number_of_nodes(),
            'edges': self._graph.number_of_edges(),
            'density': nx.density(self._graph),
        }

    def save(self, outpath):
        ''' Save graph to the file (not implemented: currently a no-op)
        '''
        pass
Exemple #4
0
from imdbpie import Imdb
import pandas as pd
from halo import Halo

# Script setup: IMDb client, the list of popular shows to process, an empty
# result table, and a progress spinner. The loop below consumes all of these.
imdb = Imdb(anonymize=True)

# One entry per popular show; the loop reads "tconst" and "principals"
# from each entry.
series = imdb.popular_shows()

# Column headers of the result table.
cols = [
    "Title", "Actors", "Genres", "Rating", "Running Time", "Year",
    "Certification", "Writers"
]

# Starts empty, with only the headers defined.
df = pd.DataFrame(columns=cols)

# Progress indicator; its text is updated on every loop iteration.
spinner = Halo(text='Loading', spinner='dots')

spinner.start()

for j, el in enumerate(series):
    show = imdb.get_title_by_id(el["tconst"])
    title = show.title
    actors = ', '.join(i["name"] for i in el["principals"])
    genres = ', '.join(i for i in show.genres)
    genres = ', '.join(i for i in show.genres)
    rating = show.rating
    rtime = show.runtime
    year = show.year
    cert = show.certification
    writers = ', '.join(i.name for i in show.writers_summary)
    spinner.text = "Running - " + str((j + 1) / 0.5) + "%"