class Watcher:
    """Track IMDb's current popular shows and expose their ids, episodes and posters."""

    def __init__(self):
        self.imdb = Imdb(anonymize=True)
        # One dict per show with the keys 'id', 'title' and 'poster'.
        self.tracked_shows = self.get_shows()
        self.static_dir = os.path.join(os.path.dirname(__file__), '../static/images')

    def get_shows(self):
        """ Gets all current popular shows from imdb
        args:
            None
        returns:
            list of dicts with the keys 'id', 'title' and 'poster'
        """
        return [
            {
                'id': show['tconst'],
                'title': show['title'],
                'poster': show['image']['url'],
            }
            for show in self.imdb.popular_shows()
        ]

    def get_show_id(self, show_title):
        """ Gets show title id
        args:
            show_title: name of show to be queried
        returns:
            show_id: id of show, or None when the title is not tracked
        """
        for show in self.tracked_shows:
            if show['title'] == show_title:
                return show['id']
        return None

    def get_episodes(self, show_id):
        """ Gets all episodes of a given show
        args:
            show_id: tconst id from imdb
        returns:
            whatever self.imdb.get_episodes returns for that id
        """
        return self.imdb.get_episodes(show_id)

    def get_all_episodes(self):
        """ Gets all episodes
        args:
            None
        returns:
            dictionary mapping each tracked show title to its episodes
        """
        return {
            show['title']: self.get_episodes(show['id'])
            for show in self.tracked_shows
        }

    def get_poster(self, show_title):
        """ Gets the img url for the poster of a show
        args:
            show_title: title of show
        returns:
            dictionary with {show_title: poster_url}, or None when the
            title is not tracked
        """
        for show in self.tracked_shows:
            if show['title'] == show_title:
                return {show_title: show['poster']}
        return None

    def save_posters(self, urls, title):
        """ Downloads a poster into the static image directory
        args:
            urls: poster url, or a {title: poster_url} dictionary as
                  returned by get_poster
            title: title of show; used (sanitized) as the file name
        returns:
            path of the written .jpg file
        raises:
            KeyError: urls is a dictionary that has no entry for title
        """
        # The dictionary is keyed by the raw title, so resolve the url
        # before the title is sanitized for use as a file name.
        poster_url = urls[title] if isinstance(urls, dict) else urls
        dest = os.path.join(self.static_dir, self.sanitize_title(title) + '.jpg')
        urllib.request.urlretrieve(poster_url, dest)
        return dest

    def sanitize_title(self, title):
        """ Strips characters that are not allowed in file names
        args:
            title: title of show
        returns:
            title without any of < > : " / \\ | ? *
        """
        forbidden = ('<', '>', ':', '"', '/', '\\', '|', '?', '*')
        return ''.join(char for char in title if char not in forbidden)

    def get_show_titles(self):
        """ Gets show titles
        args:
            None
        returns:
            list of show titles
        """
        return [show['title'] for show in self.tracked_shows]
class IMDBGraph:
    ''' Undirected networkx graph of IMDb titles, people, genres and years '''

    def __init__(self, anonymize=True):
        self._imdb = Imdb(anonymize=anonymize, cache=True)
        self._graph = nx.Graph()

    def _add_node(self, name, nodetype):
        ''' Add node `name` tagged with node=nodetype unless it already exists '''
        # Same membership test addEdge uses; no need to materialise nodes().
        if name not in self._graph:
            self._graph.add_node(name, node=nodetype)

    def addPerson(self, idname):
        ''' Add new actor/actress to the graph, looked up by IMDb person id '''
        actor = self._imdb.get_person_by_id(idname)
        # Go through _add_node so people added here carry the same
        # node='actor' tag as people added by addMovieAndConnect.
        self._add_node(actor.name, 'actor')

    def addMovie(self, idname):
        ''' Add a single title node (no edges), looked up by IMDb title id '''
        logging.info("Loading %s", idname)
        movie = self._imdb.get_title_by_id(idname)
        self._add_node(movie.title, 'movie')

    def addMovieAndConnect(self, idname):
        ''' Add a title plus its year, genres and people, and link them

        Genres and people are connected to the title. Edges to credited
        people carry `rating`, `votes` and `weight` (rating + votes).
        '''
        logging.info("Loading %s", idname)
        movie = self._imdb.get_title_by_id(idname)
        title = movie.title
        self._add_node(title, 'movie')
        # NOTE(review): the year node is created but never linked to the
        # title, so it stays isolated -- confirm whether an edge is wanted.
        self._add_node(movie.year, 'year')

        for genre in movie.genres:
            self._add_node(genre, 'genre')
            self._graph.add_edge(title, genre)

        # Assumes rating and votes are both numeric -- TODO confirm they
        # can never be None for a fetched title.
        rating = movie.rating
        votes = movie.votes
        for person in movie.credits:
            self._add_node(person.name, 'actor')
            self._graph.add_edge(title, person.name,
                                 weight=rating + votes,
                                 rating=rating,
                                 votes=votes)

        for person in movie.cast_summary:
            self._add_node(person.name, 'actor')
            self._graph.add_edge(title, person.name)

    def addPopular(self, limit=2):
        ''' Add the first `limit` popular shows as title nodes '''
        shows = self._imdb.popular_shows()
        # Slicing already stops at the end of the list, so no explicit
        # clamp of `limit` is required.
        for show in shows[:limit]:
            self.addMovie(show['tconst'])

    def removeNode(self, nodename):
        ''' Remove node `nodename` from the graph '''
        self._graph.remove_node(nodename)

    def addEdge(self, innode, outnode, prop=None):
        ''' Connect two existing nodes, storing `prop` on the edge

        Raises Exception when either endpoint is not in the graph.
        '''
        for endpoint in (innode, outnode):
            if endpoint not in self._graph:
                raise Exception("{0} not in graph".format(endpoint))
        self._graph.add_edge(innode, outnode, prop=prop)

    def components(self):
        ''' Return the connected components as a list of node collections '''
        return list(nx.connected_components(self._graph))

    def avg_degree(self):
        ''' Return nx.average_neighbor_degree for the graph '''
        return nx.average_neighbor_degree(self._graph)

    def avg_degree_connectivity(self):
        ''' Return nx.average_degree_connectivity for the graph '''
        return nx.average_degree_connectivity(self._graph)

    def clustering(self):
        ''' Return nx.clustering (clustering coefficient) for the graph '''
        return nx.clustering(self._graph)

    def get_item(self, item):
        ''' Getting the attribute dict of a node from the graph '''
        nodes = self._graph.nodes
        # networkx 2.x: `nodes` is a subscriptable view and `Graph.node`
        # is gone in recent releases. networkx 1.x: `nodes` is a plain
        # method, so fall back to the `node` dict there.
        if hasattr(nodes, '__getitem__'):
            return nodes[item]
        return self._graph.node[item]

    def filter_edges(self, param, func):
        ''' Yield (node, neighbour, value) for edges whose `param` passes `func`

        Edges without a `param` attribute are skipped. On an undirected
        graph every matching edge is reported once per direction.
        '''
        # Iterating the graph and indexing it by node works across
        # networkx versions; adjacency_iter() does not exist in 2.x.
        for n in self._graph:
            for nbr, attr in self._graph[n].items():
                if param not in attr:
                    continue
                data = attr[param]
                if func(data):
                    yield (n, nbr, data)

    def cliques(self):
        ''' Return all cliques from the graph (nx.find_cliques) '''
        return nx.find_cliques(self._graph)

    def stat(self):
        ''' Return basic statistics of the graph: nodes, edges, density '''
        return {
            'nodes': self._graph.number_of_nodes(),
            'edges': self._graph.number_of_edges(),
            'density': nx.density(self._graph),
        }

    def save(self, outpath):
        ''' Save graph to the file

        NOTE(review): not implemented yet -- this is a no-op and nothing
        is written to `outpath`.
        '''
# NOTE(review): another definition of IMDBGraph appears earlier in this file;
# this later definition is the one the name ends up bound to.
class IMDBGraph:
    ''' Undirected networkx graph of IMDb titles, people, genres and years '''

    def __init__(self, anonymize=True):
        self._imdb = Imdb(anonymize=anonymize, cache=True)
        self._graph = nx.Graph()

    def _add_node(self, name, nodetype):
        ''' Add node `name` tagged with node=nodetype unless it already exists '''
        # Same membership test addEdge uses; no need to materialise nodes().
        if name not in self._graph:
            self._graph.add_node(name, node=nodetype)

    def addPerson(self, idname):
        ''' Add new actor/actress to the graph, looked up by IMDb person id '''
        person = self._imdb.get_person_by_id(idname)
        # Routed through _add_node so the node gets the same node='actor'
        # tag that addMovieAndConnect gives to people.
        self._add_node(person.name, 'actor')

    def addMovie(self, idname):
        ''' Add a single title node (no edges), looked up by IMDb title id '''
        logging.info("Loading %s", idname)
        movie = self._imdb.get_title_by_id(idname)
        self._add_node(movie.title, 'movie')

    def addMovieAndConnect(self, idname):
        ''' Add a title plus its year, genres and people, and link them

        Genres and people are connected to the title. Edges to credited
        people carry `rating`, `votes` and `weight` (rating + votes).
        '''
        logging.info("Loading %s", idname)
        movie = self._imdb.get_title_by_id(idname)
        title = movie.title
        self._add_node(title, 'movie')
        # NOTE(review): the year node is created but never linked to the
        # title, so it stays isolated -- confirm whether an edge is wanted.
        self._add_node(movie.year, 'year')

        for genre in movie.genres:
            self._add_node(genre, 'genre')
            self._graph.add_edge(title, genre)

        # Assumes rating and votes are both numeric -- TODO confirm they
        # can never be None for a fetched title.
        rating = movie.rating
        votes = movie.votes
        for person in movie.credits:
            self._add_node(person.name, 'actor')
            self._graph.add_edge(title, person.name,
                                 weight=rating + votes,
                                 rating=rating,
                                 votes=votes)

        for person in movie.cast_summary:
            self._add_node(person.name, 'actor')
            self._graph.add_edge(title, person.name)

    def addPopular(self, limit=2):
        ''' Add the first `limit` popular shows as title nodes '''
        shows = self._imdb.popular_shows()
        # A slice never runs past the end of the list, so `limit` does
        # not have to be clamped to len(shows) first.
        for show in shows[:limit]:
            self.addMovie(show['tconst'])

    def removeNode(self, nodename):
        ''' Remove node `nodename` from the graph '''
        self._graph.remove_node(nodename)

    def addEdge(self, innode, outnode, prop=None):
        ''' Connect two existing nodes, storing `prop` on the edge

        Raises Exception when either endpoint is not in the graph.
        '''
        for endpoint in (innode, outnode):
            if endpoint not in self._graph:
                raise Exception("{0} not in graph".format(endpoint))
        self._graph.add_edge(innode, outnode, prop=prop)

    def components(self):
        ''' Return the connected components as a list of node collections '''
        return list(nx.connected_components(self._graph))

    def avg_degree(self):
        ''' Return nx.average_neighbor_degree for the graph '''
        return nx.average_neighbor_degree(self._graph)

    def avg_degree_connectivity(self):
        ''' Return nx.average_degree_connectivity for the graph '''
        return nx.average_degree_connectivity(self._graph)

    def clustering(self):
        ''' Return nx.clustering (clustering coefficient) for the graph '''
        return nx.clustering(self._graph)

    def get_item(self, item):
        ''' Getting the attribute dict of a node from the graph '''
        nodes = self._graph.nodes
        # networkx 2.x: `nodes` is a subscriptable view and `Graph.node`
        # is gone in recent releases. networkx 1.x: `nodes` is a plain
        # method, so fall back to the `node` dict there.
        if hasattr(nodes, '__getitem__'):
            return nodes[item]
        return self._graph.node[item]

    def filter_edges(self, param, func):
        ''' Yield (node, neighbour, value) for edges whose `param` passes `func`

        Edges without a `param` attribute are skipped. On an undirected
        graph every matching edge is reported once per direction.
        '''
        # Iterating the graph and indexing it by node works across
        # networkx versions; adjacency_iter() does not exist in 2.x.
        for n in self._graph:
            for nbr, attr in self._graph[n].items():
                if param not in attr:
                    continue
                data = attr[param]
                if func(data):
                    yield (n, nbr, data)

    def cliques(self):
        ''' Return all cliques from the graph (nx.find_cliques) '''
        return nx.find_cliques(self._graph)

    def stat(self):
        ''' Return basic statistics of the graph: nodes, edges, density '''
        return {
            'nodes': self._graph.number_of_nodes(),
            'edges': self._graph.number_of_edges(),
            'density': nx.density(self._graph),
        }

    def save(self, outpath):
        ''' Save graph to the file

        NOTE(review): not implemented yet -- this is a no-op and nothing
        is written to `outpath`.
        '''
from imdbpie import Imdb
import pandas as pd
from halo import Halo

# Script: walk IMDb's popular shows and pull the per-show fields that match
# the DataFrame columns declared below, reporting progress on a spinner.
imdb = Imdb(anonymize=True)
series = imdb.popular_shows()
cols = [
    "Title", "Actors", "Genres", "Rating", "Running Time", "Year",
    "Certification", "Writers"
]
df = pd.DataFrame(columns=cols)

spinner = Halo(text='Loading', spinner='dots')
spinner.start()

# Progress is relative to the real number of shows, not to a fixed count.
total = len(series)
for j, el in enumerate(series):
    show = imdb.get_title_by_id(el["tconst"])
    title = show.title
    actors = ', '.join(i["name"] for i in el["principals"])
    genres = ', '.join(show.genres)
    rating = show.rating
    rtime = show.runtime
    year = show.year
    cert = show.certification
    writers = ', '.join(i.name for i in show.writers_summary)
    # NOTE(review): within the visible code these values are never written
    # into `df` and the spinner is never stopped -- presumably that happens
    # further down; confirm.
    spinner.text = "Running - " + str(round(100.0 * (j + 1) / total, 1)) + "%"