def test_graph_mutual_link(self):
    """Check that movie/actor edges are reported as connected both ways.

    Builds a graph with the film 'Marie' and two actors, links each actor
    to the film, asserts connectivity in both directions, and finally
    writes the graph to 'test2.json'.
    """
    network = graph.Graph()

    film = movie.Movie('https://en.wikipedia.org/wiki/Marie_(film)')
    film.set_name('Marie')
    film.set_year(1985)
    film.set_gross(3712170)

    spacek = actor.Actor('https://en.wikipedia.org/wiki/Sissy_Spacek')
    spacek.set_name('Sissy Spacek')
    spacek.set_age(69)

    daniels = actor.Actor('https://en.wikipedia.org/wiki/Jeff_Daniels')
    daniels.set_name('Jeff Daniels')
    daniels.set_age(64)

    # Each performer is paired with the weight used for its edge.
    cast = ((spacek, 69), (daniels, 64))

    network.add_movie(film)
    for performer, _ in cast:
        network.add_actor(performer)
    for performer, _ in cast:
        network.movies['Marie'].add_actor(performer)
    for performer, weight in cast:
        network.add_edge(film, performer, weight)

    for performer, _ in cast:
        assert (network.is_connected(film, performer)
                and network.is_connected(performer, film))

    JSON.store_to_Json(network, 'test2.json')
def test_store_to_Json(self):
    """Build a one-movie, two-actor graph and write it to 'test.json'.

    The test makes no assertions; it only exercises JSON.store_to_Json.
    """
    network = graph.Graph()

    film = movie.Movie('https://en.wikipedia.org/wiki/Marie_(film)')
    film.set_name('Marie (film)')
    film.set_year(1985)
    film.set_gross(3712170)

    spacek = actor.Actor('https://en.wikipedia.org/wiki/Sissy_Spacek')
    spacek.set_name('Sissy Spacek')
    spacek.set_age(69)

    daniels = actor.Actor('https://en.wikipedia.org/wiki/Jeff_Daniels')
    daniels.set_name('Jeff Daniels')
    daniels.set_age(64)

    network.add_movie(film)
    performers = (spacek, daniels)
    for performer in performers:
        network.add_actor(performer)

    # Edges are added without an explicit weight here.
    for performer in performers:
        network.add_edge(film, performer)

    JSON.store_to_Json(network, 'test.json')
def test_query(self):
    """Exercise the graph's query helpers on a one-movie, two-actor graph."""
    network = graph.Graph()

    film = movie.Movie('https://en.wikipedia.org/wiki/Marie_(film)')
    film.set_name('Marie (film)')
    film.set_year(1985)
    film.set_gross(3712170)

    spacek = actor.Actor('https://en.wikipedia.org/wiki/Sissy_Spacek')
    spacek.set_name('Sissy Spacek')
    spacek.set_age(69)

    daniels = actor.Actor('https://en.wikipedia.org/wiki/Jeff_Daniels')
    daniels.set_name('Jeff Daniels')
    daniels.set_age(64)

    network.add_movie(film)
    network.add_actor(spacek)
    network.add_actor(daniels)
    network.add_edge(film, spacek)
    network.add_edge(film, daniels)

    # Movie-level and actor-level lookups.
    assert network.find_movie_gross(film) == film.gross
    for performer in (spacek, daniels):
        assert network.list_actor_movies(performer) == [film.movie_name]
    assert network.list_movie_actors(film) == [
        spacek.actor_name,
        daniels.actor_name,
    ]
    assert network.list_oldest_x_actors(1) == [spacek]

    # Year-based movie lookups: only 1985 has a film.
    for empty_year in (1970, 1980):
        assert network.list_movies_for_a_year(empty_year) == []
    assert network.list_movies_for_a_year(1985) == [film]

    # Year-based actor lookups.
    # NOTE(review): 2019 looks like a hard-coded "current year" used to turn
    # an age into a birth year -- confirm against list_actors_for_a_year.
    assert network.list_actors_for_a_year(1800) == []
    assert network.list_actors_for_a_year(2019 - 69) == [spacek]
def test_graph(self):
    """Walk through vertex/edge insertion and neighbor queries on a Graph."""
    network = graph.Graph()
    for registry in (network.movies, network.actors):
        assert isinstance(registry, dict)

    # Register one movie; the graph indexes it by movie name.
    film = movie.Movie('https://en.wikipedia.org/wiki/Marie_(film)')
    film.set_name("Marie")
    network.add_movie(film)
    assert network.movies == {film.movie_name: film}

    # Register the first actor; the graph indexes it by actor name.
    spacek = actor.Actor('https://en.wikipedia.org/wiki/Sissy_Spacek')
    spacek.set_name('Sissy Spacek')
    network.add_actor(spacek)
    assert network.actors == {spacek.actor_name: spacek}

    # Third argument is the edge weight; endpoints are given movie-first.
    network.add_edge(film, spacek, 69)
    assert film.actorList == [spacek.actor_name]
    assert spacek.movieList == [film.movie_name]

    # Second actor, this time with the endpoints given actor-first.
    daniels = actor.Actor('https://en.wikipedia.org/wiki/Jeff_Daniels')
    # NOTE(review): 'eff Daniels' looks like a typo for 'Jeff Daniels';
    # kept as-is because the assertions only compare against actor_name.
    daniels.set_name('eff Daniels')
    network.add_actor(daniels)
    network.add_edge(daniels, film, 64)
    assert daniels.movieList == [film.movie_name]
    assert film.actorList == [spacek.actor_name, daniels.actor_name]

    endpoint_pairs = (
        (daniels, film),
        (film, daniels),
        (spacek, film),
        (film, spacek),
    )
    for source, target in endpoint_pairs:
        assert network.is_connected(source, target) is True

    assert network.get_neighbors(film) == [
        spacek.actor_name,
        daniels.actor_name,
    ]
    assert network.get_neighbors(spacek) == [film.movie_name]
    assert network.get_neighbors(daniels) == [film.movie_name]

    # An actor with no edges has no neighbors.
    freeman = actor.Actor('https://en.wikipedia.org/wiki/Morgan_Freeman')
    network.add_actor(freeman)
    assert network.get_neighbors(freeman) == []

    # Re-add an edge that already exists (with a different weight); the
    # per-vertex lists must still agree with get_neighbors.
    network.add_edge(film, daniels, 60)
    assert film.actorList == network.get_neighbors(film)
    assert spacek.movieList == network.get_neighbors(spacek)
    assert daniels.movieList == network.get_neighbors(daniels)
def test_movie(self):
    """Check Movie setters and that add_actor records the actor's name."""
    film = movie.Movie(
        'https://en.wikipedia.org/wiki/Corridor_of_Mirrors_(film)')
    film.set_name('Corridor of Mirrors')
    assert isinstance(film, movie.Movie)

    film.set_year(1948)
    assert film.year == 1948

    film.set_gross(100000)
    assert film.gross == 100000

    # actorList is expected to hold actor names, not Actor objects.
    portman = actor.Actor('https://en.wikipedia.org/wiki/Eric_Portman')
    portman.set_name('Eric Portman')
    film.add_actor(portman)
    assert film.actorList == [portman.actor_name]
def test_actor(self):
    """Check that Actor.add_movie appends movie names in insertion order."""
    freeman = actor.Actor('https://en.wikipedia.org/wiki/Morgan_Freeman')
    freeman.set_name('Morgan Freeman')
    assert isinstance(freeman, actor.Actor)

    brubaker = movie.Movie('https://en.wikipedia.org/wiki/Brubaker')
    brubaker.set_name('Brubaker')
    brubaker.set_year(1980)
    freeman.add_movie(brubaker)
    assert freeman.movieList == [brubaker.movie_name]

    marie = movie.Movie('https://en.wikipedia.org/wiki/Marie_(film)')
    marie.set_name('Marie')
    marie.set_year(1985)
    freeman.add_movie(marie)
    assert freeman.movieList == [brubaker.movie_name, marie.movie_name]
def get_actor_from_movie(url, urlQueue, g):
    """Scrape a film page's cast list and register the actors in the graph.

    Args:
        url: Path of the film page; it is appended to the module-level
            ``wiki`` prefix to form the key used in ``g.movies``.
        urlQueue: List-like queue; the path of every actor found is
            appended to it.
        g: Graph whose ``movies`` mapping and ``add_actor`` method are
            updated in place.

    Returns:
        The list of Actor objects created from the cast links (empty when
        the cast list has no usable links), or ``None`` when the page
        cannot be read or has no cast section.
    """
    movie_url = wiki + url

    # Scrape gross/year only the first time this movie is registered.
    if movie_url not in g.movies:
        g.movies[movie_url] = movie.Movie(movie_url)
        g.movies[movie_url].set_gross(get_movie_gross(url))
        g.movies[movie_url].set_year(get_movie_year(url))

    soup = read_url(url)
    if not soup:
        # The URL must go through a %s placeholder; passing it as a bare
        # extra argument makes the logging module fail to format the record.
        logging.error('cannot open the url %s', movie_url)
        return None

    cast = soup.find_all('span', {'id': ['Cast']})
    if not cast:
        logging.warning(
            'Soup find_all Warning: cannot find any cast information')
        return None

    cast_list = cast[0].find_next('ul')  # unordered list
    if cast_list is None:
        # A "Cast" heading with no following <ul> has nothing to parse.
        logging.warning(
            'Soup find_all Warning: cannot find any cast information')
        return None

    cast_link = cast_list.find_all('a')
    if not cast_link:
        logging.warning(
            'Soup find_all Warning: cannot find any actor link from this film page'
        )

    actors = []
    for link in cast_link:
        actor_url = link.get('href')
        if not actor_url:
            # Anchors without an href cannot be turned into an actor page.
            continue
        new_actor = actor.Actor(wiki + actor_url)
        new_actor.set_name(link.get('title'))
        new_actor.set_age(get_actor_age(actor_url))
        g.add_actor(new_actor)
        print(new_actor.actor_name, new_actor.age)
        g.movies[movie_url].add_actor(new_actor)  # add actor to movie
        actors.append(new_actor)
        urlQueue.append(actor_url)
    return actors
def retrieve_from_Json(file):
    """Rebuild a graph from a JSON dump of actors and movies.

    The file must decode to a two-element sequence:

    * element 0 maps an actor key to a dict with the keys ``json_class``,
      ``name``, ``age``, ``total_gross`` and ``movies``;
    * element 1 maps a movie key to a dict with the keys ``name``,
      ``wiki_page``, ``box_office``, ``year`` and ``actors``.

    Entries of element 0 whose ``json_class`` is not ``'Actor'`` are
    ignored, as are movies an actor lists that are missing from element 1.

    Args:
        file: Path of the JSON file to read.

    Returns:
        A tuple ``(g, actor_data, movie_data)``: the populated graph and the
        two raw mappings decoded from the file.
    """
    wiki = 'https://en.wikipedia.org'

    # Context manager so the file handle is closed even if decoding fails.
    with open(file) as json_file:
        data = json.load(json_file)

    actor_dict = {}
    movie_dict = {}
    g = graph.Graph()
    actor_data = data[0]
    movie_data = data[1]

    for key, value in actor_data.items():
        if value['json_class'] != 'Actor':
            continue

        # Read the movie list for every actor entry so the loop below never
        # works on a list left over from a previous iteration.
        actor_movies = value['movies']

        if key in actor_dict:  # exist before
            new_actor = actor_dict[key]
        else:
            actor_name = value['name']
            actor_age = value['age']
            actor_gross = value['total_gross']

            # construct new actor and add it as a vertex
            actor_url = wiki + '/wiki/' + actor_name.replace(" ", "_")
            new_actor = actor.Actor(actor_url)
            new_actor.set_name(actor_name)
            new_actor.set_age(actor_age)
            new_actor.set_gross(actor_gross)
            g.add_actor(new_actor)
            new_actor.movieList.extend(actor_movies)
            actor_dict[actor_name] = new_actor

        for m in actor_movies:
            if m not in movie_data:  # not exist in the movie database
                continue
            movie_item = movie_data[m]

            if m in movie_dict:  # exist before
                new_movie = movie_dict[m]
            else:
                movie_name = movie_item['name']
                movie_url = movie_item['wiki_page']
                movie_gross = movie_item['box_office']
                movie_year = movie_item['year']
                movie_actors = movie_item['actors']

                # construct new movie and add it as a vertex
                new_movie = movie.Movie(movie_url)
                new_movie.set_name(movie_name)
                new_movie.set_year(movie_year)
                new_movie.set_gross(movie_gross)
                g.add_movie(new_movie)
                new_movie.actorList.extend(movie_actors)
                movie_dict[m] = new_movie

            # Weight the edge with THIS movie's box office. Reusing a local
            # set only in the "new movie" branch would carry over the gross
            # of whichever movie was created last.
            g.add_edge(new_actor, new_movie, movie_item['box_office'])

    return g, actor_data, movie_data