Example #1
0
    def test_graph_mutual_link(self):
        """Check that movie/actor edges are reported as connected both ways.

        Builds a graph with one movie and two actors, links them, asserts
        connectivity in each direction, then writes the graph to
        'test2.json' through JSON.store_to_Json.
        """
        network = graph.Graph()

        film = movie.Movie('https://en.wikipedia.org/wiki/Marie_(film)')
        film.set_name('Marie')
        film.set_year(1985)
        film.set_gross(3712170)

        spacek = actor.Actor('https://en.wikipedia.org/wiki/Sissy_Spacek')
        spacek.set_name('Sissy Spacek')
        spacek.set_age(69)
        daniels = actor.Actor('https://en.wikipedia.org/wiki/Jeff_Daniels')
        daniels.set_name('Jeff Daniels')
        daniels.set_age(64)
        cast = (spacek, daniels)

        network.add_movie(film)
        for performer in cast:
            network.add_actor(performer)

        # Attach the cast to the movie as it is stored in the graph.
        for performer in cast:
            network.movies['Marie'].add_actor(performer)

        # Each edge carries a weight equal to the age set on the actor above.
        for performer, weight in zip(cast, (69, 64)):
            network.add_edge(film, performer, weight)

        # Connectivity must hold regardless of argument order.
        for performer in cast:
            assert network.is_connected(film, performer)
            assert network.is_connected(performer, film)

        JSON.store_to_Json(network, 'test2.json')
Example #2
0
    def test_store_to_Json(self):
        """Build a one-movie, two-actor graph and dump it to 'test.json'.

        The test makes no assertions; it only exercises JSON.store_to_Json.
        """
        network = graph.Graph()

        film = movie.Movie('https://en.wikipedia.org/wiki/Marie_(film)')
        film.set_name('Marie (film)')
        film.set_year(1985)
        film.set_gross(3712170)

        spacek = actor.Actor('https://en.wikipedia.org/wiki/Sissy_Spacek')
        spacek.set_name('Sissy Spacek')
        spacek.set_age(69)
        daniels = actor.Actor('https://en.wikipedia.org/wiki/Jeff_Daniels')
        daniels.set_name('Jeff Daniels')
        daniels.set_age(64)
        cast = (spacek, daniels)

        network.add_movie(film)
        for performer in cast:
            network.add_actor(performer)

        # The movie/actor links are created only through add_edge here,
        # without calling add_actor/add_movie on the objects directly.
        for performer in cast:
            network.add_edge(film, performer)

        JSON.store_to_Json(network, 'test.json')
Example #3
0
    def test_query(self):
        """Exercise the graph's query helpers on a one-movie, two-actor graph."""
        network = graph.Graph()
        film = movie.Movie('https://en.wikipedia.org/wiki/Marie_(film)')
        film.set_name('Marie (film)')
        film.set_year(1985)
        film.set_gross(3712170)

        spacek = actor.Actor('https://en.wikipedia.org/wiki/Sissy_Spacek')
        spacek.set_name('Sissy Spacek')
        spacek.set_age(69)
        daniels = actor.Actor('https://en.wikipedia.org/wiki/Jeff_Daniels')
        daniels.set_name('Jeff Daniels')
        daniels.set_age(64)
        cast = (spacek, daniels)

        network.add_movie(film)
        for performer in cast:
            network.add_actor(performer)
        for performer in cast:
            network.add_edge(film, performer)

        # Movie-centric and actor-centric lookups.
        assert network.find_movie_gross(film) == film.gross
        for performer in cast:
            assert network.list_actor_movies(performer) == [film.movie_name]
        assert network.list_movie_actors(film) == [
            spacek.actor_name,
            daniels.actor_name,
        ]
        assert network.list_oldest_x_actors(1) == [spacek]

        # Year-based lookups: only 1985 has a movie.
        for empty_year in (1970, 1980):
            assert network.list_movies_for_a_year(empty_year) == []
        assert network.list_movies_for_a_year(1985) == [film]

        assert network.list_actors_for_a_year(1800) == []
        # 2019 - 69 is the year expected to match the 69-year-old actor.
        assert network.list_actors_for_a_year(2019 - 69) == [spacek]
Example #4
0
    def test_graph(self):
        """Check vertex/edge bookkeeping of Graph.

        Covers: the container types of ``movies``/``actors``, adding
        vertices, adding edges in either argument order, connectivity,
        neighbor listing, and re-adding an edge that already exists.
        """
        g = graph.Graph()
        assert isinstance(g.movies, dict)
        assert isinstance(g.actors, dict)

        m1 = movie.Movie('https://en.wikipedia.org/wiki/Marie_(film)')
        m1.set_name("Marie")
        g.add_movie(m1)
        assert g.movies == {m1.movie_name: m1}

        a1 = actor.Actor('https://en.wikipedia.org/wiki/Sissy_Spacek')
        a1.set_name('Sissy Spacek')
        g.add_actor(a1)
        assert g.actors == {a1.actor_name: a1}

        # 69 is the edge weight (it matches the age used for this actor in
        # the other tests).
        g.add_edge(m1, a1, 69)
        assert m1.actorList == [a1.actor_name]
        assert a1.movieList == [m1.movie_name]

        a2 = actor.Actor('https://en.wikipedia.org/wiki/Jeff_Daniels')
        a2.set_name('Jeff Daniels')
        g.add_actor(a2)

        # Edge added with the actor first: both sides must still be updated.
        g.add_edge(a2, m1, 64)
        assert a2.movieList == [m1.movie_name]
        assert m1.actorList == [a1.actor_name, a2.actor_name]
        assert g.is_connected(a2, m1) is True
        assert g.is_connected(m1, a2) is True
        assert g.is_connected(a1, m1) is True
        assert g.is_connected(m1, a1) is True

        assert g.get_neighbors(m1) == [a1.actor_name, a2.actor_name]
        assert g.get_neighbors(a1) == [m1.movie_name]
        assert g.get_neighbors(a2) == [m1.movie_name]

        # An actor with no edges has no neighbors (no name is set on a3).
        a3 = actor.Actor('https://en.wikipedia.org/wiki/Morgan_Freeman')
        g.add_actor(a3)
        assert g.get_neighbors(a3) == []

        # Re-adding an existing edge; the lists must still agree with
        # get_neighbors afterwards.
        g.add_edge(m1, a2, 60)

        assert m1.actorList == g.get_neighbors(m1)
        assert a1.movieList == g.get_neighbors(a1)
        assert a2.movieList == g.get_neighbors(a2)
Example #5
0
 def test_movie(self):
     """Check Movie setters and that add_actor records the actor's name."""
     film = movie.Movie(
         'https://en.wikipedia.org/wiki/Corridor_of_Mirrors_(film)')
     film.set_name('Corridor of Mirrors')
     assert isinstance(film, movie.Movie)

     # Each setter should be reflected by the matching attribute.
     film.set_year(1948)
     assert film.year == 1948
     film.set_gross(100000)
     assert film.gross == 100000

     lead = actor.Actor('https://en.wikipedia.org/wiki/Eric_Portman')
     lead.set_name('Eric Portman')
     film.add_actor(lead)
     assert film.actorList == [lead.actor_name]
Example #6
0
    def test_actor(self):
        """Check that Actor.add_movie appends movie names in insertion order."""
        performer = actor.Actor('https://en.wikipedia.org/wiki/Morgan_Freeman')
        performer.set_name('Morgan Freeman')
        assert isinstance(performer, actor.Actor)

        first_film = movie.Movie('https://en.wikipedia.org/wiki/Brubaker')
        first_film.set_name('Brubaker')
        first_film.set_year(1980)

        performer.add_movie(first_film)
        assert performer.movieList == [first_film.movie_name]

        second_film = movie.Movie('https://en.wikipedia.org/wiki/Marie_(film)')
        second_film.set_name('Marie')
        second_film.set_year(1985)

        performer.add_movie(second_film)
        assert performer.movieList == [
            first_film.movie_name,
            second_film.movie_name,
        ]
Example #7
0
def get_actor_from_movie(url, urlQueue, g):
    """Collect the actors listed in a film page's "Cast" section.

    The movie is registered in ``g.movies`` (keyed by ``wiki + url``) if it
    is not there yet, then every link found in the first ``<ul>`` after the
    element with id ``Cast`` is turned into an Actor, added to the graph and
    to the movie, and its URL is appended to ``urlQueue``.

    Args:
        url: page path of the movie; it is concatenated to the module-level
            ``wiki`` prefix to form the full URL.
        urlQueue: list-like object; each discovered actor URL is appended.
        g: graph exposing a ``movies`` mapping and ``add_actor``.

    Returns:
        The list of Actor objects created (possibly empty), or ``None`` when
        the page cannot be read or has no usable cast section.
    """
    # NOTE(review): Graph.add_movie appears to key g.movies by movie name,
    # whereas this function keys it by URL -- confirm this is intended.
    movie_url = wiki + url
    if movie_url not in g.movies:
        g.movies[movie_url] = movie.Movie(movie_url)
        g.movies[movie_url].set_gross(get_movie_gross(url))
        g.movies[movie_url].set_year(get_movie_year(url))

    soup = read_url(url)
    if not soup:
        # The URL must go through a %s placeholder; passing it as a bare
        # extra argument makes logging fail to format the record.
        logging.error('cannot open the url %s', movie_url)
        return
    cast = soup.find_all('span', {'id': ['Cast']})
    if not cast:
        logging.warning(
            'Soup find_all Warning: cannot find any cast information')
        return

    cast_list = cast[0].find_next('ul')  # unordered list
    if cast_list is None:
        # A "Cast" heading without a following list gives nothing to parse.
        logging.warning(
            'Soup find_all Warning: cannot find any cast information')
        return

    cast_link = cast_list.find_all('a')
    if not cast_link:
        logging.warning(
            'Soup find_all Warning: cannot find any actor link from this film page'
        )

    actors = []
    for link in cast_link:
        actor_url = link.get('href')
        if not actor_url:
            # Anchors without a target cannot be turned into an actor page.
            continue
        new_actor = actor.Actor(wiki + actor_url)
        new_actor.set_name(link.get('title'))
        new_actor.set_age(get_actor_age(actor_url))
        g.add_actor(new_actor)
        print(new_actor.actor_name, new_actor.age)
        g.movies[movie_url].add_actor(new_actor)  # add actor to movie
        actors.append(new_actor)
        urlQueue.append(actor_url)

    return actors
Example #8
0
def retrieve_from_Json(file):
    """Rebuild a Graph from a JSON dump of actors and movies.

    The JSON document is expected to be a two-element sequence: element 0
    maps keys to actor records and element 1 maps keys to movie records.
    Actor records are read for ``json_class``, ``name``, ``age``,
    ``total_gross`` and ``movies``; movie records for ``name``,
    ``wiki_page``, ``box_office``, ``year`` and ``actors``.

    Only movies referenced by at least one actor record (and present in the
    movie data) are added to the graph.

    Args:
        file: path of the JSON file to read.

    Returns:
        A tuple ``(graph, actor_data, movie_data)`` where the last two are
        the raw mappings loaded from the file.
    """
    wiki = 'https://en.wikipedia.org'

    # Context manager so the file handle is closed deterministically.
    with open(file) as source:
        data = json.load(source)

    actor_dict = {}
    movie_dict = {}
    g = graph.Graph()

    actor_data = data[0]
    movie_data = data[1]

    for key, value in actor_data.items():
        if value['json_class'] != 'Actor':
            continue
        # NOTE(review): the lookup uses the record key while entries are
        # stored under the actor's name -- presumably these are equal.
        if key in actor_dict:  # exist before
            continue

        # get data related to actor
        actor_name = value['name']
        actor_movies = value['movies']

        # construct new actor class and set info
        actor_url = wiki + '/wiki/' + actor_name.replace(" ", "_")
        new_actor = actor.Actor(actor_url)
        new_actor.set_name(actor_name)
        new_actor.set_age(value['age'])
        new_actor.set_gross(value['total_gross'])
        g.add_actor(new_actor)  # add a new actor vertex to the graph
        new_actor.movieList.extend(actor_movies)
        actor_dict[actor_name] = new_actor

        for m in actor_movies:
            if m not in movie_data:  # not exist in the movie database
                continue

            movie_item = movie_data[m]
            if m in movie_dict:  # exist before
                new_movie = movie_dict[m]
            else:
                # construct new movie class and set info
                new_movie = movie.Movie(movie_item['wiki_page'])
                new_movie.set_name(movie_item['name'])
                new_movie.set_year(movie_item['year'])
                new_movie.set_gross(movie_item['box_office'])
                g.add_movie(new_movie)  # add a new movie vertex to the graph
                new_movie.actorList.extend(movie_item['actors'])
                movie_dict[m] = new_movie

            # Take the weight from this movie's own record; a variable set
            # only when a movie is first built would be stale for movies
            # that were already created for an earlier actor.
            g.add_edge(new_actor, new_movie, movie_item['box_office'])

    return g, actor_data, movie_data