Exemple #1
0
def test_multi_src_dst_edge_loader(graphscope_session, student_group_e,
                                   teacher_group_e, student_v, teacher_v):
    """Register the edge label "group" for two different src/dst label pairs.

    The same edge label is added once for student->student and once for
    teacher->teacher; the resulting graph must report itself as loaded.
    """
    graph = (
        Graph(graphscope_session)
        .add_vertices(student_v, "student",
                      ["name", "lesson_nums", "avg_score"], "student_id")
        .add_vertices(teacher_v, "teacher",
                      ["student_num", "score", "email", "tel"], "teacher_id")
    )

    # Both relations share label and properties; only the endpoint label
    # (and the id columns derived from it) differ.
    for role, edge_source in (("student", student_group_e),
                              ("teacher", teacher_group_e)):
        graph = graph.add_edges(
            edge_source,
            "group",
            ["group_id", "member_size"],
            src_label=role,
            dst_label=role,
            src_field=f"leader_{role}_id",
            dst_field=f"member_{role}_id",
        )

    assert graph.loaded()
Exemple #2
0
def test_Load_complex_graph_variants(
    graphscope_session,
    score_e,
    student_group_e_df,
    student_v_array,
    teacher_v_oss,
    lesson_v_mars,
):
    """Build one graph from several differently-sourced fixtures.

    Three vertex labels ("student", "teacher", "lesson") and two edge
    labels ("score", "group") are added, then the schema is checked to
    exist. The fixture names suggest array/OSS/Mars/DataFrame backed inputs;
    how they are built is not visible from this test.
    """
    graph = Graph(graphscope_session)
    graph = graph.add_vertices(student_v_array, "student",
                               ["name", "lesson_nums", "avg_score"],
                               "student_id")
    graph = graph.add_vertices(teacher_v_oss, "teacher",
                               ["student_num", "score", "email", "tel"])
    graph = graph.add_vertices(lesson_v_mars, "lesson")
    graph = graph.add_edges(
        score_e,
        "score",
        ["score", "score_id"],
        src_label="student",
        dst_label="lesson",
        # Column name was misspelled "studnet_id"; the same fixture is
        # consumed with "student_id" by test_load_complex_graph.
        src_field="student_id",
        dst_field="subject",
    )
    graph = graph.add_edges(
        student_group_e_df,
        "group",
        ["member_size"],
        src_label="student",
        dst_label="student",
        # Was "leader_studnet_id"; the other group-edge tests use
        # "leader_student_id" / "member_student_id".
        src_field="leader_student_id",
        dst_field="member_student_id",
    )
    assert graph.schema is not None
Exemple #3
0
def test_error_on_duplicate_labels(graphscope_session, student_group_e,
                                   student_v):
    """Adding a vertex or edge label a second time must raise ValueError."""
    base = Graph(graphscope_session).add_vertices(student_v, "student")

    # Second registration of the same vertex label is rejected.
    with pytest.raises(ValueError,
                       match="Label student already existed in graph"):
        base.add_vertices(student_v, "student")

    with_edges = base.add_edges(student_group_e, "group")

    # Same rule for a repeated edge label.
    with pytest.raises(ValueError, match="already existed in graph"):
        with_edges.add_edges(student_group_e, "group")
Exemple #4
0
def test_multiple_add_vertices_edges(graphscope_session):
    """Add vertex/edge labels in two rounds and check the schema after each.

    Round one loads person/knows/software/created from the modern_graph
    dataset; round two loads the same files again under "...2" labels.
    """
    prefix = os.path.expandvars("${GS_TEST_DIR}/modern_graph")
    graph = Graph(graphscope_session)
    graph = graph.add_vertices(Loader(f"{prefix}/person.csv", delimiter="|"),
                               "person")
    graph = graph.add_edges(Loader(f"{prefix}/knows.csv", delimiter="|"),
                            "knows")
    graph = graph.add_vertices(Loader(f"{prefix}/software.csv", delimiter="|"),
                               "software")
    graph = graph.add_edges(
        Loader(f"{prefix}/created.csv", delimiter="|"),
        "created",
        src_label="person",
        dst_label="software",
    )

    assert graph.schema.vertex_labels == ["person", "software"]
    # "knows" is added before "created", so the unsorted list would not be
    # alphabetical; compare sorted labels like the final assertions below.
    assert sorted(graph.schema.edge_labels) == ["created", "knows"]

    graph = graph.add_vertices(Loader(f"{prefix}/person.csv", delimiter="|"),
                               "person2")
    graph = graph.add_edges(
        Loader(f"{prefix}/knows.csv", delimiter="|"),
        "knows2",
        src_label="person2",
        dst_label="person2",
    )
    graph = graph.add_vertices(Loader(f"{prefix}/software.csv", delimiter="|"),
                               "software2")
    graph = graph.add_edges(
        Loader(f"{prefix}/created.csv", delimiter="|"),
        "created2",
        src_label="person2",
        dst_label="software2",
    )

    assert sorted(graph.schema.vertex_labels) == [
        "person",
        "person2",
        "software",
        "software2",
    ]
    assert sorted(graph.schema.edge_labels) == [
        "created",
        "created2",
        "knows",
        "knows2",
    ]
def test_serialize_roundtrip(gs_session_distributed, p2p_property_dir):
    """Save a graph to disk, load it back, and run SSSP on the restored copy.

    The SSSP result for the selected vertex range is compared against fixed
    expected (node, distance) rows.
    """
    storage_path = "/tmp/serialize"
    vertex_file = f"{p2p_property_dir}/p2p-31_property_v_0"
    edge_file = f"{p2p_property_dir}/p2p-31_property_e_0"

    original = Graph(gs_session_distributed, generate_eid=False)
    original = original.add_vertices(vertex_file, "person")
    original = original.add_edges(edge_file,
                                  label="knows",
                                  src_label="person",
                                  dst_label="person")

    original.save_to(storage_path)
    restored = Graph.load_from(storage_path, gs_session_distributed)

    simple = restored.project_to_simple(0, 0, 0, 2)
    ctx = graphscope.sssp(simple, src=6)

    # Pull (node id, result) pairs for the requested range, ordered by node.
    frame = ctx.to_dataframe({"node": "v.id", "r": "r"},
                             vertex_range={"end": 6})
    ret = frame.sort_values(by=["node"]).to_numpy(dtype=float)

    expect = np.array([
        [1.0, 260.0],
        [2.0, 229.0],
        [3.0, 310.0],
        [4.0, 256.0],
        [5.0, 303.0],
    ])
    assert np.all(ret == expect)
Exemple #6
0
def test_properties_omitted_loader(graphscope_session, student_group_e,
                                   student_v):
    """Pass empty property lists and check the resulting property counts."""
    graph = (
        Graph(graphscope_session, generate_eid=False)
        .add_vertices(student_v, "student", [], "student_id")
        .add_edges(student_group_e, "group", [])
    )

    schema = graph.schema
    vertex_props = schema.vertex_properties[0]
    assert len(vertex_props) == 4
    edge_props = schema.edge_properties[0]
    assert len(edge_props) == 2
Exemple #7
0
def test_error_on_ambigious_default_label(graphscope_session, student_group_e,
                                          student_v, teacher_v):
    """Edges without src/dst labels are rejected when two vertex labels exist."""
    graph = (
        Graph(graphscope_session)
        .add_vertices(student_v, "student")
        .add_vertices(teacher_v, "teacher")
    )

    # No src_label/dst_label given while two vertex labels are registered.
    with pytest.raises(AssertionError, match="ambiguous vertex label"):
        graph.add_edges(student_group_e, "group")
Exemple #8
0
def arrow_property_graph_lpa(graphscope_session):
    """Yield a graph built from the lpa_3000 dataset, then unload it.

    Two vertex labels (v0, v1) and one weighted edge label (e0, v0 -> v1)
    are loaded from ``property_dir``.
    """
    graph = Graph(graphscope_session, generate_eid=False)
    dataset = f"{property_dir}/lpa_dataset"
    for label in ("v0", "v1"):
        # Vertex files are named lpa_3000_v_0 / lpa_3000_v_1.
        graph = graph.add_vertices(f"{dataset}/lpa_3000_{label[0]}_{label[1]}",
                                   label)
    graph = graph.add_edges(f"{dataset}/lpa_3000_e_0", "e0", ["weight"], "v0",
                            "v1")
    yield graph
    # Runs after the consumer is done with the graph.
    graph.unload()
Exemple #9
0
def test_add_vertices_edges(graphscope_session):
    """Incrementally add labels and check both error paths and the schema.

    Covers: edges without labels once an edge label exists, edges pointing
    at an unknown vertex label, re-using an existing edge label for a new
    relation, and finally a valid person -> software relation.
    """
    prefix = os.path.expandvars("${GS_TEST_DIR}/modern_graph")

    def csv(name):
        # All modern_graph files are pipe-delimited.
        return Loader(f"{prefix}/{name}.csv", delimiter="|")

    graph = Graph(graphscope_session).add_vertices(csv("person"), "person")
    graph = graph.add_edges(csv("knows"), "knows")

    schema = graph.schema
    assert schema.vertex_labels == ["person"]
    assert schema.edge_labels == ["knows"]

    with pytest.raises(ValueError,
                       match="src label and dst label cannot be None"):
        graph.add_edges(csv("knows"), "created")

    # "software" has not been added yet.
    with pytest.raises(ValueError,
                       match="src label or dst_label not existed in graph"):
        graph.add_edges(csv("created"),
                        "created",
                        src_label="person",
                        dst_label="software")

    graph = graph.add_vertices(csv("software"), "software")

    # "knows" already exists as an edge label.
    with pytest.raises(ValueError,
                       match="Cannot add new relation to existed graph"):
        graph.add_edges(csv("knows"),
                        "knows",
                        src_label="software",
                        dst_label="software")

    graph = graph.add_edges(csv("created"),
                            "created",
                            src_label="person",
                            dst_label="software")

    schema = graph.schema
    assert schema.vertex_labels == ["person", "software"]
    assert schema.edge_labels == ["knows", "created"]
Exemple #10
0
def test_load_complex_graph(
    graphscope_session,
    score_e,
    student_teacher_e,
    teacher_lesson_e,
    student_v,
    teacher_v,
    lesson_v,
):
    """Load three vertex labels and three edge labels with string vertex ids."""
    graph = (
        Graph(graphscope_session, oid_type="string")
        .add_vertices(student_v, "student",
                      ["name", "lesson_nums", "avg_score"], "student_id")
        .add_vertices(teacher_v, "teacher",
                      ["student_num", "score", "email", "tel"])
        .add_vertices(lesson_v, "lesson")
    )

    # (data, label, properties, src label, dst label, src column, dst column)
    relations = (
        (score_e, "score", ["score", "score_id"], "student", "lesson",
         "student_id", "subject"),
        (student_teacher_e, "student_teacher", ["teaching_score"], "student",
         "teacher", "student_id", "teacher_id"),
        (teacher_lesson_e, "teacher_lesson", ["times"], "teacher", "lesson",
         "teacher_id", "lesson"),
    )
    for data, label, props, src, dst, src_col, dst_col in relations:
        graph = graph.add_edges(
            data,
            label,
            props,
            src_label=src,
            dst_label=dst,
            src_field=src_col,
            dst_field=dst_col,
        )

    assert graph.schema is not None
Exemple #11
0
def p2p_property_graph_undirected(graphscope_session):
    """Yield an undirected person/knows graph from the p2p-31 files.

    The graph is unloaded once the consumer resumes the generator.
    """
    vertex_file = f"{property_dir}/p2p-31_property_v_0"
    edge_file = f"{property_dir}/p2p-31_property_e_0"

    graph = Graph(graphscope_session, directed=False, generate_eid=False)
    graph = graph.add_vertices(vertex_file, "person").add_edges(
        edge_file,
        label="knows",
        src_label="person",
        dst_label="person",
    )
    yield graph
    graph.unload()
Exemple #12
0
def test_error_on_non_default_and_non_existing_v_label(graphscope_session,
                                                       student_group_e,
                                                       student_v):
    """Reject edges with unknown vertex labels or only one endpoint label."""
    graph = Graph(graphscope_session).add_vertices(student_v, "student")

    # "v" was never registered as a vertex label.
    unknown_label = "src label or dst_label not existed in graph"
    with pytest.raises(ValueError, match=unknown_label):
        graph.add_edges(student_group_e,
                        "group",
                        src_label="v",
                        dst_label="v")

    # Only src_label is supplied; dst_label is missing.
    half_specified = "must be both specified or either unspecified"
    with pytest.raises(ValueError, match=half_specified):
        graph.add_edges(student_group_e, src_label="v")
Exemple #13
0
def test_v_property_omitted_form_loader(graphscope_session, student_group_e,
                                        student_v):
    """Add vertices with only a label, then edges without endpoint labels.

    No vertex property list or id column is given here. Presumably all
    columns are loaded and the first one serves as the vertex id; confirm
    against the add_vertices documentation.
    """
    graph = Graph(graphscope_session).add_vertices(student_v, "student")

    edge_columns = ["group_id", "member_size"]
    # src_label/dst_label are intentionally left out.
    graph = graph.add_edges(student_group_e,
                            "group",
                            edge_columns,
                            src_field="leader_student_id",
                            dst_field="member_student_id")

    assert graph.loaded()
Exemple #14
0
def test_error_on_remove_vertices_edges(graphscope_session):
    """Check every rejection path of remove_vertices / remove_edges.

    The first four checks run before ``graph.loaded()`` is asserted; the
    last two check the "loaded graph" error messages afterwards.
    """
    prefix = os.path.expandvars("${GS_TEST_DIR}/modern_graph")

    def csv(name):
        return Loader(f"{prefix}/{name}.csv", delimiter="|")

    graph = (
        Graph(graphscope_session)
        .add_vertices(csv("person"), "person")
        .add_edges(csv("knows"), "knows")
        .add_vertices(csv("software"), "software")
        .add_edges(csv("created"),
                   "created",
                   src_label="person",
                   dst_label="software")
    )

    # "software" is still the destination of the "created" relation.
    with pytest.raises(ValueError,
                       match="Vertex software has usage in relation"):
        graph.remove_vertices("software")

    # Unknown labels.
    with pytest.raises(ValueError, match="label xxx not in vertices"):
        graph.remove_vertices("xxx")
    with pytest.raises(ValueError, match="label xxx not in edges"):
        graph.remove_edges("xxx")

    # Known edge label, unknown endpoints.
    with pytest.raises(ValueError, match="Cannot find edges to remove"):
        graph.remove_edges("knows", src_label="xxx", dst_label="xxx")

    assert graph.loaded()

    vertex_msg = "Remove vertices from a loaded graph doesn't supported yet"
    with pytest.raises(ValueError, match=vertex_msg):
        graph.remove_vertices("person")

    edge_msg = "Remove edges from a loaded graph doesn't supported yet"
    with pytest.raises(ValueError, match=edge_msg):
        graph.remove_edges("knows")
Exemple #15
0
def test_vid_omitted_form_loader(graphscope_session, student_group_e,
                                 student_v):
    """Add vertices without naming the id column, then fully-specified edges.

    Presumably the first column is used as the vertex id when none is
    given; confirm against the add_vertices documentation.
    """
    vertex_columns = ["name", "lesson_nums", "avg_score"]
    edge_columns = ["group_id", "member_size"]

    graph = Graph(graphscope_session)
    graph = graph.add_vertices(student_v, "student", vertex_columns)
    graph = graph.add_edges(student_group_e,
                            "group",
                            edge_columns,
                            src_label="student",
                            dst_label="student",
                            src_field="leader_student_id",
                            dst_field="member_student_id")

    assert graph.loaded()
Exemple #16
0
def load_ogbn_mag(sess, prefix):
    """Load ogbn_mag graph.

    The ogbn-mag dataset is a heterogeneous network composed of a subset of
    the Microsoft Academic Graph (MAG).
    See more details here:
    https://ogb.stanford.edu/docs/nodeprop/#ogbn-mag

    Four vertex labels (paper, author, institution, field_of_study) and four
    edge labels (affiliated, hasTopic, cites, writes) are loaded from CSV
    files located directly under ``prefix``.

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
        prefix (str): Data directory.

    Returns:
        :class:`graphscope.Graph`: A Graph object which graph type is ArrowProperty
    """
    graph = Graph(sess)
    graph = (
        graph.add_vertices(os.path.join(prefix, "paper.csv"), "paper")
        .add_vertices(os.path.join(prefix, "author.csv"), "author")
        .add_vertices(os.path.join(prefix, "institution.csv"), "institution")
        .add_vertices(os.path.join(prefix, "field_of_study.csv"),
                      "field_of_study")
        .add_edges(
            os.path.join(prefix, "author_affiliated_with_institution.csv"),
            "affiliated",
            src_label="author",
            dst_label="institution",
        )
        .add_edges(
            os.path.join(prefix, "paper_has_topic_field_of_study.csv"),
            "hasTopic",
            src_label="paper",
            dst_label="field_of_study",
        )
        .add_edges(
            os.path.join(prefix, "paper_cites_paper.csv"),
            "cites",
            src_label="paper",
            dst_label="paper",
        )
        .add_edges(
            os.path.join(prefix, "author_writes_paper.csv"),
            "writes",
            src_label="author",
            dst_label="paper",
        )
    )

    return graph
Exemple #17
0
def test_complete_form_loader(graphscope_session, student_group_e, student_v):
    """Load vertices and edges with every argument spelled out.

    Property types are not given explicitly here, so they are left to the
    loader to infer.
    """
    graph = (
        Graph(graphscope_session)
        .add_vertices(
            student_v,
            "student",
            ["name", "lesson_nums", "avg_score"],
            "student_id",
        )
        .add_edges(
            student_group_e,
            "group",
            ["group_id", "member_size"],
            src_label="student",
            dst_label="student",
            src_field="leader_student_id",
            dst_field="member_student_id",
        )
    )
    assert graph.loaded()
Exemple #18
0
def test_loader_with_specified_data_type(graphscope_session, student_group_e,
                                         student_v):
    """Declare some property types explicitly and check the schema codes.

    The integers in the expected dicts are the type codes reported by the
    schema. Presumably 21 is string, 11 is int and 18 is float; confirm
    against the project's data type enum.
    """
    vertex_columns = ["name", ("lesson_nums", "int"), ("avg_score", "float")]
    edge_columns = ["group_id", ("member_size", "int")]

    graph = Graph(graphscope_session, oid_type="string", generate_eid=False)
    graph = graph.add_vertices(student_v, "student", vertex_columns,
                               "student_id")
    graph = graph.add_edges(student_group_e, "group", edge_columns)

    expected_vertex_schema = {
        "name": 21,
        "lesson_nums": 11,
        "avg_score": 18,
        "student_id": 21,
    }
    expected_edge_schema = {"group_id": 21, "member_size": 11}

    assert graph.schema.vertex_properties == [expected_vertex_schema]
    assert graph.schema.edge_properties == [expected_edge_schema]
Exemple #19
0
def arrow_property_graph_undirected(graphscope_session):
    """Yield an undirected twitter graph with 2 vertex and 2 edge labels.

    Each edge label (e0, e1) is loaded for all four (src, dst) combinations
    of v0/v1 from files named ``twitter_e_<src>_<dst>_<edge>``. The graph is
    unloaded once the consumer resumes the generator.
    """
    g = Graph(graphscope_session, directed=False, generate_eid=False)

    for v_idx in (0, 1):
        g = g.add_vertices(f"{new_property_dir}/twitter_v_{v_idx}",
                           f"v{v_idx}")

    endpoint_pairs = ((0, 0), (0, 1), (1, 0), (1, 1))
    for e_idx in (0, 1):
        for src_idx, dst_idx in endpoint_pairs:
            g = g.add_edges(
                f"{new_property_dir}/twitter_e_{src_idx}_{dst_idx}_{e_idx}",
                f"e{e_idx}",
                ["weight"],
                f"v{src_idx}",
                f"v{dst_idx}",
            )

    yield g
    g.unload()
Exemple #20
0
def load_ldbc(sess, prefix, directed=True):
    """Load ldbc dataset as a ArrowProperty Graph.

    Vertex files are named ``<label>_0_0.csv`` and edge files
    ``<src>_<label>_<dst>_0_0.csv``; all are pipe-delimited and live directly
    under ``prefix``. Vertices are added first, then edges, in the order of
    the tables below.

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
        prefix (str): Data directory.
        directed (bool, optional): Determine to load a directed or undirected graph.
            Defaults to True.

    Returns:
        :class:`graphscope.Graph`: A Graph object which graph type is ArrowProperty
    """

    def csv(stem):
        # Every LDBC file shares the "_0_0.csv" suffix and "|" delimiter.
        return Loader(os.path.join(prefix, f"{stem}_0_0.csv"), delimiter="|")

    # (vertex label, property columns); the id column is always "id".
    vertex_specs = [
        ("comment",
         ["creationDate", "locationIP", "browserUsed", "content", "length"]),
        ("organisation", ["type", "name", "url"]),
        ("tagclass", ["name", "url"]),
        ("person", [
            "firstName",
            "lastName",
            "gender",
            "birthday",
            "creationDate",
            "locationIP",
            "browserUsed",
        ]),
        ("forum", ["title", "creationDate"]),
        ("place", ["name", "url", "type"]),
        ("post", [
            "imageFile",
            "creationDate",
            "locationIP",
            "browserUsed",
            "language",
            "content",
            "length",
        ]),
        ("tag", ["name", "url"]),
    ]

    # (src label, edge label, dst label, property columns or None).
    # None means the properties argument is not passed at all.
    edge_specs = [
        ("comment", "replyOf", "comment", None),
        ("comment", "replyOf", "post", None),
        ("place", "isPartOf", "place", None),
        ("tagclass", "isSubclassOf", "tagclass", None),
        ("forum", "hasTag", "tag", None),
        ("comment", "hasTag", "tag", None),
        ("post", "hasTag", "tag", None),
        ("person", "knows", "person", ["creationDate"]),
        ("forum", "hasModerator", "person", None),
        ("person", "hasInterest", "tag", None),
        ("post", "isLocatedIn", "place", None),
        ("comment", "isLocatedIn", "place", None),
        ("organisation", "isLocatedIn", "place", None),
        ("person", "isLocatedIn", "place", None),
        ("tag", "hasType", "tagclass", None),
        ("post", "hasCreator", "person", None),
        ("comment", "hasCreator", "person", None),
        ("forum", "containerOf", "post", None),
        ("forum", "hasMember", "person", ["joinDate"]),
        ("person", "workAt", "organisation", ["workFrom"]),
        ("person", "likes", "comment", ["creationDate"]),
        ("person", "likes", "post", ["creationDate"]),
        ("person", "studyAt", "organisation", ["classYear"]),
    ]

    graph = Graph(sess, directed=directed)

    for label, columns in vertex_specs:
        graph = graph.add_vertices(csv(label), label, columns, "id")

    for src, label, dst, columns in edge_specs:
        source = csv(f"{src}_{label}_{dst}")
        if columns is None:
            graph = graph.add_edges(source,
                                    label,
                                    src_label=src,
                                    dst_label=dst)
        else:
            graph = graph.add_edges(source,
                                    label,
                                    columns,
                                    src_label=src,
                                    dst_label=dst)

    return graph
Exemple #21
0
def test_load_from_pandas(graphscope_session, student_group_e_df,
                          student_v_df):
    """Build a graph from the DataFrame fixtures using only default arguments."""
    graph = (
        Graph(graphscope_session)
        .add_vertices(student_v_df)
        .add_edges(student_group_e_df)
    )
    assert graph.loaded()
Exemple #22
0
def test_load_from_numpy(graphscope_session, student_group_e_array,
                         student_v_array):
    """Build a graph from the array fixtures using only default arguments."""
    graph = (
        Graph(graphscope_session)
        .add_vertices(student_v_array)
        .add_edges(student_group_e_array)
    )
    assert graph.loaded()