def test_multi_src_dst_edge_loader(
    graphscope_session,
    student_group_e,
    teacher_group_e,
    student_v,
    teacher_v,
):
    """Register the "group" edge label for two different vertex-label pairs.

    The same edge label is added once for student -> student and once for
    teacher -> teacher. The test only asserts that the resulting graph
    reports itself as loaded.
    """
    graph = Graph(graphscope_session)
    graph = graph.add_vertices(
        student_v, "student", ["name", "lesson_nums", "avg_score"], "student_id"
    )
    graph = graph.add_vertices(
        teacher_v, "teacher", ["student_num", "score", "email", "tel"], "teacher_id"
    )

    # Both edge sources share the label and property list; only the vertex
    # label and the leader/member column names differ.
    for edge_source, role in ((student_group_e, "student"), (teacher_group_e, "teacher")):
        graph = graph.add_edges(
            edge_source,
            "group",
            ["group_id", "member_size"],
            src_label=role,
            dst_label=role,
            src_field=f"leader_{role}_id",
            dst_field=f"member_{role}_id",
        )

    assert graph.loaded()
def test_Load_complex_graph_variants(
    graphscope_session,
    score_e,
    student_group_e_df,
    student_v_array,
    teacher_v_oss,
    lesson_v_mars,
):
    """Build a three-vertex-label graph from differently sourced fixtures.

    Vertices come from ``student_v_array``, ``teacher_v_oss`` and
    ``lesson_v_mars``; edges come from ``score_e`` and
    ``student_group_e_df``. The fixture names suggest array / OSS / Mars /
    DataFrame inputs, but that is defined by the fixtures, not by this test.

    The test asserts only that a schema is available after all additions.
    """
    graph = Graph(graphscope_session)
    graph = graph.add_vertices(
        student_v_array, "student", ["name", "lesson_nums", "avg_score"], "student_id"
    )
    graph = graph.add_vertices(
        teacher_v_oss, "teacher", ["student_num", "score", "email", "tel"]
    )
    graph = graph.add_vertices(lesson_v_mars, "lesson")

    graph = graph.add_edges(
        score_e,
        "score",
        ["score", "score_id"],
        src_label="student",
        dst_label="lesson",
        # Was misspelled "studnet_id"; the other score-edge test in this file
        # uses "student_id" as the source column.
        src_field="student_id",
        dst_field="subject",
    )
    graph = graph.add_edges(
        student_group_e_df,
        "group",
        ["member_size"],
        src_label="student",
        dst_label="student",
        # Was misspelled "leader_studnet_id"; every other group-edge test in
        # this file uses "leader_student_id".
        src_field="leader_student_id",
        dst_field="member_student_id",
    )

    assert graph.schema is not None
def test_error_on_duplicate_labels(graphscope_session, student_group_e, student_v):
    """Adding a vertex or edge label a second time must raise ``ValueError``."""
    graph = Graph(graphscope_session).add_vertices(student_v, "student")

    # Second registration of the "student" vertex label is rejected.
    with pytest.raises(ValueError, match="Label student already existed in graph"):
        graph.add_vertices(student_v, "student")

    graph = graph.add_edges(student_group_e, "group")

    # Second registration of the "group" edge label is rejected as well.
    with pytest.raises(ValueError, match="already existed in graph"):
        graph.add_edges(student_group_e, "group")
def test_multiple_add_vertices_edges(graphscope_session):
    """Interleave several ``add_vertices`` / ``add_edges`` calls on one graph.

    The modern_graph CSV files under ``$GS_TEST_DIR`` are loaded first under
    their natural labels and then a second time under ``*2`` labels; the
    schema's label lists are checked after each round.
    """
    prefix = os.path.expandvars("${GS_TEST_DIR}/modern_graph")

    def csv_loader(name):
        # All modern_graph files are '|'-delimited CSVs in the same directory.
        return Loader(f"{prefix}/{name}.csv", delimiter="|")

    graph = Graph(graphscope_session)
    graph = graph.add_vertices(csv_loader("person"), "person")
    graph = graph.add_edges(csv_loader("knows"), "knows")
    graph = graph.add_vertices(csv_loader("software"), "software")
    graph = graph.add_edges(
        csv_loader("created"),
        "created",
        src_label="person",
        dst_label="software",
    )

    assert graph.schema.vertex_labels == ["person", "software"]
    # Edges were added as "knows" then "created". The previous expectation
    # listed them in the reverse order, contradicting the insertion order
    # asserted by test_add_vertices_edges for the same sequence of calls.
    assert graph.schema.edge_labels == ["knows", "created"]

    graph = graph.add_vertices(csv_loader("person"), "person2")
    graph = graph.add_edges(
        csv_loader("knows"),
        "knows2",
        src_label="person2",
        dst_label="person2",
    )
    graph = graph.add_vertices(csv_loader("software"), "software2")
    graph = graph.add_edges(
        csv_loader("created"),
        "created2",
        src_label="person2",
        dst_label="software2",
    )

    # After the second round only membership is checked, not ordering.
    assert sorted(graph.schema.vertex_labels) == [
        "person",
        "person2",
        "software",
        "software2",
    ]
    assert sorted(graph.schema.edge_labels) == [
        "created",
        "created2",
        "knows",
        "knows2",
    ]
def test_serialize_roundtrip(gs_session_distributed, p2p_property_dir):
    """Save a property graph, load it back, and run SSSP on the reloaded copy.

    The graph is built from the p2p-31 vertex/edge files, written to
    ``/tmp/serialize``, reloaded through ``Graph.load_from`` and projected to
    a simple graph. SSSP distances from source vertex 6 are then compared
    with known values for the vertices selected by ``vertex_range``.
    """
    graph = Graph(gs_session_distributed, generate_eid=False)
    graph = graph.add_vertices(f"{p2p_property_dir}/p2p-31_property_v_0", "person")
    graph = graph.add_edges(
        f"{p2p_property_dir}/p2p-31_property_e_0",
        label="knows",
        src_label="person",
        dst_label="person",
    )

    # NOTE(review): fixed path kept as-is; presumably it must be visible to
    # every worker of the distributed session -- confirm before changing it.
    graph.save_to("/tmp/serialize")
    new_graph = Graph.load_from("/tmp/serialize", gs_session_distributed)

    pg = new_graph.project_to_simple(0, 0, 0, 2)
    ctx = graphscope.sssp(pg, src=6)
    frame = ctx.to_dataframe({"node": "v.id", "r": "r"}, vertex_range={"end": 6})
    ret = frame.sort_values(by=["node"]).to_numpy(dtype=float)

    expect = np.array(
        [[1.0, 260.0], [2.0, 229.0], [3.0, 310.0], [4.0, 256.0], [5.0, 303.0]]
    )
    # array_equal also checks the shape. `np.all(ret == expect)` would
    # broadcast, so a result with the wrong number of rows could be compared
    # element-wise against every expected row instead of failing on shape.
    assert np.array_equal(ret, expect)
def test_properties_omitted_loader(graphscope_session, student_group_e, student_v):
    """Load with empty property lists and check the schema's property counts.

    Both the vertex and the edge source are added with ``[]`` as the
    property list; the first vertex label is expected to expose 4
    properties and the first edge label 2.
    """
    graph = (
        Graph(graphscope_session, generate_eid=False)
        .add_vertices(student_v, "student", [], "student_id")
        .add_edges(student_group_e, "group", [])
    )

    vertex_property_count = len(graph.schema.vertex_properties[0])
    assert vertex_property_count == 4

    edge_property_count = len(graph.schema.edge_properties[0])
    assert edge_property_count == 2
def test_error_on_ambigious_default_label(
    graphscope_session, student_group_e, student_v, teacher_v
):
    """Adding edges without src/dst labels must fail with two vertex labels.

    With both "student" and "teacher" registered, ``add_edges`` is called
    without ``src_label`` / ``dst_label`` and is expected to raise an
    ``AssertionError`` mentioning an ambiguous vertex label.
    """
    graph = Graph(graphscope_session)
    for vertex_source, label in ((student_v, "student"), (teacher_v, "teacher")):
        graph = graph.add_vertices(vertex_source, label)

    with pytest.raises(AssertionError, match="ambiguous vertex label"):
        graph.add_edges(student_group_e, "group")
def arrow_property_graph_lpa(graphscope_session):
    """Yield the LPA dataset as a property graph, then unload it.

    Two vertex labels ("v0", "v1") and one weighted edge label ("e0", from
    "v0" to "v1") are loaded from ``{property_dir}/lpa_dataset``. This is a
    generator; presumably it is registered as a pytest fixture by a decorator
    that is not visible in this view.
    """
    dataset_dir = f"{property_dir}/lpa_dataset"
    g = (
        Graph(graphscope_session, generate_eid=False)
        .add_vertices(f"{dataset_dir}/lpa_3000_v_0", "v0")
        .add_vertices(f"{dataset_dir}/lpa_3000_v_1", "v1")
        .add_edges(f"{dataset_dir}/lpa_3000_e_0", "e0", ["weight"], "v0", "v1")
    )
    yield g
    # Runs when the generator is resumed after the consumer is done.
    g.unload()
def test_add_vertices_edges(graphscope_session):
    """Exercise valid and invalid orderings of vertex/edge additions.

    Uses the modern_graph CSV files under ``$GS_TEST_DIR``. Three invalid
    ``add_edges`` calls are expected to raise ``ValueError``; the valid calls
    are checked through the schema's label lists.
    """
    prefix = os.path.expandvars("${GS_TEST_DIR}/modern_graph")

    def table(name):
        # Every input is a '|'-delimited CSV in the modern_graph directory.
        return Loader(f"{prefix}/{name}.csv", delimiter="|")

    graph = Graph(graphscope_session)
    graph = graph.add_vertices(table("person"), "person")
    graph = graph.add_edges(table("knows"), "knows")

    assert graph.schema.vertex_labels == ["person"]
    assert graph.schema.edge_labels == ["knows"]

    # A second edge label without explicit endpoints is rejected.
    with pytest.raises(ValueError, match="src label and dst label cannot be None"):
        graph.add_edges(table("knows"), "created")

    # "software" has not been added as a vertex label yet.
    with pytest.raises(ValueError, match="src label or dst_label not existed in graph"):
        graph.add_edges(
            table("created"),
            "created",
            src_label="person",
            dst_label="software",
        )

    graph = graph.add_vertices(table("software"), "software")

    # Re-using the existing "knows" label for a new (src, dst) pair is rejected.
    with pytest.raises(ValueError, match="Cannot add new relation to existed graph"):
        graph.add_edges(
            table("knows"),
            "knows",
            src_label="software",
            dst_label="software",
        )

    graph = graph.add_edges(
        table("created"),
        "created",
        src_label="person",
        dst_label="software",
    )

    assert graph.schema.vertex_labels == ["person", "software"]
    assert graph.schema.edge_labels == ["knows", "created"]
def test_load_complex_graph(
    graphscope_session,
    score_e,
    student_teacher_e,
    teacher_lesson_e,
    student_v,
    teacher_v,
    lesson_v,
):
    """Load three vertex labels and three edge labels with string vertex ids.

    The graph is created with ``oid_type="string"``. The test asserts only
    that a schema is available once everything has been added.
    """
    graph = Graph(graphscope_session, oid_type="string")

    graph = graph.add_vertices(
        student_v, "student", ["name", "lesson_nums", "avg_score"], "student_id"
    )
    graph = graph.add_vertices(
        teacher_v, "teacher", ["student_num", "score", "email", "tel"]
    )
    graph = graph.add_vertices(lesson_v, "lesson")

    # (source, label, properties, src label, dst label, src column, dst column)
    edge_specs = (
        (score_e, "score", ["score", "score_id"],
         "student", "lesson", "student_id", "subject"),
        (student_teacher_e, "student_teacher", ["teaching_score"],
         "student", "teacher", "student_id", "teacher_id"),
        (teacher_lesson_e, "teacher_lesson", ["times"],
         "teacher", "lesson", "teacher_id", "lesson"),
    )
    for source, label, properties, src, dst, src_column, dst_column in edge_specs:
        graph = graph.add_edges(
            source,
            label,
            properties,
            src_label=src,
            dst_label=dst,
            src_field=src_column,
            dst_field=dst_column,
        )

    assert graph.schema is not None
def p2p_property_graph_undirected(graphscope_session):
    """Yield the p2p-31 property graph loaded as undirected, then unload it.

    One vertex label ("person") and one edge label ("knows") are read from
    ``property_dir``. This is a generator; presumably it is registered as a
    pytest fixture by a decorator that is not visible in this view.
    """
    vertex_file = f"{property_dir}/p2p-31_property_v_0"
    edge_file = f"{property_dir}/p2p-31_property_e_0"

    g = (
        Graph(graphscope_session, directed=False, generate_eid=False)
        .add_vertices(vertex_file, "person")
        .add_edges(edge_file, label="knows", src_label="person", dst_label="person")
    )
    yield g
    # Runs when the generator is resumed after the consumer is done.
    g.unload()
def test_error_on_non_default_and_non_existing_v_label(
    graphscope_session, student_group_e, student_v
):
    """Invalid src/dst label arguments to ``add_edges`` must raise ``ValueError``.

    Two cases are covered: both labels name a vertex label ("v") that was
    never added, and only ``src_label`` is given without ``dst_label``.
    """
    graph = Graph(graphscope_session).add_vertices(student_v, "student")

    with pytest.raises(ValueError, match="src label or dst_label not existed in graph"):
        graph.add_edges(student_group_e, "group", src_label="v", dst_label="v")

    with pytest.raises(ValueError, match="must be both specified or either unspecified"):
        graph.add_edges(student_group_e, src_label="v")
def test_v_property_omitted_form_loader(graphscope_session, student_group_e, student_v):
    """Load vertices without naming their properties or id column.

    Per the original author's note, when vertex properties are omitted all
    columns are loaded and the first one is used as the vertex id. The edge
    call also leaves out ``src_label`` / ``dst_label``, relying on the
    library's default label resolution.
    """
    edge_columns = {
        "src_field": "leader_student_id",
        "dst_field": "member_student_id",
    }
    graph = (
        Graph(graphscope_session)
        .add_vertices(student_v, "student")
        .add_edges(student_group_e, "group", ["group_id", "member_size"], **edge_columns)
    )
    assert graph.loaded()
def test_error_on_remove_vertices_edges(graphscope_session):
    """Invalid ``remove_vertices`` / ``remove_edges`` calls must raise ``ValueError``.

    The modern_graph dataset under ``$GS_TEST_DIR`` is added first. Four
    invalid removals are tried, then ``graph.loaded()`` is asserted, and
    finally two removals that would otherwise be valid are expected to be
    rejected too.
    """
    prefix = os.path.expandvars("${GS_TEST_DIR}/modern_graph")

    def table(name):
        # Every input is a '|'-delimited CSV in the modern_graph directory.
        return Loader(f"{prefix}/{name}.csv", delimiter="|")

    graph = (
        Graph(graphscope_session)
        .add_vertices(table("person"), "person")
        .add_edges(table("knows"), "knows")
        .add_vertices(table("software"), "software")
        .add_edges(table("created"), "created", src_label="person", dst_label="software")
    )

    # "software" is still an endpoint of the "created" relation.
    with pytest.raises(ValueError, match="Vertex software has usage in relation"):
        graph.remove_vertices("software")

    # Unknown labels.
    with pytest.raises(ValueError, match="label xxx not in vertices"):
        graph.remove_vertices("xxx")
    with pytest.raises(ValueError, match="label xxx not in edges"):
        graph.remove_edges("xxx")

    # Known edge label, but no relation between the requested endpoints.
    with pytest.raises(ValueError, match="Cannot find edges to remove"):
        graph.remove_edges("knows", src_label="xxx", dst_label="xxx")

    assert graph.loaded()

    with pytest.raises(
        ValueError, match="Remove vertices from a loaded graph doesn't supported yet"
    ):
        graph.remove_vertices("person")
    with pytest.raises(
        ValueError, match="Remove edges from a loaded graph doesn't supported yet"
    ):
        graph.remove_edges("knows")
def test_vid_omitted_form_loader(graphscope_session, student_group_e, student_v):
    """Load vertices without naming the id column.

    Per the original author's note, when the vertex id field is omitted the
    first column is used as the id.
    """
    endpoints = {
        "src_label": "student",
        "dst_label": "student",
        "src_field": "leader_student_id",
        "dst_field": "member_student_id",
    }
    graph = (
        Graph(graphscope_session)
        .add_vertices(student_v, "student", ["name", "lesson_nums", "avg_score"])
        .add_edges(student_group_e, "group", ["group_id", "member_size"], **endpoints)
    )
    assert graph.loaded()
def load_ogbn_mag(sess, prefix, directed=True):
    """Load ogbn_mag graph.

    The ogbn-mag dataset is a heterogeneous network composed of a subset of
    the Microsoft Academic Graph (MAG). See more details here:

        https://ogb.stanford.edu/docs/nodeprop/#ogbn-mag

    Four vertex labels (paper, author, institution, field_of_study) and four
    edge labels (affiliated, hasTopic, cites, writes) are read from CSV files
    located directly under ``prefix``.

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
        prefix (str): Data directory.
        directed (bool, optional): Determine to load a directed or undirected
            graph. Defaults to True.

    Returns:
        :class:`graphscope.Graph`: A Graph object which graph type is ArrowProperty
    """
    # `directed` was documented but missing from the signature; it is now
    # accepted and forwarded, mirroring load_ldbc.
    graph = Graph(sess, directed=directed)

    graph = graph.add_vertices(os.path.join(prefix, "paper.csv"), "paper")
    graph = graph.add_vertices(os.path.join(prefix, "author.csv"), "author")
    graph = graph.add_vertices(os.path.join(prefix, "institution.csv"), "institution")
    graph = graph.add_vertices(
        os.path.join(prefix, "field_of_study.csv"), "field_of_study"
    )

    graph = graph.add_edges(
        os.path.join(prefix, "author_affiliated_with_institution.csv"),
        "affiliated",
        src_label="author",
        dst_label="institution",
    )
    graph = graph.add_edges(
        os.path.join(prefix, "paper_has_topic_field_of_study.csv"),
        "hasTopic",
        src_label="paper",
        dst_label="field_of_study",
    )
    graph = graph.add_edges(
        os.path.join(prefix, "paper_cites_paper.csv"),
        "cites",
        src_label="paper",
        dst_label="paper",
    )
    graph = graph.add_edges(
        os.path.join(prefix, "author_writes_paper.csv"),
        "writes",
        src_label="author",
        dst_label="paper",
    )
    return graph
def test_complete_form_loader(graphscope_session, student_group_e, student_v):
    """Load vertices and edges with every loader argument spelled out.

    Label, property list and id / endpoint columns are all given explicitly.
    Per the original author's note, column types are inferred by the loader.
    """
    vertex_properties = ["name", "lesson_nums", "avg_score"]
    edge_properties = ["group_id", "member_size"]
    endpoints = {
        "src_label": "student",
        "dst_label": "student",
        "src_field": "leader_student_id",
        "dst_field": "member_student_id",
    }

    graph = Graph(graphscope_session)
    graph = graph.add_vertices(student_v, "student", vertex_properties, "student_id")
    graph = graph.add_edges(student_group_e, "group", edge_properties, **endpoints)

    assert graph.loaded()
def test_loader_with_specified_data_type(graphscope_session, student_group_e, student_v):
    """Declare explicit property types and check the schema's type codes.

    Some properties are given as ``(name, type)`` tuples. The schema is then
    expected to map each property name to an integer code.
    """
    vertex_properties = ["name", ("lesson_nums", "int"), ("avg_score", "float")]
    edge_properties = ["group_id", ("member_size", "int")]

    graph = Graph(graphscope_session, oid_type="string", generate_eid=False)
    graph = graph.add_vertices(student_v, "student", vertex_properties, "student_id")
    graph = graph.add_edges(student_group_e, "group", edge_properties)

    # NOTE(review): 21 / 11 / 18 are presumably the library's codes for
    # string / int / float -- confirm against its data-type enumeration.
    expected_vertex_types = dict(name=21, lesson_nums=11, avg_score=18, student_id=21)
    expected_edge_types = dict(group_id=21, member_size=11)

    assert graph.schema.vertex_properties == [expected_vertex_types]
    assert graph.schema.edge_properties == [expected_edge_types]
def arrow_property_graph_undirected(graphscope_session):
    """Yield the twitter property graph loaded as undirected, then unload it.

    Two vertex labels ("v0", "v1") and two edge labels ("e0", "e1") are
    loaded from ``new_property_dir``. Each edge label is added once for every
    (src, dst) combination of the two vertex labels. This is a generator;
    presumably it is registered as a pytest fixture by a decorator that is
    not visible in this view.
    """
    g = Graph(graphscope_session, directed=False, generate_eid=False)

    for vertex_index in (0, 1):
        g = g.add_vertices(
            f"{new_property_dir}/twitter_v_{vertex_index}", f"v{vertex_index}"
        )

    # Edge files are named twitter_e_<src>_<dst>_<edge label index>. The loop
    # nesting reproduces the original call order: all of e0, then all of e1.
    for edge_index in (0, 1):
        for src_index in (0, 1):
            for dst_index in (0, 1):
                g = g.add_edges(
                    f"{new_property_dir}/twitter_e_{src_index}_{dst_index}_{edge_index}",
                    f"e{edge_index}",
                    ["weight"],
                    f"v{src_index}",
                    f"v{dst_index}",
                )

    yield g
    # Runs when the generator is resumed after the consumer is done.
    g.unload()
def load_ldbc(sess, prefix, directed=True):
    """Load ldbc dataset as a ArrowProperty Graph.

    Eight vertex tables and twenty-three edge tables are read from
    '|'-delimited CSV files under ``prefix``. Vertex files are named
    ``<label>_0_0.csv`` and edge files ``<src>_<label>_<dst>_0_0.csv``.

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
        prefix (str): Data directory.
        directed (bool, optional): Determine to load a directed or undirected
            graph. Defaults to True.

    Returns:
        :class:`graphscope.Graph`: A Graph object which graph type is ArrowProperty
    """

    def table(file_name):
        # Every LDBC input file uses '|' as its column delimiter.
        return Loader(os.path.join(prefix, file_name), delimiter="|")

    # (vertex label, property columns); every vertex table is keyed by "id".
    vertex_specs = [
        ("comment", ["creationDate", "locationIP", "browserUsed", "content", "length"]),
        ("organisation", ["type", "name", "url"]),
        ("tagclass", ["name", "url"]),
        (
            "person",
            [
                "firstName",
                "lastName",
                "gender",
                "birthday",
                "creationDate",
                "locationIP",
                "browserUsed",
            ],
        ),
        ("forum", ["title", "creationDate"]),
        ("place", ["name", "url", "type"]),
        (
            "post",
            [
                "imageFile",
                "creationDate",
                "locationIP",
                "browserUsed",
                "language",
                "content",
                "length",
            ],
        ),
        ("tag", ["name", "url"]),
    ]

    # (src label, edge label, dst label, property columns or None).
    # None means the properties argument is not passed to add_edges at all.
    edge_specs = [
        ("comment", "replyOf", "comment", None),
        ("comment", "replyOf", "post", None),
        ("place", "isPartOf", "place", None),
        ("tagclass", "isSubclassOf", "tagclass", None),
        ("forum", "hasTag", "tag", None),
        ("comment", "hasTag", "tag", None),
        ("post", "hasTag", "tag", None),
        ("person", "knows", "person", ["creationDate"]),
        ("forum", "hasModerator", "person", None),
        ("person", "hasInterest", "tag", None),
        ("post", "isLocatedIn", "place", None),
        ("comment", "isLocatedIn", "place", None),
        ("organisation", "isLocatedIn", "place", None),
        ("person", "isLocatedIn", "place", None),
        ("tag", "hasType", "tagclass", None),
        ("post", "hasCreator", "person", None),
        ("comment", "hasCreator", "person", None),
        ("forum", "containerOf", "post", None),
        ("forum", "hasMember", "person", ["joinDate"]),
        ("person", "workAt", "organisation", ["workFrom"]),
        ("person", "likes", "comment", ["creationDate"]),
        ("person", "likes", "post", ["creationDate"]),
        ("person", "studyAt", "organisation", ["classYear"]),
    ]

    graph = Graph(sess, directed=directed)

    for label, properties in vertex_specs:
        graph = graph.add_vertices(table(f"{label}_0_0.csv"), label, properties, "id")

    for src, label, dst, properties in edge_specs:
        # Pass the property list positionally only when one is given, so the
        # call shape matches a hand-written call without that argument.
        positional = (label,) if properties is None else (label, properties)
        graph = graph.add_edges(
            table(f"{src}_{label}_{dst}_0_0.csv"),
            *positional,
            src_label=src,
            dst_label=dst,
        )

    return graph
def test_load_from_pandas(graphscope_session, student_group_e_df, student_v_df):
    """Build a graph from the ``*_df`` fixtures using only default arguments.

    No labels, properties or id columns are passed; the test asserts that the
    graph reports itself as loaded.
    """
    graph = (
        Graph(graphscope_session)
        .add_vertices(student_v_df)
        .add_edges(student_group_e_df)
    )
    assert graph.loaded()
def test_load_from_numpy(graphscope_session, student_group_e_array, student_v_array):
    """Build a graph from the ``*_array`` fixtures using only default arguments.

    No labels, properties or id columns are passed; the test asserts that the
    graph reports itself as loaded.
    """
    graph = (
        Graph(graphscope_session)
        .add_vertices(student_v_array)
        .add_edges(student_group_e_array)
    )
    assert graph.loaded()