def test_load_modern_graph(modern_graph_data_dir):
    """Load the modern graph and run a set of Gremlin count queries on it.

    Every query in the list is expected to return a single result equal to 1.
    The session is always closed, even when loading or an assertion fails, so
    a failing run does not leave the launched session behind.

    Args:
        modern_graph_data_dir: Forwarded to ``load_modern_graph`` as the data
            location (presumably a pytest fixture -- confirm in conftest).
    """
    image, ci = get_gs_image_on_ci_env()
    session_kwargs = {"show_log": True, "num_workers": 1}
    if ci:
        # Only override the image when the helper reports a CI environment.
        session_kwargs["k8s_gs_image"] = image
    sess = graphscope.session(**session_kwargs)
    try:
        graph = load_modern_graph(sess, modern_graph_data_dir)
        interactive = sess.gremlin(graph)
        queries = [
            "g.V().has('name','marko').count()",
            "g.V().has('person','name','marko').count()",
            "g.V().has('person','name','marko').outE('created').count()",
            "g.V().has('person','name','marko').outE('created').inV().count()",
            "g.V().has('person','name','marko').out('created').count()",
            "g.V().has('person','name','marko').out('created').values('name').count()",
        ]
        for q in queries:
            result = interactive.execute(q).all().result()[0]
            # Include the query text so a failure identifies which one broke.
            assert result == 1, q
    finally:
        sess.close()
def test_border_cases():
    """Check default-session hand-over between three ``hosts`` sessions.

    Only one session may be the default at a time: making a second one the
    default while the first is still open must raise ``ValueError``. A graph
    built on the third session stays usable once that session becomes the
    default.
    """
    first = graphscope.session(cluster_type="hosts")
    second = graphscope.session(cluster_type="hosts")
    third = graphscope.session(cluster_type="hosts")

    first.as_default()
    assert graphscope.get_default_session() == first

    # Built on the third session while the first one is still the default.
    g3 = load_p2p_network(third)
    pg3 = g3.project(vertices={"host": ["id"]}, edges={"connect": ["dist"]})

    already_active = (
        "A default session is already active. You must explicitly call Session.close()."
    )
    with pytest.raises(ValueError, match=already_active):
        second.as_default()

    # Closing the current default frees the slot for the next session.
    first.close()
    second.as_default()
    assert graphscope.get_default_session() == second
    second.close()

    third.as_default()
    assert graphscope.get_default_session() == third
    sssp = graphscope.sssp(pg3, src=4)  # ok, g3 belong to s3
    third.close()
def test_demo(data_dir):
    """Run the demo pipeline on the LDBC graph with a single worker.

    Extracts a person/knows subgraph through the interactive engine, runs
    PageRank and triangle counting on its simple projection, and adds both
    results back to the subgraph as columns.
    """
    image, ci = get_gs_image_on_ci_env()
    options = {"show_log": True, "num_workers": 1}
    if ci:
        options["k8s_gs_image"] = image
    sess = graphscope.session(**options)
    graph = load_ldbc(sess, data_dir)

    # Interactive engine
    interactive = sess.gremlin(graph)
    knows_query = 'g.V().hasLabel("person").outE("knows")'
    sub_graph = interactive.subgraph(knows_query)  # noqa: F841

    # Analytical engine
    # project the projected graph to simple graph.
    simple_g = sub_graph.project_to_simple(v_label="person", e_label="knows")
    pr_result = graphscope.pagerank(simple_g, delta=0.8)
    tc_result = graphscope.triangles(simple_g)

    # add the PageRank and triangle-counting results as new columns to the
    # property graph
    # FIXME: Add column to sub_graph
    sub_graph.add_column(pr_result, {"Ranking": "r"})
    sub_graph.add_column(tc_result, {"TC": "r"})

    # GNN engine
    sess.close()
def test_demo_distribute(data_dir, modern_graph_data_dir):
    """Run the demo pipeline with two workers and validate subgraph sizes.

    For the LDBC graph, the vertex and edge counts of the extracted subgraph
    must match counts computed by equivalent Gremlin queries on the full
    graph. The vertex-count check is repeated for the modern graph.
    """

    def first_result(client, query):
        # Submit the query and return the first element of its result list.
        return client.execute(query).all().result()[0]

    knows_query = 'g.V().hasLabel("person").outE("knows")'
    persons_query = 'g.V().hasLabel("person").outE("knows").bothV().dedup().count()'

    image, ci = get_gs_image_on_ci_env()
    options = {"show_log": True, "num_workers": 2}
    if ci:
        options["k8s_gs_image"] = image
    sess = graphscope.session(**options)
    graph = load_ldbc(sess, data_dir)

    # Interactive engine
    interactive = sess.gremlin(graph)
    sub_graph = interactive.subgraph(knows_query)  # noqa: F841
    person_count = first_result(interactive, persons_query)
    knows_count = first_result(
        interactive, 'g.V().hasLabel("person").outE("knows").count()'
    )
    interactive2 = sess.gremlin(sub_graph)
    sub_person_count = first_result(interactive2, "g.V().count()")
    sub_knows_count = first_result(interactive2, "g.E().count()")
    assert person_count == sub_person_count
    assert knows_count == sub_knows_count

    # Analytical engine
    # project the projected graph to simple graph.
    simple_g = sub_graph.project_to_simple(v_label="person", e_label="knows")
    pr_result = graphscope.pagerank(simple_g, delta=0.8)
    tc_result = graphscope.triangles(simple_g)

    # add the PageRank and triangle-counting results as new columns to the
    # property graph
    # FIXME: Add column to sub_graph
    sub_graph.add_column(pr_result, {"Ranking": "r"})
    sub_graph.add_column(tc_result, {"TC": "r"})

    # test subgraph on modern graph
    mgraph = load_modern_graph(sess, modern_graph_data_dir)

    # Interactive engine
    minteractive = sess.gremlin(mgraph)
    msub_graph = minteractive.subgraph(knows_query)  # noqa: F841
    person_count = first_result(minteractive, persons_query)
    msub_interactive = sess.gremlin(msub_graph)
    sub_person_count = first_result(msub_interactive, "g.V().count()")
    assert person_count == sub_person_count

    # GNN engine
    sess.close()
def test_error_on_used_after_close():
    """Verify errors after a session is closed, and that double close is safe.

    Loading a graph on a closed session must raise ``RuntimeError``; so must
    ``graphscope.load_from`` when there is no default session. Closing an
    already closed session must leave its status at ``"closed"``.
    """
    # use after session close
    closed = graphscope.session(run_on_local=True)
    closed.close()

    with pytest.raises(RuntimeError, match="Attempted to use a closed Session."):
        load_graph(closed)

    edge_spec = {
        "e0": (
            "twitter_property_e_0#header_row=true",
            ["dist"],
            ("src_id", "v0"),
            ("dst_id", "v1"),
        ),
    }
    vertex_spec = {
        "v0": "{}/twitter_property_v_0#header_row=true",
        "v1": "{}/twitter_property_v_1#header_row=true",
    }
    with pytest.raises(RuntimeError, match="No default session found."):
        g = graphscope.load_from(edges=edge_spec, vertices=vertex_spec)

    # close after close
    twice = graphscope.session(run_on_local=True)
    for _ in range(2):
        twice.close()
        assert twice.info["status"] == "closed"
def test_multiple_session():
    """Launch two concurrent sessions and check their engine host counts.

    The first session uses the default namespace with one worker; the second
    uses a randomly suffixed namespace with two workers. Each must be active
    and report as many engine hosts as workers requested.
    """
    suffix = "".join(random.choice(string.ascii_lowercase) for _ in range(6))
    namespace = "gs-multi-" + suffix
    gs_image = get_gs_image_on_ci_env()

    def assert_active(session, expected_hosts):
        # engine_hosts is a comma-separated string, one entry per engine.
        info = session.info
        assert info["status"] == "active"
        assert len(info["engine_hosts"].split(",")) == expected_hosts

    sess = graphscope.session(
        num_workers=1,
        k8s_gs_image=gs_image,
        k8s_volumes=get_k8s_volumes(),
    )
    assert_active(sess, 1)

    sess2 = graphscope.session(
        k8s_namespace=namespace,
        num_workers=2,
        k8s_gs_image=gs_image,
        k8s_volumes=get_k8s_volumes(),
    )
    assert_active(sess2, 2)

    sess2.close()
    sess.close()
def test_with():
    """Check that a session works as a context manager.

    Inside the ``with`` block the session is the default session; after
    leaving a ``with`` block the session reports the ``"closed"`` status.
    """
    with graphscope.session(cluster_type="hosts") as session:
        assert graphscope.get_default_session() == session

    # Entering an existing session object (rather than a fresh call) must
    # close it on exit as well.
    session = graphscope.session(cluster_type="hosts")
    with session:
        pass
    status = session.info["status"]
    assert status == "closed"
def test_border_cases():
    """Check default-session rules across three local sessions.

    ``graphscope.load_from`` must fail with ``RuntimeError`` whenever no
    default session is set (before any ``as_default`` call and after all
    sessions are closed). Setting a second default while one is open must
    raise ``ValueError``.
    """

    def load_without_default_session():
        # Same arguments each time; expected to fail before any loading.
        return graphscope.load_from(
            edges={
                "e0": (
                    "twitter_property_e_0#header_row=true",
                    ["dist"],
                    ("src_id", "v0"),
                    ("dst_id", "v1"),
                ),
            },
            vertices={
                "v0": "{}/twitter_property_v_0#header_row=true",
                "v1": "{}/twitter_property_v_1#header_row=true",
            },
        )

    first = graphscope.session(run_on_local=True)
    second = graphscope.session(run_on_local=True)
    third = graphscope.session(run_on_local=True)

    with pytest.raises(RuntimeError, match="No default session found."):
        g = load_without_default_session()

    first.as_default()
    assert graphscope.get_default_session() == first

    g3 = load_graph(third)  # g3 is op of s3

    already_active = (
        "A default session is already active. You must explicitly call Session.close()."
    )
    with pytest.raises(ValueError, match=already_active):
        second.as_default()

    first.close()
    second.as_default()
    assert graphscope.get_default_session() == second
    second.close()

    third.as_default()
    assert graphscope.get_default_session() == third
    sssp = graphscope.property_sssp(g3, src=4)  # ok, g3 belong to s3
    third.close()

    # With every session closed there is no default session again.
    with pytest.raises(RuntimeError, match="No default session found."):
        g = load_without_default_session()
def graphscope_session():
    """Yield a ``hosts`` session and close it once the consumer is done.

    Enables log output and disables interactive-engine initialization via
    global options. When the ``DEPLOYMENT`` environment variable equals
    ``"standalone"`` the session is limited to a single worker.
    """
    graphscope.set_option(show_log=True)
    graphscope.set_option(initializing_interactive_engine=False)

    session_kwargs = {"cluster_type": "hosts"}
    if os.environ.get("DEPLOYMENT") == "standalone":
        session_kwargs["num_workers"] = 1
    session = graphscope.session(**session_kwargs)

    yield session
    session.close()
def test_multiple_session(ogbn_small_script):
    """Run ``simple_flow`` on two coexisting ``hosts`` sessions.

    A one-worker and a two-worker session are both started and checked to be
    active before the flow is run on each; they are closed in creation order.
    """
    sessions = []
    for workers in (1, 2):
        session = graphscope.session(cluster_type="hosts", num_workers=workers)
        assert session.info["status"] == "active"
        sessions.append(session)

    # Both sessions exist before either flow starts.
    for session in sessions:
        simple_flow(session, ogbn_small_script)

    for session in sessions:
        session.close()
def test_multiple_session(data_dir):
    """Launch two k8s sessions side by side with small resource requests.

    The first session uses the default namespace and one worker, the second a
    randomly suffixed namespace and two workers. Each must be active, be of
    type ``"k8s"`` and list as many engine hosts as workers.
    """
    suffix = "".join(random.choice(string.ascii_lowercase) for _ in range(6))
    namespace = "gs-multi-" + suffix
    gs_image, gie_manager_image = get_gs_image_on_ci_env()

    def resource_options():
        # Rebuilt per session so get_k8s_volumes() runs once for each launch.
        return {
            "k8s_gs_image": gs_image,
            "k8s_gie_graph_manager_image": gie_manager_image,
            "k8s_coordinator_cpu": 0.5,
            "k8s_coordinator_mem": "2500Mi",
            "k8s_vineyard_cpu": 0.1,
            "k8s_vineyard_mem": "512Mi",
            "k8s_engine_cpu": 0.1,
            "k8s_engine_mem": "1500Mi",
            "k8s_vineyard_shared_mem": "2Gi",
            "k8s_etcd_cpu": 2,
            "k8s_volumes": get_k8s_volumes(),
        }

    def assert_active_k8s(session, expected_hosts):
        info = session.info
        assert info["status"] == "active"
        assert info["type"] == "k8s"
        assert len(info["engine_hosts"].split(",")) == expected_hosts

    sess = graphscope.session(num_workers=1, **resource_options())
    assert_active_k8s(sess, 1)

    sess2 = graphscope.session(
        k8s_namespace=namespace, num_workers=2, **resource_options()
    )
    assert_active_k8s(sess2, 2)

    sess2.close()
    sess.close()
def test_demo(data_dir):
    """Run the demo pipeline on the LDBC graph in a small k8s session.

    Extracts a person/knows subgraph through the interactive engine, runs
    PageRank and triangle counting on its simple projection, and adds both
    results back to the subgraph as columns.
    """
    gs_image, gie_manager_image = get_gs_image_on_ci_env()
    options = {
        "num_workers": 1,
        "k8s_gs_image": gs_image,
        "k8s_gie_graph_manager_image": gie_manager_image,
        "k8s_coordinator_cpu": 0.5,
        "k8s_coordinator_mem": "2500Mi",
        "k8s_vineyard_cpu": 0.1,
        "k8s_vineyard_mem": "512Mi",
        "k8s_engine_cpu": 0.1,
        "k8s_engine_mem": "1500Mi",
        "k8s_vineyard_shared_mem": "2Gi",
    }
    sess = graphscope.session(**options)
    graph = load_ldbc(sess, data_dir)

    # Interactive engine
    interactive = sess.gremlin(graph)
    knows_query = 'g.V().hasLabel("person").outE("knows")'
    sub_graph = interactive.subgraph(knows_query)  # noqa: F841

    # Analytical engine
    # project the projected graph to simple graph.
    simple_g = sub_graph.project_to_simple(v_label="person", e_label="knows")
    pr_result = graphscope.pagerank(simple_g, delta=0.8)
    tc_result = graphscope.triangles(simple_g)

    # add the PageRank and triangle-counting results as new columns to the
    # property graph
    # FIXME: Add column to sub_graph
    sub_graph.add_column(pr_result, {"Ranking": "r"})
    sub_graph.add_column(tc_result, {"TC": "r"})

    # GNN engine
    sess.close()
def test_correct_closing_on_hosts():
    """Close a ``hosts`` session twice; neither call may raise."""
    session = graphscope.session(cluster_type="hosts")
    # The first close is the check that the launched coordinator and
    # graphscope-engines on local are correctly closed; the second one tests
    # closing twice.
    for _ in range(2):
        session.close()
def test_launch_session_from_dict():
    """Start a ``hosts`` session configured from a dict and check it is active."""
    session = graphscope.session(cluster_type="hosts", config={"num_workers": 4})
    status = session.info["status"]
    assert status == "active"
    session.close()
def test_launch_session_from_dict():
    """Start a local session configured from a dict and check it is active."""
    session = graphscope.session(run_on_local=True, config={"num_workers": 4})
    status = session.info["status"]
    assert status == "active"
    session.close()
def test_traversal_modern_graph(modern_graph_data_dir):
    """Query the modern graph through a gremlin-python traversal source.

    Each traversal ends in ``count()`` and the single returned value is
    compared with the expected count. The session is closed in a ``finally``
    block so it is released even when a traversal or assertion fails.

    Args:
        modern_graph_data_dir: Forwarded to ``load_modern_graph`` as the data
            location (presumably a pytest fixture -- confirm in conftest).
    """
    from gremlin_python.process.traversal import Order
    from gremlin_python.process.traversal import P

    gs_image, gie_manager_image = get_gs_image_on_ci_env()
    sess = graphscope.session(
        show_log=True,
        num_workers=1,
        k8s_gs_image=gs_image,
        k8s_gie_graph_manager_image=gie_manager_image,
        k8s_coordinator_cpu=0.5,
        k8s_coordinator_mem="2500Mi",
        k8s_vineyard_cpu=0.1,
        k8s_vineyard_mem="512Mi",
        k8s_engine_cpu=0.1,
        k8s_engine_mem="1500Mi",
        k8s_vineyard_shared_mem="2Gi",
        k8s_volumes=get_k8s_volumes(),
    )
    try:
        graph = load_modern_graph(sess, modern_graph_data_dir)
        interactive = sess.gremlin(graph)
        g = interactive.traversal_source()

        assert g.V().has("name", "marko").count().toList()[0] == 1
        assert g.V().has("person", "name", "marko").count().toList()[0] == 1
        assert (
            g.V().has("person", "name", "marko").outE("created").count().toList()[0]
            == 1
        )
        assert (
            g.V()
            .has("person", "name", "marko")
            .outE("created")
            .inV()
            .count()
            .toList()[0]
            == 1
        )
        assert (
            g.V().has("person", "name", "marko").out("created").count().toList()[0]
            == 1
        )
        assert (
            g.V()
            .has("person", "name", "marko")
            .out("created")
            .values("name")
            .count()
            .toList()[0]
            == 1
        )
        assert (
            g.V()
            .hasLabel("person")
            .has("age", P.gt(30))
            .order()
            .by("age", Order.desc)
            .count()
            .toList()[0]
            == 2
        )
    finally:
        sess.close()
def test_query_modern_graph(modern_graph_data_dir):
    """Run Gremlin count queries (as strings) against the modern graph.

    Every query in the list is expected to return a single result equal to 1.
    The session is closed in a ``finally`` block so it is released even when
    loading or an assertion fails.

    Args:
        modern_graph_data_dir: Forwarded to ``load_modern_graph`` as the data
            location (presumably a pytest fixture -- confirm in conftest).
    """
    gs_image, gie_manager_image = get_gs_image_on_ci_env()
    sess = graphscope.session(
        num_workers=1,
        k8s_gs_image=gs_image,
        k8s_gie_graph_manager_image=gie_manager_image,
        k8s_coordinator_cpu=0.5,
        k8s_coordinator_mem="2500Mi",
        k8s_vineyard_cpu=0.1,
        k8s_vineyard_mem="512Mi",
        k8s_engine_cpu=0.1,
        k8s_engine_mem="1500Mi",
        k8s_vineyard_shared_mem="2Gi",
        k8s_volumes=get_k8s_volumes(),
    )
    try:
        graph = load_modern_graph(sess, modern_graph_data_dir)
        interactive = sess.gremlin(graph)
        queries = [
            "g.V().has('name','marko').count()",
            "g.V().has('person','name','marko').count()",
            "g.V().has('person','name','marko').outE('created').count()",
            "g.V().has('person','name','marko').outE('created').inV().count()",
            "g.V().has('person','name','marko').out('created').count()",
            "g.V().has('person','name','marko').out('created').values('name').count()",
        ]
        for q in queries:
            result = interactive.execute(q).all().result()[0]
            # Include the query text so a failure identifies which one broke.
            assert result == 1, q
    finally:
        sess.close()
def test_correct_closing_on_hosts():
    """Close a local session twice; neither call may raise."""
    session = graphscope.session(run_on_local=True)
    # The first close is the check that the launched coordinator and
    # graphscope-engines on local are correctly closed; the second one tests
    # closing twice.
    for _ in range(2):
        session.close()
def sess():
    """Yield a two-worker lazy-mode ``hosts`` session set as the default.

    The session is closed when the generator is resumed after the yield.
    """
    options = {"cluster_type": "hosts", "num_workers": 2, "mode": "lazy"}
    lazy_session = graphscope.session(**options)
    lazy_session.as_default()
    yield lazy_session
    lazy_session.close()
def test_config_dict_has_highest_priority(local_config_file):
    """Start a local session with both ``config`` and an explicit kwarg.

    Only checks that the session comes up active when ``config`` and
    ``num_workers`` are supplied together.
    """
    session = graphscope.session(
        run_on_local=True,
        config=local_config_file,
        num_workers=2,
    )
    status = session.info["status"]
    assert status == "active"
    session.close()
def graphscope_session():
    """Yield a one-worker local session set as the default session.

    Enables log output and disables interactive-engine initialization via
    global options first. The session is closed when the generator is resumed
    after the yield.
    """
    for option in ({"show_log": True}, {"initializing_interactive_engine": False}):
        graphscope.set_option(**option)

    session = graphscope.session(run_on_local=True, num_workers=1)
    session.as_default()
    yield session
    session.close()
def test_config_dict_has_highest_priority(local_config_file):
    """Start a ``hosts`` session with both ``config`` and an explicit kwarg.

    Only checks that the session comes up active when ``config`` and
    ``num_workers`` are supplied together.
    """
    session = graphscope.session(
        cluster_type="hosts",
        config=local_config_file,
        num_workers=2,
    )
    status = session.info["status"]
    assert status == "active"
    session.close()
def p2p_property_graph(num_workers, directed=True):
    """Create a local session and build the p2p-31 property graph on it.

    Files are read from ``${GS_TEST_DIR}/property``. Vertices get the label
    ``"person"`` and edges the label ``"knows"``.

    Args:
        num_workers: Number of workers for the new session.
        directed: Passed to ``graphscope.Graph`` as its ``directed`` flag.

    Returns:
        A ``(session, graph)`` tuple. The session is not closed here.
    """
    property_dir = os.path.expandvars("${GS_TEST_DIR}/property")
    vertex_file = "{}/p2p-31_property_v_0".format(property_dir)
    edge_file = "{}/p2p-31_property_e_0".format(property_dir)

    graphscope.set_option(show_log=True)
    graphscope.set_option(initializing_interactive_engine=False)
    session = graphscope.session(num_workers=num_workers, run_on_local=True)

    # Each add_* call returns a graph object, so the calls can be chained.
    graph = (
        graphscope.Graph(session, directed=directed)
        .add_vertices(vertex_file, "person")
        .add_edges(edge_file, "knows")
    )
    return session, graph
def test_multiple_session(data_dir):
    """Launch two identically configured sessions in one random namespace.

    Both sessions request two workers. Each must be active, be of type
    ``"k8s"`` and list two engine hosts. The image is only overridden when
    the CI helper reports a CI run.
    """
    suffix = "".join(random.choice(string.ascii_lowercase) for _ in range(6))
    namespace = "gs-multi-" + suffix
    image, ci = get_gs_image_on_ci_env()

    def launch():
        options = {"show_log": True, "k8s_namespace": namespace, "num_workers": 2}
        if ci:
            options["k8s_gs_image"] = image
        return graphscope.session(**options)

    def assert_active_k8s(session):
        info = session.info
        assert info["status"] == "active"
        assert info["type"] == "k8s"
        assert len(info["engine_hosts"].split(",")) == 2

    sess = launch()
    assert_active_k8s(sess)

    sess2 = launch()
    assert_active_k8s(sess2)

    sess2.close()
    sess.close()
def test_minimum_networkx():
    """Smoke-test the ``nx`` interface on a two-worker default session.

    Runs a builtin PageRank on a ten-node path graph, then converts an ``nx``
    graph with 100 nodes into a graph via ``session.g``. Nothing is asserted;
    the test passes when no call raises.
    """
    session = graphscope.session(cluster_type="hosts", num_workers=2)
    session.as_default()

    # case-1 run app
    path = nx.path_graph(10)
    nx.builtin.pagerank(path)

    # case-2 transfer nx graph to gs graph
    nx_g = nx.Graph(dist=True)
    nx_g.add_nodes_from(range(100), type="node")
    gs_g = session.g(nx_g)

    session.close()
def test_launch_session_from_config(local_config_file):
    """Start a local session configured through ``GS_CONFIG_PATH``.

    The environment variable is pointed at ``local_config_file`` for the
    duration of the test and then restored exactly: if it was not set before,
    it is removed again rather than left behind as an empty string. The
    session is closed even if the status assertion fails.

    Args:
        local_config_file: Path stored in ``GS_CONFIG_PATH`` while the session
            is created.
    """
    # None (not "") marks "was unset", so the two cases can be told apart.
    saved = os.environ.get("GS_CONFIG_PATH")
    try:
        os.environ["GS_CONFIG_PATH"] = local_config_file
        s = graphscope.session(run_on_local=True)
        try:
            info = s.info
            assert info["status"] == "active"
        finally:
            s.close()
    finally:
        if saved is None:
            os.environ.pop("GS_CONFIG_PATH", None)
        else:
            os.environ["GS_CONFIG_PATH"] = saved
def test_multiple_sessions(self):
    """Check that nx graphs stay bound to the session of their source graph.

    A second session is started and a graph is loaded on it. Graphs derived
    from its nx graph (copies, directed versions, subgraphs) must keep the
    same ``session_id``, and mixing graphs from different sessions must raise
    ``RuntimeError``.
    """
    sess2 = graphscope.session(cluster_type="hosts", num_workers=1)
    nx2 = sess2.nx()
    gs_g = self.single_label_g

    # The third argument is False for nx.Graph and True otherwise.
    directed_flag = self.NXGraph is not nx.Graph
    gs_g2 = ldbc_sample_single_label_with_sess(sess2, self.data_dir, directed_flag)
    assert gs_g.session_id != gs_g2.session_id

    nx_g = self.NXGraph(gs_g, dist=True)
    nx2_graph_cls = nx2.DiGraph if nx_g.is_directed() else nx2.Graph
    nx_g2 = nx2_graph_cls(gs_g2, dist=True)
    self.assert_convert_success(gs_g2, nx_g2)
    assert nx_g.session_id == gs_g.session_id
    assert nx_g2.session_id == gs_g2.session_id
    expected_id = nx_g2.session_id

    # copies
    cg1 = nx_g2.copy()
    assert cg1.session_id == expected_id
    dg1 = nx_g2.to_directed()
    assert dg1.session_id == expected_id
    dg2 = nx_g2.to_directed(as_view=True)
    assert dg2.session_id == expected_id

    # subgraph
    sg1 = nx_g2.subgraph([274877907301, 274877907299])
    assert sg1.session_id == expected_id
    sg2 = nx_g2.edge_subgraph([(274877907301, 274877907299)])
    assert sg2.session_id == expected_id

    # error raise if gs graph and nx graph not in the same session.
    gs_to_nx_error = "graphscope graph and networkx graph not in the same session."
    with pytest.raises(RuntimeError, match=gs_to_nx_error):
        tmp = self.NXGraph(gs_g2)

    nx_to_gs_error = "networkx graph and graphscope graph not in the same session."
    with pytest.raises(RuntimeError, match=nx_to_gs_error):
        tmp = g(nx_g2)
        # Only reached if the conversion unexpectedly succeeds.
        print(tmp.session_id, nx_g2.session_id)

    sess2.close()
def test_error_on_used_after_close():
    """Verify errors after a session is closed, and that double close is safe.

    Loading on a closed session and then forcing the load must raise
    ``ValueError``; loading without a default session must raise
    ``RuntimeError``. Closing an already closed session must leave its status
    at ``"closed"``.
    """
    # use after session close
    closed = graphscope.session(run_on_local=True)
    closed.close()

    with pytest.raises(ValueError, match="Session not exists."):
        g = load_graph(closed)
        # The error may surface at either call, so both stay inside the block.
        g._ensure_loaded()

    edge_spec = {
        "e0": "twitter_property_e_0#header_row=true",
    }
    with pytest.raises(RuntimeError, match="No default session found."):
        g = graphscope.load_from(edges=edge_spec)
        g._ensure_loaded()

    # close after close
    twice = graphscope.session(run_on_local=True)
    for _ in range(2):
        twice.close()
        assert twice.info["status"] == "closed"
def test_minimum_udf_app():
    """Define a minimal ``@pregel`` app and run it once on a one-worker session.

    The app does nothing in ``Init`` and votes to halt for every vertex in
    ``Compute``. Nothing is asserted; the test passes when building and
    running the app does not raise.
    """

    # NOTE(review): the decorator may process this class from its source
    # text -- keep the class body minimal and confirm before restructuring.
    @pregel(vd_type="string", md_type="string")
    class DummyClass(AppAssets):
        @staticmethod
        def Init(v, context):
            pass

        @staticmethod
        def Compute(messages, v, context):
            v.vote_to_halt()

    s = graphscope.session(cluster_type="hosts", num_workers=1)
    g = load_ogbn_mag(s)
    # Instantiate the app, then call it on the graph to run it.
    a = DummyClass()
    r = a(g)
    s.close()
def test_serialize_roundtrip(p2p_property_dir):
    """Serialize a graph, reload it, and check SSSP results on the reload.

    The p2p-31 property graph is loaded, serialized to ``/tmp/serialize`` and
    unloaded. The deserialized graph is projected to a simple graph, SSSP is
    run from source 6, and the rows selected by ``vertex_range={"end": 6}``
    are compared with the expected values. The session is closed in a
    ``finally`` block so it is released even when a step fails.

    Args:
        p2p_property_dir: Directory holding the ``p2p-31_property_*`` files.
    """
    gs_image, gie_manager_image = get_gs_image_on_ci_env()
    sess = graphscope.session(
        num_workers=2,
        k8s_gs_image=gs_image,
        k8s_gie_graph_manager_image=gie_manager_image,
        k8s_coordinator_cpu=0.5,
        k8s_coordinator_mem="2500Mi",
        k8s_vineyard_cpu=0.1,
        k8s_vineyard_mem="512Mi",
        k8s_engine_cpu=0.1,
        k8s_engine_mem="1500Mi",
        k8s_vineyard_shared_mem="2Gi",
        k8s_volumes=get_k8s_volumes(),
    )
    try:
        graph = sess.load_from(
            edges={
                "knows": (
                    Loader(
                        "{}/p2p-31_property_e_0".format(p2p_property_dir),
                        header_row=True,
                    ),
                    ["src_label_id", "dst_label_id", "dist"],
                    ("src_id", "person"),
                    ("dst_id", "person"),
                ),
            },
            vertices={
                "person": Loader(
                    "{}/p2p-31_property_v_0".format(p2p_property_dir),
                    header_row=True,
                ),
            },
            generate_eid=False,
        )
        graph.serialize("/tmp/serialize")
        # Drop the in-memory graph so the check below uses the reloaded copy.
        graph.unload()
        new_graph = Graph.deserialize("/tmp/serialize", sess)

        pg = new_graph.project_to_simple(0, 0, 0, 2)
        ctx = graphscope.sssp(pg, src=6)
        ret = (
            ctx.to_dataframe({"node": "v.id", "r": "r"}, vertex_range={"end": 6})
            .sort_values(by=["node"])
            .to_numpy(dtype=float)
        )
        expect = np.array(
            [[1.0, 260.0], [2.0, 229.0], [3.0, 310.0], [4.0, 256.0], [5.0, 303.0]]
        )
        assert np.all(ret == expect)
    finally:
        sess.close()