def test_louvain_with_edgevals(graph_file):
    """Compare weighted cugraph Louvain output with the NetworkX result.

    When ``is_device_version_less_than((7, 0))`` is true, the only
    expectation is that the cugraph call raises ``RuntimeError``.
    Otherwise the partition keys, partition count and modularity scores
    are checked against the NetworkX-side values.
    """
    gc.collect()

    if is_device_version_less_than((7, 0)):
        cu_M = utils.read_csv_file(graph_file)
        with pytest.raises(RuntimeError):
            cu_parts, cu_mod = cugraph_call(cu_M)
        return

    nx_frame = utils.read_csv_for_nx(graph_file)
    cu_frame = utils.read_csv_file(graph_file)
    cu_parts, cu_mod = cugraph_call(cu_frame, edgevals=True)
    nx_parts = networkx_call(nx_frame)

    # NetworkX graph used to score both partitionings.
    Gnx = nx.from_pandas_edgelist(
        nx_frame,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.Graph(),
    )

    cu_parts = cu_parts.to_pandas()
    vertex_to_partition = dict(
        zip(cu_parts["vertex"], cu_parts["partition"])
    )

    # Both sides must have labelled exactly the same vertices.
    assert set(nx_parts.keys()) == set(vertex_to_partition.keys())

    cu_mod_nx = community.modularity(vertex_to_partition, Gnx)
    nx_mod = community.modularity(nx_parts, Gnx)

    assert len(cu_parts) == len(nx_parts)
    assert cu_mod > (0.82 * nx_mod)
    assert abs(cu_mod - cu_mod_nx) < 0.0001
def test_hits(graph_file, max_iter, tol):
    """Compare cugraph HITS hubs/authorities with the NetworkX values.

    The NetworkX scores are attached to the cugraph result frame as extra
    columns, and the test asserts that no row differs by more than the
    fixed thresholds embedded in the query expressions.
    """
    gc.collect()

    nx_input = utils.read_csv_for_nx(graph_file)
    hubs, authorities = networkx_call(nx_input, max_iter, tol)

    cu_input = utils.read_csv_file(graph_file)
    cugraph_hits = cugraph_call(cu_input, max_iter, tol)

    # Attach the NetworkX scores, ordered by vertex id, next to cugraph's.
    hubs_pdf = pd.DataFrame.from_dict(hubs, orient="index").sort_index()
    cugraph_hits["nx_hubs"] = cudf.Series.from_pandas(hubs_pdf[0])

    auth_pdf = pd.DataFrame.from_dict(
        authorities, orient="index"
    ).sort_index()
    cugraph_hits["nx_authorities"] = cudf.Series.from_pandas(auth_pdf[0])

    out_of_tolerance = (
        'hubs - nx_hubs > 0.00001',
        'hubs - nx_hubs < -0.00001',
        'authorities - nx_authorities > 0.0001',
        'authorities - nx_authorities < -0.0001',
    )

    # Run every query first, then assert, so evaluation order is unchanged.
    offending = [cugraph_hits.query(expr) for expr in out_of_tolerance]
    for rows in offending:
        assert len(rows) == 0
def test_multigraph(graph_file):
    """Build the same multi-digraph in cugraph and NetworkX and compare.

    Edge and node counts are compared first, then the sorted edge lists
    with weights rounded to three decimals.
    """
    # FIXME: Migrate to new test fixtures for Graph setup once available
    cu_frame = utils.read_csv_file(graph_file)
    G = cugraph.MultiDiGraph()
    G.from_cudf_edgelist(
        cu_frame, source="0", destination="1", edge_attr="2"
    )

    nx_frame = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)
    Gnx = nx.from_pandas_edgelist(
        nx_frame,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.MultiDiGraph(),
    )

    assert G.number_of_edges() == Gnx.number_of_edges()
    assert G.number_of_nodes() == Gnx.number_of_nodes()

    # Bring the cugraph edge list to the NetworkX column naming.
    cu_edges = cugraph.to_pandas_edgelist(G)
    cu_edges.rename(
        columns={"src": "source", "dst": "target", "weights": "weight"},
        inplace=True,
    )
    cu_edges["weight"] = cu_edges["weight"].round(decimals=3)

    nx_edges = nx.to_pandas_edgelist(Gnx)
    nx_edges = nx_edges.astype(
        dtype={"source": "int32", "target": "int32", "weight": "float32"}
    )

    cu_edges = cu_edges.sort_values(by=["source", "target"])
    cu_edges = cu_edges.reset_index(drop=True)
    nx_edges = nx_edges.sort_values(by=["source", "target"])
    nx_edges = nx_edges.reset_index(drop=True)
    nx_edges["weight"] = nx_edges["weight"].round(decimals=3)

    assert nx_edges.equals(cu_edges[["source", "target", "weight"]])
def test_to_undirected(graph_file):
    """Convert a DiGraph to undirected and compare with NetworkX.

    Only rows with source <= destination are kept from the input, so the
    starting graph is one-directional.  After ``to_undirected()`` the
    node/edge counts must match NetworkX and every edge in the cugraph
    edge list must exist in the NetworkX graph.
    """
    gc.collect()

    cu_frame = utils.read_csv_file(graph_file)
    cu_frame = cu_frame[cu_frame["0"] <= cu_frame["1"]]
    cu_frame = cu_frame.reset_index(drop=True)

    nx_frame = utils.read_csv_for_nx(graph_file)
    nx_frame = nx_frame[nx_frame["0"] <= nx_frame["1"]]

    assert len(cu_frame) == len(nx_frame)

    # Directed graphs on both sides.
    DiG = cugraph.DiGraph()
    DiG.from_cudf_edgelist(cu_frame, source="0", destination="1")
    DiGnx = nx.from_pandas_edgelist(
        nx_frame, source="0", target="1", create_using=nx.DiGraph()
    )

    G = DiG.to_undirected()
    Gnx = DiGnx.to_undirected()

    assert G.number_of_nodes() == Gnx.number_of_nodes()
    assert G.number_of_edges() == Gnx.number_of_edges()

    edgelist_df = G.edgelist.edgelist_df
    for pos in range(len(edgelist_df)):
        edge = edgelist_df.iloc[pos]
        assert Gnx.has_edge(edge["src"], edge["dst"])
def test_symmetrize_unweighted(graph_file):
    """Symmetrize an unweighted edge list and validate it with ``compare``.

    The original and symmetrized source/destination columns are handed to
    the ``compare`` helper, with ``None`` in place of the weight columns.
    """
    gc.collect()

    cu_M = utils.read_csv_file(graph_file)
    sym_sources, sym_destinations = cugraph.symmetrize(cu_M["0"], cu_M["1"])

    # The symmetrized output is expected to contain every input edge, in
    # both directions.  Both edge lists are placed in data frames so the
    # helper can check this (the original notes suggest join/merge logic).
    orig_df = cudf.DataFrame()
    orig_df["src"] = cu_M["0"]
    orig_df["dst"] = cu_M["1"]

    sym_df = cudf.DataFrame()
    sym_df["src_s"] = sym_sources
    sym_df["dst_s"] = sym_destinations

    compare(
        orig_df["src"],
        orig_df["dst"],
        None,
        sym_df["src_s"],
        sym_df["dst_s"],
        None,
    )
def calc_cg_core_number(graph_file):
    """Return ``cugraph.core_number`` for the graph read from *graph_file*.

    The edge list is loaded with ``utils.read_csv_file`` and columns
    ``"0"``/``"1"`` are used as source/destination of a ``cugraph.Graph``.
    """
    edge_frame = utils.read_csv_file(graph_file)

    graph = cugraph.Graph()
    graph.from_cudf_edgelist(edge_frame, source="0", destination="1")

    return cugraph.core_number(graph)
def test_jaccard_edgevals(managed, pool, graph_file):
    """Compare weighted Jaccard coefficients from cugraph and NetworkX.

    RMM is reinitialized with the requested allocator settings first.  A
    mismatch is counted only when the two results refer to the same
    (source, destination) pair at the same position and the coefficients
    differ by more than ``tol * 1.1``.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    nx_input = utils.read_csv_for_nx(graph_file)
    cu_input = utils.read_csv_file(graph_file)
    cu_src, cu_dst, cu_coeff = cugraph_call(cu_input, edgevals=True)
    nx_src, nx_dst, nx_coeff = networkx_call(nx_input)

    # Count mismatching coefficients.
    tol = 1.0e-06
    err = 0

    assert len(cu_coeff) == len(nx_coeff)
    for idx in range(len(cu_coeff)):
        if abs(cu_coeff[idx] - nx_coeff[idx]) > tol * 1.1:
            if cu_src[idx] == nx_src[idx] and cu_dst[idx] == nx_dst[idx]:
                err += 1

    print("Mismatches: %d" % err)
    assert err == 0
def test_edges_for_Graph(graph_file):
    """Check that ``Graph.edges()`` in cugraph matches NetworkX.

    Edge counts are compared first; then the NetworkX edges, normalized so
    the smaller endpoint comes first, are compared to the cugraph edge
    list after sorting both by (src, dst).
    """
    cu_M = utils.read_csv_file(graph_file)

    # NetworkX reference graph built from the first two columns.
    endpoints_pdf = cu_M.to_pandas()[['0', '1']]
    nx_graph = nx.from_pandas_edgelist(
        endpoints_pdf, source='0', target='1', create_using=nx.Graph
    )
    nx_edges = nx_graph.edges()

    # cugraph graph from the DataFrame; list-valued column names are used
    # to force the renumber_from_cudf path (per the original note).
    G = cugraph.from_cudf_edgelist(
        cu_M, source=['0'], destination=['1'], create_using=cugraph.Graph
    )
    cu_edge_list = G.edges()

    # Same number of edges on both sides.
    assert len(nx_edges) == len(cu_edge_list)
    assert nx_graph.number_of_edges() == G.number_of_edges()

    # Normalize each NetworkX edge to (smaller, larger) before comparing.
    normalized = [[v, u] if u > v else [u, v] for u, v in nx_edges]
    nx_edge_list = cudf.DataFrame(normalized, columns=['src', 'dst'])

    expected = nx_edge_list.sort_values(by=['src', 'dst'])
    expected = expected.reset_index(drop=True)
    actual = cu_edge_list.sort_values(by=['src', 'dst'])
    actual = actual.reset_index(drop=True)

    assert_eq(expected, actual, check_dtype=False)
def test_jaccard_two_hop_edge_vals(graph_file):
    """Compare Jaccard coefficients on two-hop pairs of a weighted graph.

    The vertex pairs come from ``G.get_two_hop_neighbors()`` sorted by
    (first, second); the same pairs are scored by
    ``nx.jaccard_coefficient`` and by ``cugraph.jaccard`` and compared
    position by position.
    """
    gc.collect()

    nx_input = utils.read_csv_for_nx(graph_file)
    cu_input = utils.read_csv_file(graph_file)

    Gnx = nx.from_pandas_edgelist(
        nx_input,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.Graph(),
    )

    G = cugraph.Graph()
    G.from_cudf_edgelist(
        cu_input, source="0", destination="1", edge_attr="2"
    )

    pairs = G.get_two_hop_neighbors()
    pairs = pairs.sort_values(["first", "second"]).reset_index(drop=True)

    nx_pairs = [
        (pairs["first"].iloc[k], pairs["second"].iloc[k])
        for k in range(len(pairs))
    ]
    nx_coeff = [p for _, _, p in nx.jaccard_coefficient(Gnx, nx_pairs)]

    df = cugraph.jaccard(G, pairs)
    df = df.sort_values(by=["source", "destination"])
    df = df.reset_index(drop=True)

    assert len(nx_coeff) == len(df)
    for k in range(len(df)):
        diff = abs(nx_coeff[k] - df["jaccard_coeff"].iloc[k])
        assert diff < 1.0e-6
def test_overlap(graph_file):
    """Compare overlap coefficients from ``cugraph_call`` and ``cpu_call``.

    The CPU side works on a square scipy CSR matrix built from the edge
    list; the vertex pairs are the sorted two-hop neighbors of the cugraph
    graph.  Coefficients must agree within 1e-6, and a NaN on one side
    must be a NaN on the other.
    """
    gc.collect()

    nx_frame = utils.read_csv_for_nx(graph_file)
    n_vertices = max(max(nx_frame["0"]), max(nx_frame["1"])) + 1
    M = scipy.sparse.csr_matrix(
        (nx_frame.weight, (nx_frame["0"], nx_frame["1"])),
        shape=(n_vertices, n_vertices),
    )

    cu_M = utils.read_csv_file(graph_file)
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source="0", destination="1")

    pairs = G.get_two_hop_neighbors()
    pairs = pairs.sort_values(["first", "second"]).reset_index(drop=True)

    cu_coeff = cugraph_call(cu_M, pairs)
    cpu_coeff = cpu_call(M, pairs["first"], pairs["second"])

    assert len(cu_coeff) == len(cpu_coeff)
    for k in range(len(cu_coeff)):
        expected = cpu_coeff[k]
        actual = cu_coeff[k]
        if np.isnan(expected):
            assert np.isnan(actual)
        elif np.isnan(actual):
            # Only cugraph produced NaN: this equality fails on purpose.
            assert expected == actual
        else:
            diff = abs(expected - actual)
            assert diff < 1.0e-6
def test_woverlap(managed, pool, graph_file):
    """Compare coefficients from ``cugraph_call`` and ``cpu_call``.

    RMM is reinitialized with the requested allocator settings.  The pairs
    are the two-hop neighbors of the cugraph graph; values must agree
    within 1e-6 and NaNs must appear on both sides together.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    nx_frame = utils.read_csv_for_nx(graph_file)
    n_vertices = max(max(nx_frame['0']), max(nx_frame['1'])) + 1
    M = scipy.sparse.csr_matrix(
        (nx_frame.weight, (nx_frame['0'], nx_frame['1'])),
        shape=(n_vertices, n_vertices),
    )

    cu_M = utils.read_csv_file(graph_file)
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1')
    pairs = G.get_two_hop_neighbors()

    cu_coeff = cugraph_call(cu_M, pairs)
    cpu_coeff = cpu_call(M, pairs['first'], pairs['second'])

    assert len(cu_coeff) == len(cpu_coeff)
    for k in range(len(cu_coeff)):
        expected = cpu_coeff[k]
        actual = cu_coeff[k]
        if np.isnan(expected):
            assert np.isnan(actual)
        elif np.isnan(actual):
            # Only cugraph produced NaN: this equality fails on purpose.
            assert expected == actual
        else:
            diff = abs(expected - actual)
            assert diff < 1.0e-6
def test_louvain(graph_file):
    """Compare cugraph Louvain output with the NetworkX-side result.

    Checks that both sides label the same vertices, that the cugraph
    modularity exceeds 82% of the NetworkX one, and that the modularity
    reported by cugraph matches the value recomputed with
    ``community.modularity`` to within 1e-4.
    """
    gc.collect()

    nx_frame = utils.read_csv_for_nx(graph_file)
    cu_frame = utils.read_csv_file(graph_file)
    cu_parts, cu_mod = cugraph_call(cu_frame)
    nx_parts = networkx_call(nx_frame)

    # NetworkX graph used to score both partitionings.
    Gnx = nx.from_pandas_edgelist(
        nx_frame,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.Graph(),
    )

    cu_parts = cu_parts.to_pandas()
    vertex_to_partition = dict(
        zip(cu_parts["vertex"], cu_parts["partition"])
    )

    assert set(nx_parts.keys()) == set(vertex_to_partition.keys())

    cu_mod_nx = community.modularity(vertex_to_partition, Gnx)
    nx_mod = community.modularity(nx_parts, Gnx)

    assert len(cu_parts) == len(nx_parts)
    assert cu_mod > (0.82 * nx_mod)
    assert abs(cu_mod - cu_mod_nx) < 0.0001
def test_degrees_functionality(managed, pool, graph_file):
    """Compare ``DiGraph.degrees()`` with NetworkX in/out degrees.

    RMM is reinitialized with the requested allocator settings.  Row ``i``
    of the cugraph result is compared with NetworkX's degree for node
    ``i``; any difference is counted and both counters must end at zero.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    nx_input = utils.read_csv_for_nx(graph_file)
    cu_input = utils.read_csv_file(graph_file)

    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_input, source='0', target='1', edge_attr='2')

    Gnx = nx.DiGraph(nx_input)

    df = G.degrees()

    nx_in_degree = Gnx.in_degree()
    nx_out_degree = Gnx.out_degree()

    err_in_degree = 0
    err_out_degree = 0

    for row in range(len(df)):
        if df['in_degree'][row] != nx_in_degree[row]:
            err_in_degree += 1
        if df['out_degree'][row] != nx_out_degree[row]:
            err_out_degree += 1

    assert err_in_degree == 0
    assert err_out_degree == 0
def test_add_edge_list_to_adj_list(managed, pool, graph_file):
    """Load an edge list into a DiGraph and check its adjacency-list view.

    The expected offsets and indices are the ``indptr`` and ``indices``
    arrays of the CSR form of the reference matrix.  No edge attribute is
    supplied, so the values returned by ``view_adj_list()`` must be None.

    Raises:
        TypeError: if the reference matrix is None or is not square.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    cu_M = utils.read_csv_file(graph_file)

    M = utils.read_csv_for_nx(graph_file).tocsr()
    if M is None:
        raise TypeError('Could not read the input graph')
    n_rows, n_cols = M.shape[0], M.shape[1]
    if n_rows != n_cols:
        raise TypeError('Shape is not square')

    offsets_exp = M.indptr
    indices_exp = M.indices

    # Edge list in, adjacency list out.
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source='0', target='1')
    offsets_cu, indices_cu, values_cu = G.view_adj_list()

    assert compare_offsets(offsets_cu, offsets_exp)
    assert compare_series(indices_cu, indices_exp)
    assert values_cu is None
def test_Graph_from_MultiGraph(managed, pool, graph_file):
    """Check that a Graph built from a MultiGraph matches a direct build.

    Two extra constant float32 columns are added to the edge list so the
    MultiGraph carries three edge attributes.  A ``Graph`` is then created
    both directly from the edge list (attribute ``'2'``) and from the
    MultiGraph (selecting attribute ``'2'``), and their edge-list data
    frames must be equal.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    cu_M = utils.read_csv_file(graph_file)

    # create dataframe for MultiGraph
    cu_M['3'] = cudf.Series([2.0] * len(cu_M), dtype=np.float32)
    cu_M['4'] = cudf.Series([3.0] * len(cu_M), dtype=np.float32)

    # initialize MultiGraph
    G_multi = cugraph.MultiGraph()
    G_multi.from_cudf_edgelist(
        cu_M, source='0', destination='1', edge_attr=['2', '3', '4']
    )

    # initialize Graph
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='2')

    # create Graph from MultiGraph
    G_from_multi = cugraph.Graph(G_multi, edge_attr='2')

    # `==` between two DataFrames is an element-wise comparison that yields
    # another DataFrame, which is not a usable assert condition; `equals`
    # reduces the comparison to a single boolean.
    assert G.edgelist.edgelist_df.equals(G_from_multi.edgelist.edgelist_df)
def test_to_directed(graph_file):
    """Convert an undirected Graph to a DiGraph and compare with NetworkX.

    Only rows with source <= destination are kept from the input.  The
    undirected graph must report every input edge in both directions, and
    so must the DiGraph returned by ``to_directed()``; node and edge counts
    of the converted graph must match NetworkX's ``to_directed()``.
    """
    gc.collect()

    cu_M = utils.read_csv_file(graph_file)
    cu_M = cu_M[cu_M["0"] <= cu_M["1"]].reset_index(drop=True)
    M = utils.read_csv_for_nx(graph_file)
    M = M[M["0"] <= M["1"]]
    assert len(cu_M) == len(M)

    # cugraph add_edge_list
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source="0", destination="1")
    Gnx = nx.from_pandas_edgelist(
        M, source="0", target="1", create_using=nx.Graph()
    )

    input_edges = cu_M.to_pandas()

    # The undirected input graph exposes each edge in both directions.
    for index, row in input_edges.iterrows():
        assert G.has_edge(row['0'], row['1'])
        assert G.has_edge(row['1'], row['0'])

    DiG = G.to_directed()
    DiGnx = Gnx.to_directed()

    assert DiG.number_of_nodes() == DiGnx.number_of_nodes()
    assert DiG.number_of_edges() == DiGnx.number_of_edges()

    # Verify the *converted* graph: previously only the undirected input
    # was checked here, so the result of to_directed() was never inspected.
    for index, row in input_edges.iterrows():
        assert DiG.has_edge(row['0'], row['1'])
        assert DiG.has_edge(row['1'], row['0'])
def test_filter_unreachable(graph_file, source):
    """Run SSSP from *source* and check ``cugraph.filter_unreachable``.

    After filtering, no row may carry the maximum value of the distance
    dtype (integer or floating point), and the result must be non-empty.
    """
    gc.collect()

    cu_M = utils.read_csv_file(graph_file)
    edge_count = len(cu_M)

    print("sources size = " + str(edge_count))
    print("destinations size = " + str(edge_count))

    # Weighted directed graph that SSSP runs on.
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2")

    print("cugraph Solving... ")
    started = time.time()
    df = cugraph.sssp(G, source)
    elapsed = time.time() - started
    print("Time : " + str(elapsed))

    reachable_df = cugraph.filter_unreachable(df)

    # `inf` is referenced from inside the query string via `@inf`, which
    # flake8 cannot see, hence the noqa markers.
    distance_dtype = df["distance"].dtype
    if np.issubdtype(distance_dtype, np.integer):
        inf = np.iinfo(reachable_df["distance"].dtype).max  # noqa: F841
        assert len(reachable_df.query("distance == @inf")) == 0
    elif np.issubdtype(distance_dtype, np.inexact):
        inf = np.finfo(reachable_df["distance"].dtype).max  # noqa: F841
        assert len(reachable_df.query("distance == @inf")) == 0

    assert len(reachable_df) != 0
def test_overlap(managed, pool, graph_file):
    """Compare overlap coefficients from ``cugraph_call`` and ``cpu_call``.

    RMM is finalized and re-initialized with the requested settings.  The
    graph is built from the CSR arrays of the ``.mtx`` file and the pairs
    are its two-hop neighbors; coefficients must agree within 1e-6.
    """
    gc.collect()

    # Restart RMM with the requested allocator configuration.
    rmm.finalize()
    rmm_cfg.use_managed_memory = managed
    rmm_cfg.use_pool_allocator = pool
    rmm.initialize()
    assert rmm.is_initialized()

    M = utils.read_mtx_file(graph_file + '.mtx').tocsr()
    cu_M = utils.read_csv_file(graph_file + '.csv')

    row_offsets = cudf.Series(M.indptr)
    col_indices = cudf.Series(M.indices)

    G = cugraph.Graph()
    G.add_adj_list(row_offsets, col_indices, None)
    pairs = G.get_two_hop_neighbors()
    first, second = pairs['first'], pairs['second']

    cu_coeff = cugraph_call(cu_M, first, second)
    cpu_coeff = cpu_call(M, first, second)

    assert len(cu_coeff) == len(cpu_coeff)
    for k in range(len(cu_coeff)):
        diff = abs(cpu_coeff[k] - cu_coeff[k])
        assert diff < 1.0e-6
def test_to_undirected(graph_file):
    """Convert a DiGraph to undirected and check edges in both directions.

    The input is made one-directional by keeping only rows with
    source <= destination.  Before conversion each edge must exist only in
    its original direction; after ``to_undirected()`` it must exist both
    ways, and node/edge counts must match NetworkX.
    """
    # Read the data, then drop edges so that only one direction remains.
    cu_frame = utils.read_csv_file(graph_file)
    cu_frame = cu_frame[cu_frame["0"] <= cu_frame["1"]]
    cu_frame = cu_frame.reset_index(drop=True)

    nx_frame = utils.read_csv_for_nx(graph_file)
    nx_frame = nx_frame[nx_frame["0"] <= nx_frame["1"]]

    assert len(cu_frame) == len(nx_frame)

    # Directed graphs on both sides.
    DiG = cugraph.DiGraph()
    DiG.from_cudf_edgelist(cu_frame, source="0", destination="1")
    DiGnx = nx.from_pandas_edgelist(
        nx_frame, source="0", target="1", create_using=nx.DiGraph()
    )

    for _, edge in cu_frame.to_pandas().iterrows():
        assert DiG.has_edge(edge['0'], edge['1'])
        assert not DiG.has_edge(edge['1'], edge['0'])

    G = DiG.to_undirected()
    Gnx = DiGnx.to_undirected()

    assert G.number_of_nodes() == Gnx.number_of_nodes()
    assert G.number_of_edges() == Gnx.number_of_edges()

    for _, edge in cu_frame.to_pandas().iterrows():
        assert G.has_edge(edge['0'], edge['1'])
        assert G.has_edge(edge['1'], edge['0'])
def test_symmetrize_unweighted(managed, pool, graph_file):
    """Symmetrize an unweighted edge list and validate it with ``compare``.

    RMM is reinitialized with the requested allocator settings.  The
    original and symmetrized source/destination columns are handed to the
    ``compare`` helper, with ``None`` in place of the weight columns.
    """
    gc.collect()

    rmm.reinitialize(managed_memory=managed, pool_allocator=pool)
    assert rmm.is_initialized()

    cu_M = utils.read_csv_file(graph_file + '.csv')
    sym_sources, sym_destinations = cugraph.symmetrize(cu_M['0'], cu_M['1'])

    # The symmetrized output is expected to contain every input edge, in
    # both directions.  Both edge lists are placed in data frames so the
    # helper can check this (the original notes suggest join/merge logic).
    orig_df = cudf.DataFrame()
    orig_df['src'] = cu_M['0']
    orig_df['dst'] = cu_M['1']

    sym_df = cudf.DataFrame()
    sym_df['src_s'] = sym_sources
    sym_df['dst_s'] = sym_destinations

    compare(
        orig_df['src'],
        orig_df['dst'],
        None,
        sym_df['src_s'],
        sym_df['dst_s'],
        None,
    )
def test_strong_cc(managed, pool, graph_file):
    """Compare strongly connected components from cugraph and NetworkX.

    RMM is reinitialized with the requested allocator settings.  Both the
    number of components and the sorted list of component sizes must
    match.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    nx_input = utils.read_csv_for_nx(graph_file)
    netx_labels = networkx_strong_call(nx_input)

    cu_input = utils.read_csv_file(graph_file)
    cugraph_labels = cugraph_strong_call(cu_input)

    # NetworkX yields a list of components (each a collection of vertex
    # ids), whereas cugraph yields one component label per vertex, so
    # counts and sizes are compared rather than raw labels.
    nx_n_components = len(netx_labels)
    cg_n_components = get_n_uniqs(cugraph_labels)

    assert nx_n_components == cg_n_components

    # Component sizes in ascending order on both sides.
    lst_nx_components_lens = sorted(len(comp) for comp in netx_labels)
    lst_cg_components_lens = sorted(get_uniq_counts(cugraph_labels))

    assert lst_nx_components_lens == lst_cg_components_lens
def test_modularity_clustering_with_edgevals(graph_file, partitions):
    """Score a weighted partitioning against a random assignment.

    The same check runs on two graphs: one built from the CSR adjacency
    arrays and one built from the CSV edge list.  For each, the score from
    ``cugraph_call`` must be lower than the one from ``random_call``.
    """
    # Read in the graph in both representations.
    M = utils.read_mtx_file(graph_file)
    M = M.tocsr()
    cu_M = utils.read_csv_file(graph_file + '.csv', read_weights_in_sp=False)

    # Graph from the adjacency list (CSR offsets / indices / values).
    row_offsets = cudf.Series(M.indptr)
    col_indices = cudf.Series(M.indices)
    val = cudf.Series(M.data)

    G_adj = cugraph.Graph()
    G_adj.add_adj_list(row_offsets, col_indices, val)

    # Graph from the edge list columns.
    sources = cu_M['0']
    destinations = cu_M['1']
    values = cu_M['2']

    G_edge = cugraph.Graph()
    G_edge.add_edge_list(sources, destinations, values)

    for graph in (G_adj, G_edge):
        # Score of the computed partitioning versus a random assignment.
        cu_vid, cu_score = cugraph_call(graph, partitions)
        rand_vid, rand_score = random_call(graph, partitions)

        # The partitioning is expected to beat the random assignment.
        assert cu_score < rand_score
def test_force_atlas2(graph_file, score, max_iter, barnes_hut_optimize):
    """Check the trustworthiness of a Force Atlas 2 layout.

    Args:
        graph_file: path to the edge-list file; the matching ``.mtx`` file
            is found by replacing the last four characters of this path.
        score: threshold the trustworthiness score must exceed.
        max_iter: iteration count forwarded to the layout call.
        barnes_hut_optimize: forwarded to the layout call.  It was
            previously ignored and ``False`` was always used.
    """
    cu_M = utils.read_csv_file(graph_file)
    cu_pos = cugraph_call(
        cu_M,
        max_iter=max_iter,
        pos_list=None,
        outbound_attraction_distribution=True,
        lin_log_mode=False,
        prevent_overlapping=False,
        edge_weight_influence=1.0,
        jitter_tolerance=1.0,
        # Honor the test parameter instead of a hard-coded False.
        barnes_hut_optimize=barnes_hut_optimize,
        barnes_hut_theta=0.5,
        scaling_ratio=2.0,
        strong_gravity_mode=False,
        gravity=1.0,
    )

    # Trustworthiness score can be used for Force Atlas 2 as the algorithm
    # optimizes modularity. The final layout will result in different
    # communities being drawn out. We consider here the n x n adjacency
    # matrix of the graph as an embedding of the nodes in high dimension.
    # The results of force atlas 2 correspond to the layout in a 2d space.
    # Here we check that nodes belonging to the same community or
    # neighbors are close to each other in the final embedding.
    # Thresholds are based on the best score that is achieved after 500
    # iterations on a given graph.
    matrix_file = graph_file[:-4] + '.mtx'
    M = scipy.io.mmread(matrix_file)
    M = M.todense()

    cu_trust = trustworthiness(M, cu_pos[['x', 'y']].to_pandas())
    print(cu_trust, score)
    assert cu_trust > score
def test_sssp_edgevals(managed, pool, graph_file, source):
    """Compare weighted SSSP results from cugraph and NetworkX.

    RMM is reinitialized with the requested allocator settings.  For each
    vertex in the cugraph result, an error is counted when:

    * its distance differs from ``max_val`` and from the NetworkX
      distance;
    * it is not the source and predecessor distance plus edge weight does
      not equal its own distance;
    * its distance equals ``max_val`` but NetworkX has a path to it.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    nx_input = utils.read_csv_for_nx(graph_file)
    cu_input = utils.read_csv_file(graph_file)
    cu_paths, max_val = cugraph_call(cu_input, source, edgevals=True)
    nx_paths, Gnx = networkx_call(nx_input, source, edgevals=True)

    # Count mismatches.
    err = 0
    for vid in cu_paths:
        # NOTE : If distance type is float64 then cu_paths[vid][0]
        # should be compared against np.finfo(np.float64).max)
        if cu_paths[vid][0] != max_val:
            # Reachable vertex: its distance must agree with NetworkX.
            if cu_paths[vid][0] != nx_paths[vid]:
                err += 1
            # Predecessor distance + edge weight must give this distance.
            if vid != source:
                pred = cu_paths[vid][1]
                edge_weight = Gnx[pred][vid]['weight']
                if cu_paths[pred][0] + edge_weight != cu_paths[vid][0]:
                    err += 1
        elif vid in nx_paths.keys():
            # max_val distance in cugraph, yet NetworkX found a path.
            err += 1

    assert err == 0
def test_degrees_functionality(graph_file):
    """Compare ``DiGraph.degrees()`` with NetworkX in/out degrees.

    Each row of the cugraph result is matched to NetworkX through its
    ``vertex`` column; any differing in-degree or out-degree is counted
    and both counters must end at zero.
    """
    gc.collect()

    nx_input = utils.read_csv_for_nx(graph_file)
    cu_input = utils.read_csv_file(graph_file)

    G = cugraph.DiGraph()
    G.from_cudf_edgelist(
        cu_input, source="0", destination="1", edge_attr="2"
    )

    Gnx = nx.from_pandas_edgelist(
        nx_input, source="0", target="1", create_using=nx.DiGraph()
    )

    df = G.degrees()

    nx_in_degree = Gnx.in_degree()
    nx_out_degree = Gnx.out_degree()

    err_in_degree = 0
    err_out_degree = 0

    for row in range(len(df)):
        if df["in_degree"][row] != nx_in_degree[df["vertex"][row]]:
            err_in_degree += 1
        if df["out_degree"][row] != nx_out_degree[df["vertex"][row]]:
            err_out_degree += 1

    assert err_in_degree == 0
    assert err_out_degree == 0
def test_pagerank(managed, pool, graph_file, max_iter, tol, alpha,
                  personalization_perc, has_guess):
    """Compare cugraph PageRank against the NetworkX result.

    RMM is reinitialized with the requested allocator settings.  When
    ``has_guess == 1`` the NetworkX result is used as the starting guess
    and ``max_iter`` is lowered to 5.  A mismatch is counted when the two
    results refer to the same vertex at the same position and differ by
    more than ``tol * 1.1``; fewer than 1% of entries may mismatch.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    nx_input = utils.read_csv_for_nx(graph_file)
    networkx_pr, networkx_prsn = networkx_call(
        nx_input, max_iter, tol, alpha, personalization_perc
    )

    cu_nstart = None
    if has_guess == 1:
        cu_nstart = cudify(networkx_pr)
        max_iter = 5
    cu_prsn = cudify(networkx_prsn)

    cu_input = utils.read_csv_file(graph_file)
    cugraph_pr = cugraph_call(
        cu_input, max_iter, tol, alpha, cu_prsn, cu_nstart
    )

    # Order the NetworkX result by vertex id before comparing.
    networkx_pr = sorted(networkx_pr.items(), key=lambda item: item[0])

    err = 0
    assert len(cugraph_pr) == len(networkx_pr)
    for idx in range(len(cugraph_pr)):
        if abs(cugraph_pr[idx][1] - networkx_pr[idx][1]) > tol * 1.1:
            if cugraph_pr[idx][0] == networkx_pr[idx][0]:
                err += 1

    print("Mismatches:", err)
    assert err < (0.01 * len(cugraph_pr))
def test_bipartite_api(graph_file):
    """Exercise adding a bipartite node set to a Graph and reading it back.

    Only the add/retrieve functionality is tested; per the original note,
    the datasets in use are not truly bipartite.
    """
    gc.collect()

    cu_M = utils.read_csv_file(graph_file)
    nodes = cudf.concat([cu_M['0'], cu_M['1']]).unique()

    # First half of the unique nodes forms set 1; the rest forms set 2.
    half = int(len(nodes) / 2)
    set1_exp = cudf.Series(nodes[0:half])
    remaining = set(nodes.values_host) - set(set1_exp.values_host)
    set2_exp = cudf.Series(remaining)

    G = cugraph.Graph()
    assert not G.is_bipartite()

    # Register the nodes of one partition, then load the edges.
    G.add_nodes_from(set1_exp, bipartite='set1')
    G.from_cudf_edgelist(cu_M, source='0', destination='1')

    # The graph must now report itself as bipartite because a partition
    # was supplied through add_nodes_from().
    assert G.is_bipartite()

    # sets() returns the two node sets.
    set1, set2 = G.sets()

    # set1 must be the set that was passed in.
    assert set1.equals(set1_exp)
    # set2 must be the remaining nodes.
    assert set2.equals(set2_exp)
def test_jaccard_two_hop_edge_vals(managed, pool, graph_file):
    """Compare Jaccard coefficients on two-hop pairs of a weighted graph.

    RMM is reinitialized with the requested allocator settings.  The pairs
    come from ``G.get_two_hop_neighbors()``; they are scored by
    ``nx.jaccard_coefficient`` and by ``cugraph.jaccard`` and compared
    entry by entry.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,
    )
    assert rmm.is_initialized()

    nx_input = utils.read_csv_for_nx(graph_file)
    cu_input = utils.read_csv_file(graph_file)

    Gnx = nx.from_pandas_edgelist(
        nx_input,
        source='0',
        target='1',
        edge_attr='weight',
        create_using=nx.Graph(),
    )

    G = cugraph.Graph()
    G.from_cudf_edgelist(
        cu_input, source='0', destination='1', edge_attr='2'
    )

    pairs = G.get_two_hop_neighbors()

    nx_pairs = [
        (pairs['first'][k], pairs['second'][k]) for k in range(len(pairs))
    ]
    nx_coeff = [p for _, _, p in nx.jaccard_coefficient(Gnx, nx_pairs)]

    df = cugraph.jaccard(G, pairs)
    df = df.sort_values(by=['source', 'destination'])

    assert len(nx_coeff) == len(df)
    for k in range(len(df)):
        diff = abs(nx_coeff[k] - df['jaccard_coeff'][k])
        assert diff < 1.0e-6
def test_Graph_from_MultiGraph(graph_file):
    """Collapse multi-graphs into simple graphs and compare edge counts.

    The undirected case builds a ``Graph`` from a ``MultiGraph`` and the
    directed case builds a ``DiGraph`` from a ``MultiDiGraph``; each edge
    count is compared with the equivalent NetworkX construction.
    """
    # FIXME: Migrate to new test fixtures for Graph setup once available
    cuM = utils.read_csv_file(graph_file)
    nxM = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)

    # Undirected: MultiGraph -> Graph
    GM = cugraph.MultiGraph()
    GM.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2")
    GnxM = nx.from_pandas_edgelist(
        nxM,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.MultiGraph(),
    )

    G = cugraph.Graph(GM)
    Gnx = nx.Graph(GnxM)
    assert Gnx.number_of_edges() == G.number_of_edges()

    # Directed: MultiDiGraph -> DiGraph
    GdM = cugraph.MultiDiGraph()
    GdM.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2")
    # The reference must be a directed multigraph too.  Building it as an
    # undirected MultiGraph makes nx.DiGraph() add both directions of each
    # input edge, which only matches on already-symmetric edge lists.
    GnxdM = nx.from_pandas_edgelist(
        nxM,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.MultiDiGraph(),
    )

    Gd = cugraph.DiGraph(GdM)
    Gnxd = nx.DiGraph(GnxdM)
    assert Gnxd.number_of_edges() == Gd.number_of_edges()
def test_overlap_edge_vals(managed, pool, graph_file):
    """Compare weighted overlap coefficients from cugraph and the CPU.

    RMM is finalized and re-initialized with the requested settings.  The
    graph is built from the CSR arrays of the reference matrix and the
    pairs are its two-hop neighbors.  Values must agree within 1e-6, and a
    NaN on one side must be a NaN on the other.
    """
    gc.collect()

    # Restart RMM with the requested allocator configuration.
    rmm.finalize()
    rmm_config.use_managed_memory = managed
    rmm_config.use_pool_allocator = pool
    rmm_config.initial_pool_size = 2 << 27
    rmm.initialize()
    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file)
    M = M.tocsr().sorted_indices()
    cu_M = utils.read_csv_file(graph_file)

    row_offsets = cudf.Series(M.indptr)
    col_indices = cudf.Series(M.indices)

    G = cugraph.Graph()
    G.add_adj_list(row_offsets, col_indices, None)
    pairs = G.get_two_hop_neighbors()

    cu_coeff = cugraph_call(
        cu_M, pairs['first'], pairs['second'], edgevals=True
    )
    cpu_coeff = cpu_call(M, pairs['first'], pairs['second'])

    assert len(cu_coeff) == len(cpu_coeff)
    for k in range(len(cu_coeff)):
        expected = cpu_coeff[k]
        actual = cu_coeff[k]
        if np.isnan(expected):
            assert np.isnan(actual)
        elif np.isnan(actual):
            # Only cugraph produced NaN: this equality fails on purpose.
            assert expected == actual
        else:
            diff = abs(expected - actual)
            assert diff < 1.0e-6