def test_edge_cut_clustering_with_edgevals_nx(graph_file, partitions):
    """Check that spectral balanced-cut clustering beats a random baseline.

    Builds a NetworkX graph (with a "weight" edge attribute) from
    ``graph_file``, clusters it with
    ``cugraph.spectralBalancedCutClustering`` and asserts that the
    resulting edge-cut score is strictly lower than the score returned by
    ``random_call`` for the same graph.

    Parameters
    ----------
    graph_file
        Input handed to ``utils.read_csv_for_nx``.
    partitions
        Number of clusters requested; also passed as ``num_eigen_vects``.
    """
    gc.collect()

    # Read in the graph and create a NetworkX Graph
    # FIXME: replace with utils.generate_nx_graph_from_file()
    NM = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)
    G = nx.from_pandas_edgelist(
        NM,
        create_using=nx.Graph(),
        source="0",
        target="1",
        edge_attr="weight",
    )

    # Get the edge_cut score for partitioning versus random assignment
    clustering = cugraph.spectralBalancedCutClustering(
        G, partitions, num_eigen_vects=partitions
    )

    # The clustering result is loaded with orient='index', so its keys end
    # up in the "vertex" column and its values in the "cluster" column.
    pdf = pd.DataFrame.from_dict(clustering, orient='index').reset_index()
    pdf.columns = ["vertex", "cluster"]
    gdf = cudf.from_pandas(pdf)

    cu_score = cugraph.analyzeClustering_edge_cut(
        G, partitions, gdf, 'vertex', 'cluster'
    )

    # The random baseline is computed on the cuGraph conversion of G; only
    # its score is needed here, the vertex set is discarded.
    Gcu = cugraph.utilities.convert_from_nx(G)
    _, rand_score = random_call(Gcu, partitions)

    # Assert that the partitioning has better edge_cut than the random
    # assignment
    print(cu_score, rand_score)
    assert cu_score < rand_score
def cugraph_call(G, partitions):
    """Cluster ``G`` with spectral balanced cut and score the clustering.

    Returns a ``(vertices, score)`` tuple: the set of values in the
    clustering's "vertex" column, and the value returned by
    ``cugraph.analyzeClustering_edge_cut`` when given the clustering with
    the "vertex" and "cluster" column names.
    """
    clustering = cugraph.spectralBalancedCutClustering(
        G,
        partitions,
        num_eigen_vects=partitions,
    )
    edge_cut = cugraph.analyzeClustering_edge_cut(
        G,
        partitions,
        clustering,
        'vertex',
        'cluster',
    )
    vertex_ids = set(clustering["vertex"].to_array())
    return vertex_ids, edge_cut
def cugraph_call(G, partitions):
    """Cluster ``G`` with spectral balanced cut and score the clustering.

    The clustering is sorted by its "vertex" column before its "cluster"
    column is passed to ``cugraph.analyzeClustering_edge_cut``.

    Returns a ``(vertices, score)`` tuple: the set of values in the
    "vertex" column and the edge-cut score.
    """
    by_vertex = cugraph.spectralBalancedCutClustering(
        G, partitions, num_eigen_vects=partitions
    ).sort_values("vertex")

    edge_cut = cugraph.analyzeClustering_edge_cut(
        G, partitions, by_vertex["cluster"]
    )
    vertex_ids = set(by_vertex["vertex"].to_array())
    return vertex_ids, edge_cut
def random_call(G, partitions):
    """Score a seeded random cluster assignment for ``G``.

    Every vertex index in ``range(G.number_of_vertices())`` receives a
    cluster label drawn with ``random.randint(0, partitions - 1)`` after the
    generator has been seeded with 0, so repeated calls draw the same labels.

    Returns a ``(vertices, score)`` tuple: the set of vertex indices and the
    value returned by ``cugraph.analyzeClustering_edge_cut``.
    """
    random.seed(0)
    vertex_count = G.number_of_vertices()

    # One label per vertex, drawn in vertex-index order.
    labels = [random.randint(0, partitions - 1) for _ in range(vertex_count)]

    edge_cut = cugraph.analyzeClustering_edge_cut(
        G, partitions, cudf.Series(labels)
    )
    return set(range(vertex_count)), edge_cut
def random_call(G, partitions):
    """Return the mean edge-cut score of seeded random cluster assignments.

    The generator is seeded with 0, then ``num_repeats`` independent random
    assignments are drawn (one label in ``[0, partitions - 1]`` per vertex
    index) and each is scored with ``cugraph.analyzeClustering_edge_cut``.

    Returns a ``(vertices, score)`` tuple: the set of vertex indices
    ``range(G.number_of_vertices())`` and the mean of the collected scores.
    """
    random.seed(0)
    num_verts = G.number_of_vertices()

    # The divisor must match the number of iterations. Previously the loop
    # ran 20 times but the sum was divided by 10.0, which doubled the
    # reported baseline score.
    num_repeats = 20

    total_score = 0.0
    for _ in range(num_repeats):
        assignment = [
            random.randint(0, partitions - 1) for _ in range(num_verts)
        ]

        assignment_cu = cudf.DataFrame(assignment, columns=['cluster'])
        # Vertex ids are taken from the frame's index.
        assignment_cu['vertex'] = assignment_cu.index

        total_score += cugraph.analyzeClustering_edge_cut(
            G, partitions, assignment_cu
        )

    return set(range(num_verts)), total_score / num_repeats