Ejemplo n.º 1
0
def test_edge_cut_clustering_with_edgevals_nx(graph_file, partitions):
    """Check that spectral balanced-cut clustering beats random assignment.

    Builds a weighted NetworkX graph from ``graph_file``, clusters it with
    ``cugraph.spectralBalancedCutClustering``, scores that clustering with
    ``cugraph.analyzeClustering_edge_cut`` and asserts the score is strictly
    lower than the score ``random_call`` reports for the same graph.

    Parameters
    ----------
    graph_file
        Edge-list file handed to ``utils.read_csv_for_nx``.
    partitions
        Number of partitions requested; also passed as ``num_eigen_vects``.
    """
    gc.collect()

    # Read in the graph and create a NetworkX Graph
    # FIXME: replace with utils.generate_nx_graph_from_file()
    edge_list = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)
    G = nx.from_pandas_edgelist(edge_list,
                                create_using=nx.Graph(),
                                source="0",
                                target="1",
                                edge_attr="weight")

    # Get the edge_cut score for partitioning versus random assignment
    clustering = cugraph.spectralBalancedCutClustering(
        G, partitions, num_eigen_vects=partitions
    )

    # The clustering is consumed as a mapping here (presumably
    # vertex -> cluster for NetworkX input -- TODO confirm); reshape it into
    # the two-column frame that the scoring call is given below.
    pdf = pd.DataFrame.from_dict(clustering, orient='index').reset_index()
    pdf.columns = ["vertex", "cluster"]
    gdf = cudf.from_pandas(pdf)

    cu_score = cugraph.analyzeClustering_edge_cut(G, partitions, gdf, 'vertex',
                                                  'cluster')

    # random_call is handed a cuGraph graph rather than the NetworkX one.
    # Only its score is needed; the vertex set it also returns is ignored.
    Gcu = cugraph.utilities.convert_from_nx(G)
    _, rand_score = random_call(Gcu, partitions)

    # Assert that the partitioning has better edge_cut than the random
    # assignment
    print(cu_score, rand_score)
    assert cu_score < rand_score
Ejemplo n.º 2
0
def cugraph_call(G, partitions):
    """Cluster ``G`` with spectral balanced cut and score its edge cut.

    Returns a 2-tuple: the set of values found in the result's "vertex"
    column, and the score from ``cugraph.analyzeClustering_edge_cut``.
    """
    # The number of eigenvectors is tied to the number of partitions.
    clustering = cugraph.spectralBalancedCutClustering(
        G, partitions, num_eigen_vects=partitions
    )

    edge_cut = cugraph.analyzeClustering_edge_cut(
        G, partitions, clustering, 'vertex', 'cluster'
    )
    vertex_ids = set(clustering["vertex"].to_array())
    return vertex_ids, edge_cut
Ejemplo n.º 3
0
def cugraph_call(G, partitions):
    """Cluster ``G`` with spectral balanced cut and score its edge cut.

    The clustering result is sorted by its "vertex" column before its
    "cluster" column is passed to ``cugraph.analyzeClustering_edge_cut``.

    Returns a 2-tuple: the set of values in the "vertex" column, and the
    edge-cut score.
    """
    clustering = cugraph.spectralBalancedCutClustering(
        G, partitions, num_eigen_vects=partitions
    )

    # Only the cluster column is passed on, so order the rows by vertex first.
    by_vertex = clustering.sort_values("vertex")
    cluster_labels = by_vertex["cluster"]
    edge_cut = cugraph.analyzeClustering_edge_cut(G, partitions,
                                                  cluster_labels)
    vertex_ids = set(by_vertex["vertex"].to_array())
    return vertex_ids, edge_cut
Ejemplo n.º 4
0
def random_call(G, partitions):
    """Score a seeded random partition assignment of ``G``.

    Reseeds ``random`` with 0, draws one partition id in
    ``[0, partitions - 1]`` per vertex, and scores that assignment with
    ``cugraph.analyzeClustering_edge_cut``.

    Returns a 2-tuple: ``set(range(G.number_of_vertices()))`` and the score.
    """
    # Fixed seed so the baseline is reproducible from run to run.
    random.seed(0)
    vertex_count = G.number_of_vertices()

    labels = [random.randint(0, partitions - 1) for _ in range(vertex_count)]
    labels_cu = cudf.Series(labels)

    edge_cut = cugraph.analyzeClustering_edge_cut(G, partitions, labels_cu)
    return set(range(vertex_count)), edge_cut
Ejemplo n.º 5
0
def random_call(G, partitions):
    """Score repeated seeded random partition assignments of ``G``.

    Reseeds ``random`` with 0 once, then 20 times draws one partition id in
    ``[0, partitions - 1]`` per vertex, wraps the draw in a frame with
    "cluster" and "vertex" columns, and accumulates the score returned by
    ``cugraph.analyzeClustering_edge_cut``.

    Returns a 2-tuple: ``set(range(G.number_of_vertices()))`` and the
    accumulated score divided by 10.0.
    """
    # Fixed seed so the baseline is reproducible from run to run.
    random.seed(0)
    vertex_count = G.number_of_vertices()

    total = 0.0
    for _ in range(20):
        labels = [
            random.randint(0, partitions - 1) for _ in range(vertex_count)
        ]

        frame = cudf.DataFrame(labels, columns=['cluster'])
        # The frame's index is reused as the vertex column.
        frame['vertex'] = frame.index

        total += cugraph.analyzeClustering_edge_cut(G, partitions, frame)

    # NOTE(review): 20 scores are summed but the divisor is 10.0, so this is
    # twice the mean. Kept as-is because callers compare against this value;
    # confirm whether that is intentional.
    return set(range(vertex_count)), (total / 10.0)