Example #1
0
def test_wjaccard_multi_column(graph_file):
    """Weighted Jaccard on multi-column vertices must match single-column.

    The edge list read from ``graph_file`` is loaded twice: once into a
    graph whose vertices are identified by two columns (the original id and
    the id shifted by 1000), and once into a graph keyed on the original id
    only. ``cugraph.jaccard_w`` is run on the first five edges of each with
    unit weights, and the resulting coefficients are required to be equal.
    """
    gc.collect()

    edges = utils.read_csv_for_nx(graph_file)

    # Second id column is the first one offset by 1000.
    offset = 1000
    cu_M = cudf.DataFrame()
    cu_M["src_0"] = cudf.Series(edges["0"])
    cu_M["dst_0"] = cudf.Series(edges["1"])
    cu_M["src_1"] = cu_M["src_0"] + offset
    cu_M["dst_1"] = cu_M["dst_0"] + offset

    # Graph keyed on both id columns.
    multi_col_graph = cugraph.Graph()
    multi_col_graph.from_cudf_edgelist(
        cu_M,
        source=["src_0", "src_1"],
        destination=["dst_0", "dst_1"],
    )

    # Reference graph keyed on the first id column only.
    single_col_graph = cugraph.Graph()
    single_col_graph.from_cudf_edgelist(
        cu_M, source="src_0", destination="dst_0"
    )

    # Only the first five edges are used as query pairs.
    pair_columns = ["src_0", "src_1", "dst_0", "dst_1"]
    vertex_pair = cu_M[pair_columns][:5]

    # Unit weight for every vertex of the reference graph.
    vertex_count = single_col_graph.number_of_vertices()
    unit_weights = cudf.Series(np.ones(vertex_count, dtype=np.float32))
    weights = cudf.DataFrame()
    weights['vertex'] = single_col_graph.nodes()
    weights['vertex_'] = weights['vertex'] + offset
    weights['weight'] = unit_weights

    df_res = cugraph.jaccard_w(multi_col_graph, weights, vertex_pair)

    # Drop the shifted id column for the single-column run.
    single_weights = weights[['vertex', 'weight']]
    single_pairs = vertex_pair[["src_0", "dst_0"]]
    df_exp = cugraph.jaccard_w(single_col_graph, single_weights, single_pairs)

    # Both runs must yield the same coefficients.
    result_coeff = df_res["jaccard_coeff"]
    expected_coeff = df_exp["jaccard_coeff"]
    assert result_coeff.equals(expected_coeff)
Example #2
0
def test_wjaccard_multi_column(read_csv):
    """Weighted Jaccard on multi-column vertices must match single-column.

    The first element of the ``read_csv`` fixture supplies the edge list.
    It is loaded into a graph whose vertices are identified by two columns
    (the original id and the id shifted by 1000) and into a graph keyed on
    the original id only. ``cugraph.jaccard_w`` is run on the first five
    edges of each with unit weights; the results are sorted by source and
    their ``jaccard_coeff`` columns compared with ``assert_series_equal``.
    """
    edges, _unused = read_csv

    # Second id column is the first one offset by 1000.
    offset = 1000
    cu_M = cudf.DataFrame()
    cu_M["src_0"] = cudf.Series(edges["0"])
    cu_M["dst_0"] = cudf.Series(edges["1"])
    cu_M["src_1"] = cu_M["src_0"] + offset
    cu_M["dst_1"] = cu_M["dst_0"] + offset

    # Graph keyed on both id columns.
    multi_col_graph = cugraph.Graph()
    multi_col_graph.from_cudf_edgelist(
        cu_M,
        source=["src_0", "src_1"],
        destination=["dst_0", "dst_1"],
    )

    # Reference graph keyed on the first id column only.
    single_col_graph = cugraph.Graph()
    single_col_graph.from_cudf_edgelist(
        cu_M, source="src_0", destination="dst_0"
    )

    # Only the first five edges are used as query pairs.
    pair_columns = ["src_0", "src_1", "dst_0", "dst_1"]
    vertex_pair = cu_M[pair_columns][:5]

    # Unit weight for every vertex of the reference graph.
    vertex_count = single_col_graph.number_of_vertices()
    unit_weights = cudf.Series(np.ones(vertex_count, dtype=np.float32))
    weights = cudf.DataFrame()
    weights['vertex'] = single_col_graph.nodes()
    weights['vertex_'] = weights['vertex'] + offset
    weights['weight'] = unit_weights

    df_res = cugraph.jaccard_w(multi_col_graph, weights, vertex_pair)

    # Drop the shifted id column for the single-column run.
    single_weights = weights[['vertex', 'weight']]
    single_pairs = vertex_pair[["src_0", "dst_0"]]
    df_exp = cugraph.jaccard_w(single_col_graph, single_weights, single_pairs)

    # Sort both results by source so rows line up before comparing.
    actual = df_res.sort_values("0_source").reset_index()
    expected = df_exp.sort_values("source").reset_index()
    actual_coeff = actual["jaccard_coeff"]
    expected_coeff = expected["jaccard_coeff"]
    assert_series_equal(actual_coeff, expected_coeff)
Example #3
0
def cugraph_call(cu_M):
    """Run weighted Jaccard on the graph in ``cu_M`` and report its runtime.

    Parameters
    ----------
    cu_M
        Edge list with a source column ``'0'`` and a destination column
        ``'1'``; it is handed to ``Graph.from_cudf_edgelist``.

    Returns
    -------
    The ``'jaccard_coeff'`` column of the frame returned by
    ``cugraph.jaccard_w``.

    Side effects
    ------------
    Prints the elapsed time of the ``jaccard_w`` call and the full result
    frame.
    """
    # One unit weight for every id from 0 up to the largest id in the edges.
    id_count = max(cu_M['0'].max(), cu_M['1'].max()) + 1
    weights_arr = cudf.Series(np.ones(id_count, dtype=np.float32))

    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1')

    # perf_counter is monotonic and high-resolution, so the measured interval
    # cannot be skewed by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    df = cugraph.jaccard_w(G, weights_arr)
    elapsed = time.perf_counter() - start
    print('Time : ' + str(elapsed))
    print(df)
    return df['jaccard_coeff']
Example #4
0
def cugraph_call(cu_M):
    """Run weighted Jaccard on the graph in ``cu_M`` and report its runtime.

    Parameters
    ----------
    cu_M
        Edge list with a source column ``"0"`` and a destination column
        ``"1"``; it is handed to ``Graph.from_cudf_edgelist``.

    Returns
    -------
    The ``"jaccard_coeff"`` column of the ``cugraph.jaccard_w`` result after
    the rows have been sorted by ``("source", "destination")`` and the index
    reset.

    Side effects
    ------------
    Prints the elapsed time of the ``jaccard_w`` call.
    """
    # One unit weight for every id from 0 up to the largest id in the edges.
    id_count = max(cu_M["0"].max(), cu_M["1"].max()) + 1
    weights_arr = cudf.Series(np.ones(id_count, dtype=np.float32))

    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source="0", destination="1")

    # perf_counter is monotonic and high-resolution, so the measured interval
    # cannot be skewed by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    df = cugraph.jaccard_w(G, weights_arr)
    elapsed = time.perf_counter() - start
    print("Time : " + str(elapsed))

    # Fix the row order so callers get a deterministic sequence.
    df = df.sort_values(["source", "destination"]).reset_index(drop=True)

    return df["jaccard_coeff"]
Example #5
0
def cugraph_call(cu_M):
    """Run weighted Jaccard on the graph in ``cu_M`` and report its runtime.

    Parameters
    ----------
    cu_M
        Edge list whose column ``'0'`` holds sources and column ``'1'``
        holds destinations.

    Returns
    -------
    The ``'jaccard_coeff'`` column of the frame returned by
    ``cugraph.jaccard_w``.

    Side effects
    ------------
    Prints the elapsed time of the ``jaccard_w`` call.
    """
    sources = cu_M['0']
    destinations = cu_M['1']

    # One unit weight for every id from 0 up to the largest id in the edges.
    id_count = max(sources.max(), destinations.max()) + 1
    weights_arr = cudf.Series(np.ones(id_count, dtype=np.float32))

    G = cugraph.Graph()
    # NOTE(review): add_edge_list looks like a legacy cuGraph entry point;
    # kept as-is because the installed cuGraph version is not visible here.
    G.add_edge_list(sources, destinations, None)

    # perf_counter is monotonic and high-resolution, so the measured interval
    # cannot be skewed by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    df = cugraph.jaccard_w(G, weights_arr)
    elapsed = time.perf_counter() - start
    print('Time : ' + str(elapsed))

    return df['jaccard_coeff']