Python sssp Examples, cugraph.sssp Python Examples

Example #1

0

Show file

def cugraph_Call(M, source):
    """Run ``cugraph.sssp`` from ``source`` and return the distance column.

    Parameters
    ----------
    M : object exposing ``row`` and ``col``
        ``M.row`` supplies the edge sources and ``M.col`` the edge
        destinations (presumably a SciPy COO matrix -- confirm with caller).
    source
        Start vertex, forwarded unchanged to ``cugraph.sssp``.

    Returns
    -------
    list of tuple
        ``(i, d)`` pairs, where ``i`` is the position of ``d`` in the
        ``'distance'`` column of the SSSP result.
    """
    # Wrap both edge endpoint arrays in cudf Series.
    edge_src = cudf.Series(M.row)
    edge_dst = cudf.Series(M.col)

    print('sources size = ' + str(len(edge_src)))
    print('destinations size = ' + str(len(edge_dst)))

    # Build the graph; the third argument (edge values) is None.
    G = cugraph.Graph()
    G.add_edge_list(edge_src, edge_dst, None)

    print('cugraph Solving... ')
    started = time.time()
    dist = cugraph.sssp(G, source)
    elapsed = time.time() - started
    print('Time : ' + str(elapsed))

    # Pair every distance with its positional index.
    return list(enumerate(dist['distance']))

Example #2

0

Show file

def cugraph_call(cu_M, source, edgevals=False):
    """Run ``cugraph.sssp`` on a ``DiGraph`` built from ``cu_M``.

    Parameters
    ----------
    cu_M
        Edge list with columns ``'0'`` (source), ``'1'`` (destination) and,
        when ``edgevals`` is ``True``, ``'2'`` (edge attribute).
    source
        Start vertex, forwarded unchanged to ``cugraph.sssp``.
    edgevals : bool
        Column ``'2'`` is attached as ``edge_attr`` only when this is exactly
        ``True``.

    Returns
    -------
    tuple
        ``(result, max_val)``: ``result`` maps each vertex to
        ``(distance, predecessor)``; ``max_val`` is the largest value
        representable in the distance column's dtype.
    """
    G = cugraph.DiGraph()
    edge_kwargs = {'source': '0', 'destination': '1'}
    if edgevals is True:
        edge_kwargs['edge_attr'] = '2'
    G.from_cudf_edgelist(cu_M, **edge_kwargs)

    print('sources size = ' + str(len(cu_M['0'])))
    print('destinations size = ' + str(len(cu_M['1'])))

    print('cugraph Solving... ')
    started = time.time()
    df = cugraph.sssp(G, source)
    elapsed = time.time() - started
    print('Cugraph Time : ' + str(elapsed))

    # Choose the integer or floating-point limits helper from the dtype.
    if np.issubdtype(df['distance'].dtype, np.integer):
        limits = np.iinfo
    else:
        limits = np.finfo
    max_val = limits(df['distance'].dtype).max

    vertices = df['vertex'].to_array()
    dists = df['distance'].to_array()
    preds = df['predecessor'].to_array()
    return dict(zip(vertices, zip(dists, preds))), max_val

Example #3

0

Show file

File: routing_dsh.py Project: cjber/ahah

    def get_shortest_dists(self, poi: namedtuple) -> None:
        """
        Route from one POI to the postcode nodes with `cugraph.sssp`.

        When `self.buffer` is truthy the graph is first replaced by the result
        of `self.create_sub_graph`. Reachable SSSP results are restricted to
        `self.postcode_ids`, tagged with the incremented `self.idx`, merged
        with any rows already stored in `self.log_file`, reduced to one row per
        vertex and written back to `self.log_file` as CSV.

        Parameters
        ----------
        poi : namedtuple
            Single POI created from `df.itertuples()`; its `node_id` is used
            as the SSSP source.
        """
        if self.buffer:
            self.graph = self.create_sub_graph(poi=poi)

        sssp_result = cugraph.sssp(self.graph, source=poi.node_id)
        shortest_paths: cudf.DataFrame = cugraph.filter_unreachable(sssp_result)
        is_postcode = shortest_paths.vertex.isin(self.postcode_ids)
        pc_dist = shortest_paths[is_postcode]

        self.idx += 1
        pc_dist["idx"] = self.idx

        keep = ["vertex", "distance", "idx"]
        if self.log_file.exists():
            # Extend the rows logged so far with this POI's rows.
            self.distances = cudf.read_csv(self.log_file).append(pc_dist)
        else:
            self.distances = pc_dist[keep]

        # Sort by distance, then drop repeated vertices.
        # NOTE(review): relies on drop_duplicates keeping the first
        # (i.e. shortest) row per vertex by default -- confirm.
        deduped = self.distances.sort_values("distance").drop_duplicates("vertex")
        self.distances = deduped.reset_index()[keep]
        self.distances.to_csv(self.log_file, index=False)

Example #4

0

Show file

File: test_sssp.py Project: stjordanis/cugraph

def cugraph_call(cu_M, source, edgevals=False):
    """Run ``cugraph.sssp`` on a ``Graph`` built with ``add_edge_list``.

    Parameters
    ----------
    cu_M
        Edge list with columns ``'0'`` (source), ``'1'`` (destination) and
        ``'2'`` (edge values, read only when ``edgevals`` is not ``False``).
    source
        Start vertex, forwarded unchanged to ``cugraph.sssp``.
    edgevals
        When exactly ``False`` no edge values are passed to the graph.

    Returns
    -------
    list of tuple
        ``(i, d)`` pairs, where ``i`` is the position of ``d`` in the
        ``'distance'`` column of the SSSP result.
    """
    sources = cu_M['0']
    destinations = cu_M['1']
    values = None if edgevals is False else cu_M['2']

    print('sources size = ' + str(len(sources)))
    print('destinations size = ' + str(len(destinations)))

    # Build the graph that SSSP runs on.
    G = cugraph.Graph()
    G.add_edge_list(sources, destinations, values)

    print('cugraph Solving... ')
    started = time.time()
    dist = cugraph.sssp(G, source)
    elapsed = time.time() - started
    print('Time : ' + str(elapsed))

    # Convert the distance column to an array and pair values with positions.
    return list(enumerate(dist['distance'].to_array()))

Example #5

0

Show file

def cugraph_call(cu_M, source, edgevals=False):
    """Run ``cugraph.sssp`` and return distances and predecessors per vertex.

    Parameters
    ----------
    cu_M
        Edge list with columns ``'0'`` (source), ``'1'`` (destination) and
        ``'2'`` (edge values, read only when ``edgevals`` is not ``False``).
    source
        Start vertex, forwarded unchanged to ``cugraph.sssp``.
    edgevals
        When exactly ``False`` no edge values are passed to the graph.

    Returns
    -------
    dict
        Maps each entry of the result's ``'vertex'`` column to a
        ``(distance, predecessor)`` tuple.
    """
    sources = cu_M['0']
    destinations = cu_M['1']
    values = cu_M['2'] if edgevals is not False else None

    print('sources size = ' + str(len(sources)))
    print('destinations size = ' + str(len(destinations)))

    # Build the graph that SSSP runs on.
    G = cugraph.Graph()
    G.add_edge_list(sources, destinations, values)

    print('cugraph Solving... ')
    started = time.time()
    df = cugraph.sssp(G, source)
    elapsed = time.time() - started
    print('Time : ' + str(elapsed))

    vertices = df['vertex'].to_array()
    dists = df['distance'].to_array()
    preds = df['predecessor'].to_array()
    return dict(zip(vertices, zip(dists, preds)))

Example #6

0

Show file

def test_filter_unreachable(graph_file, source):
    """Check that ``cugraph.filter_unreachable`` removes max-distance rows.

    Runs SSSP from ``source`` on the weighted ``DiGraph`` read from
    ``graph_file``, filters the result, and asserts that no remaining row has
    a distance equal to the dtype's maximum and that some rows remain.
    """
    gc.collect()

    cu_M = utils.read_csv_file(graph_file)

    print("sources size = " + str(len(cu_M)))
    print("destinations size = " + str(len(cu_M)))

    # Build the weighted directed graph that SSSP runs on.
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2")

    print("cugraph Solving... ")
    started = time.time()
    df = cugraph.sssp(G, source)
    elapsed = time.time() - started
    print("Time : " + str(elapsed))

    reachable_df = cugraph.filter_unreachable(df)

    # Choose the limits helper from the unfiltered distance dtype; any dtype
    # that is neither integer nor inexact skips the max-distance check.
    if np.issubdtype(df["distance"].dtype, np.integer):
        limits = np.iinfo
    elif np.issubdtype(df["distance"].dtype, np.inexact):
        limits = np.finfo
    else:
        limits = None

    if limits is not None:
        # `inf` must remain a local with this exact name: the query string
        # refers to it through `@inf`.
        inf = limits(reachable_df["distance"].dtype).max  # noqa: F841
        assert len(reachable_df.query("distance == @inf")) == 0

    assert len(reachable_df) != 0

Example #7

0

Show file

File: test_sssp.py Project: hyperbolic2346/cugraph

def cugraph_call(cu_M, source, edgevals=False):
    """Run ``cugraph.sssp`` on a ``DiGraph`` built from ``cu_M``.

    Parameters
    ----------
    cu_M
        Edge list with columns ``"0"`` (source), ``"1"`` (destination) and,
        when ``edgevals`` is ``True``, ``"2"`` (edge attribute).
    source
        Start vertex, forwarded unchanged to ``cugraph.sssp``.
    edgevals : bool
        Column ``"2"`` is attached as ``edge_attr`` only when this is exactly
        ``True``.

    Returns
    -------
    tuple
        ``(result, max_val)``: ``result`` maps each vertex to
        ``(distance, predecessor)``; ``max_val`` is the largest value
        representable in the distance column's dtype.
    """
    G = cugraph.DiGraph()
    weight_column = "2" if edgevals is True else None
    if weight_column is None:
        G.from_cudf_edgelist(cu_M, source="0", destination="1")
    else:
        G.from_cudf_edgelist(
            cu_M, source="0", destination="1", edge_attr=weight_column
        )

    print("sources size = " + str(len(cu_M["0"])))
    print("destinations size = " + str(len(cu_M["1"])))

    print("cugraph Solving... ")
    started = time.time()
    df = cugraph.sssp(G, source)
    elapsed = time.time() - started
    print("Cugraph Time : " + str(elapsed))

    # Integer distances use iinfo, everything else uses finfo.
    is_integer = np.issubdtype(df["distance"].dtype, np.integer)
    max_val = (np.iinfo if is_integer else np.finfo)(df["distance"].dtype).max

    vertices = df["vertex"].to_array()
    dists = df["distance"].to_array()
    preds = df["predecessor"].to_array()
    result = dict(zip(vertices, zip(dists, preds)))
    return result, max_val

Example #8

0

Show file

def test_sssp_data_type_conversion(managed, pool, graph_file, source):
    """Compare cugraph SSSP on int32 weights against NetworkX Dijkstra.

    RMM is reinitialised with the given ``managed`` / ``pool`` settings, the
    edge weights of both the cudf and the pandas edge lists are cast to
    ``int32``, and every vertex returned by ``cugraph.sssp`` is checked
    against ``nx.single_source_dijkstra_path_length``.
    """
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)
    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)

    # cugraph side: cast weights to int32 and confirm the graph kept them.
    cu_M['2'] = cu_M['2'].astype(np.int32)
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='2')
    assert G.edgelist.edgelist_df['weights'].dtype == np.int32

    df = cugraph.sssp(G, source)
    # The "unreachable" marker is taken as finfo max of the distance dtype.
    max_val = np.finfo(df['distance'].dtype).max
    cu_paths = dict(zip(df['vertex'].to_array(),
                        zip(df['distance'].to_array(),
                            df['predecessor'].to_array())))

    # networkx side: same cast, then confirm the weights are plain ints.
    M['weight'] = M['weight'].astype(np.int32)
    Gnx = nx.from_pandas_edgelist(M,
                                  source='0',
                                  target='1',
                                  edge_attr='weight',
                                  create_using=nx.DiGraph())
    first_edge = list(Gnx.edges(data=True))[0]
    assert type(first_edge[2]['weight']) is int
    nx_paths = nx.single_source_dijkstra_path_length(Gnx, source)

    # Count mismatches between the two results.
    err = 0
    for vid, (cu_dist, pred) in cu_paths.items():
        # NOTE : If distance type is float64 then the distance should be
        # compared against np.finfo(np.float64).max
        if cu_dist == max_val:
            # cugraph says unreachable; networkx must not list the vertex.
            if vid in nx_paths:
                err += 1
            continue

        if cu_dist != nx_paths[vid]:
            err += 1
        # Predecessor distance plus edge weight must equal this distance.
        if vid != source:
            edge_weight = Gnx[pred][vid]['weight']
            if cu_paths[pred][0] + edge_weight != cu_dist:
                err += 1

    assert err == 0

Example #9

0

Show file

File: test_sssp.py Project: hyperbolic2346/cugraph

def test_sssp_data_type_conversion(graph_file, source):
    """Compare cugraph SSSP on int32 weights against NetworkX Dijkstra.

    The edge weights of both the cudf and the pandas edge lists are cast to
    ``int32``; every vertex returned by ``cugraph.sssp`` is then checked
    against ``nx.single_source_dijkstra_path_length``.
    """
    gc.collect()

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)

    # cugraph side: cast weights to int32 and confirm the graph kept them.
    cu_M["2"] = cu_M["2"].astype(np.int32)
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2")
    assert G.edgelist.edgelist_df["weights"].dtype == np.int32

    df = cugraph.sssp(G, source)
    # The "unreachable" marker is taken as finfo max of the distance dtype.
    max_val = np.finfo(df["distance"].dtype).max
    vertices = df["vertex"].to_array()
    dists = df["distance"].to_array()
    preds = df["predecessor"].to_array()
    cu_paths = dict(zip(vertices, zip(dists, preds)))

    # networkx side: same cast, then confirm the weights are plain ints.
    M["weight"] = M["weight"].astype(np.int32)
    Gnx = nx.from_pandas_edgelist(
        M,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.DiGraph(),
    )
    sample_weight = list(Gnx.edges(data=True))[0][2]["weight"]
    assert type(sample_weight) is int
    nx_paths = nx.single_source_dijkstra_path_length(Gnx, source)

    # Count mismatches between the two results.
    err = 0
    for vid, (cu_dist, pred) in cu_paths.items():
        # NOTE : If distance type is float64 then the distance should be
        # compared against np.finfo(np.float64).max
        if cu_dist != max_val:
            if cu_dist != nx_paths[vid]:
                err += 1
            # Predecessor distance plus edge weight must equal this distance.
            if vid != source:
                edge_weight = Gnx[pred][vid]["weight"]
                if cu_paths[pred][0] + edge_weight != cu_dist:
                    err += 1
        elif vid in nx_paths:
            # cugraph says unreachable but networkx found a path.
            err += 1

    assert err == 0

Example #10

0

Show file

File: test_mg_sssp.py Project: rapidsai/cugraph

def test_dask_sssp(dask_client):
    """Check that ``dcg.sssp`` matches ``cugraph.sssp`` on netscience.csv.

    The same CSV is read with ``dask_cudf`` and with ``cudf``; SSSP is run
    from vertex 0 on both graphs and the two results are merged on
    ``"vertex"``. The test fails if any merged row has differing distances.
    """
    gc.collect()

    dataset = RAPIDS_DATASET_ROOT_DIR_PATH / "netscience.csv"
    input_data_path = dataset.as_posix()
    print(f"dataset={input_data_path}")
    chunksize = dcg.get_chunksize(input_data_path)

    # Distributed copy of the edge list.
    ddf = dask_cudf.read_csv(
        input_data_path,
        chunksize=chunksize,
        delimiter=" ",
        names=["src", "dst", "value"],
        dtype=["int32", "int32", "float32"],
    )

    # Single-GPU copy of the same edge list.
    df = cudf.read_csv(
        input_data_path,
        delimiter=" ",
        names=["src", "dst", "value"],
        dtype=["int32", "int32", "float32"],
    )

    g = cugraph.DiGraph()
    g.from_cudf_edgelist(df, "src", "dst", "value", renumber=True)

    dg = cugraph.DiGraph()
    dg.from_dask_cudf_edgelist(ddf, "src", "dst", "value")

    expected_dist = cugraph.sssp(g, 0)
    print(expected_dist)
    result_dist = dcg.sssp(dg, 0).compute()

    compare_dist = expected_dist.merge(
        result_dist, on="vertex", suffixes=["_local", "_dask"]
    )

    # Count the merged rows whose two distances disagree.
    err = sum(
        1
        for row in range(len(compare_dist))
        if compare_dist["distance_local"].iloc[row]
        != compare_dist["distance_dask"].iloc[row]
    )
    assert err == 0

Example #11

0

Show file

File: test_mg_sssp.py Project: goncaloperes/cugraph

def test_dask_sssp(client_connection):
    """Check that ``dcg.sssp`` matches ``cugraph.sssp`` on netscience.csv.

    The same CSV is read with ``dask_cudf`` and with ``cudf``; SSSP is run
    from vertex 0 on both graphs and the two results are merged on
    ``"vertex"``. The test fails if any merged row has differing distances.
    """
    gc.collect()

    # FIXME: update this to allow dataset to be parameterized and have dataset
    # part of test param id (see other tests)
    input_data_path = r"../datasets/netscience.csv"
    print(f"dataset={input_data_path}")
    chunksize = dcg.get_chunksize(input_data_path)

    # Distributed copy of the edge list.
    ddf = dask_cudf.read_csv(
        input_data_path,
        chunksize=chunksize,
        delimiter=" ",
        names=["src", "dst", "value"],
        dtype=["int32", "int32", "float32"],
    )

    # Single-GPU copy of the same edge list.
    df = cudf.read_csv(
        input_data_path,
        delimiter=" ",
        names=["src", "dst", "value"],
        dtype=["int32", "int32", "float32"],
    )

    g = cugraph.DiGraph()
    g.from_cudf_edgelist(df, "src", "dst", "value", renumber=True)

    dg = cugraph.DiGraph()
    dg.from_dask_cudf_edgelist(ddf, "src", "dst", "value")

    expected_dist = cugraph.sssp(g, 0)
    print(expected_dist)
    result_dist = dcg.sssp(dg, 0).compute()

    compare_dist = expected_dist.merge(result_dist,
                                       on="vertex",
                                       suffixes=["_local", "_dask"])

    # Count the merged rows whose two distances disagree.
    mismatches = 0
    for row in range(len(compare_dist)):
        local_dist = compare_dist["distance_local"].iloc[row]
        dask_dist = compare_dist["distance_dask"].iloc[row]
        if local_dist != dask_dist:
            mismatches += 1
    assert mismatches == 0

Example #12

0

Show file

def test_get_traversed_cost(graph_file):
    """Check ``get_traversed_cost`` against SSSP distances on the same weights.

    A random integer in ``[0, 10)`` is added to column ``'2'`` to form an
    ``'info'`` column, which is used both as the SSSP edge attribute and as
    the per-edge cost handed to ``get_traversed_cost``. The two results must
    have the same number of rows and numerically close values.
    """
    cu_M = utils.read_csv_file(graph_file)

    # Perturb the original weights with random integer noise.
    noise = cudf.Series(np.random.randint(10, size=(cu_M.shape[0])))
    cu_M['info'] = cu_M['2'] + noise

    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='info')

    # Run SSSP starting at vertex 16 and retrace the cost from the same vertex.
    start_vertex = 16
    df = cugraph.sssp(G, start_vertex)
    answer = cugraph.utilities.path_retrieval.get_traversed_cost(
        df, start_vertex, cu_M['0'], cu_M['1'], cu_M['info'])

    # Sort both results by vertex so the rows line up.
    df = df.sort_values(by='vertex').reset_index()
    answer = answer.sort_values(by='vertex').reset_index()

    assert df.shape[0] == answer.shape[0]
    assert np.allclose(df['distance'], answer['info'])

Example #13

0

Show file

File: test_filter_unreachable.py Project: hieuqtran/cugraph

def test_filter_unreachable(managed, pool, graph_file, source):
    """Check that ``cugraph.filter_unreachable`` removes max-distance rows.

    RMM is finalised, reconfigured through ``rmm_config`` with the given
    ``managed`` / ``pool`` settings and initialised again. SSSP is then run
    from ``source`` on an unweighted ``Graph`` and the filtered result is
    checked.
    """
    gc.collect()

    # Restart RMM with the parametrised allocator settings.
    rmm.finalize()
    rmm_config.use_managed_memory = managed
    rmm_config.use_pool_allocator = pool
    rmm_config.initial_pool_size = 2 << 27
    rmm.initialize()

    assert rmm.is_initialized()

    cu_M = utils.read_csv_file(graph_file)
    sources = cu_M['0']
    destinations = cu_M['1']

    print('sources size = ' + str(len(sources)))
    print('destinations size = ' + str(len(destinations)))

    # Build the graph that SSSP runs on (no edge values are passed).
    G = cugraph.Graph()
    G.add_edge_list(sources, destinations)

    print('cugraph Solving... ')
    started = time.time()
    df = cugraph.sssp(G, source)
    elapsed = time.time() - started
    print('Time : '+str(elapsed))

    reachable_df = cugraph.filter_unreachable(df)

    # Choose the limits helper from the unfiltered distance dtype; any dtype
    # that is neither integer nor inexact skips the max-distance check.
    if np.issubdtype(df['distance'].dtype, np.integer):
        limits = np.iinfo
    elif np.issubdtype(df['distance'].dtype, np.inexact):
        limits = np.finfo
    else:
        limits = None

    if limits is not None:
        # `inf` must remain a local with this exact name: the query string
        # refers to it through `@inf`.
        inf = limits(reachable_df['distance'].dtype).max  # noqa: F841
        assert len(reachable_df.query("distance == @inf")) == 0

    assert len(reachable_df) != 0

Example #14

0

Show file

def test_multigraph_sssp(graph_file):
    """Compare cugraph SSSP on a ``MultiDiGraph`` with NetworkX Dijkstra.

    SSSP is run from vertex 0 on both libraries. cugraph rows whose distance
    equals the dtype's ``finfo`` maximum are dropped before the distances,
    ordered by vertex, are compared element-wise.
    """
    # FIXME: Migrate to new test fixtures for Graph setup once available
    cuM = utils.read_csv_file(graph_file)
    G = cugraph.MultiDiGraph()
    G.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2")

    cu_paths = cugraph.sssp(G, 0)
    max_val = np.finfo(cu_paths["distance"].dtype).max
    # Keep only rows whose distance is not the max-value marker.
    keep_rows = cu_paths["distance"] != max_val
    cu_paths = cu_paths[keep_rows]

    nxM = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)
    Gnx = nx.from_pandas_edgelist(
        nxM,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.MultiDiGraph(),
    )
    nx_paths = nx.single_source_dijkstra_path_length(Gnx, 0)

    # Order both distance sequences by vertex id before comparing.
    cu_sorted = cu_paths.sort_values(by='vertex')
    cu_dist = cu_sorted['distance'].to_numpy()
    nx_dist = [dist for _, dist in sorted(nx_paths.items())]

    assert (cu_dist == nx_dist).all()

Example #15

0

Show file

File: test_filter_unreachable.py Project: zeta1999/cugraph

def test_filter_unreachable(managed, pool, graph_file, source):
    """Check that ``cugraph.filter_unreachable`` removes max-distance rows.

    RMM is reinitialised with the given ``managed`` / ``pool`` settings.
    SSSP is then run from ``source`` on the weighted ``DiGraph`` read from
    ``graph_file`` and the filtered result is checked.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27
    )

    assert rmm.is_initialized()

    cu_M = utils.read_csv_file(graph_file)

    print('sources size = ' + str(len(cu_M)))
    print('destinations size = ' + str(len(cu_M)))

    # Build the weighted directed graph that SSSP runs on.
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='2')

    print('cugraph Solving... ')
    started = time.time()
    df = cugraph.sssp(G, source)
    elapsed = time.time() - started
    print('Time : '+str(elapsed))

    reachable_df = cugraph.filter_unreachable(df)

    # Choose the limits helper from the unfiltered distance dtype; any dtype
    # that is neither integer nor inexact skips the max-distance check.
    if np.issubdtype(df['distance'].dtype, np.integer):
        limits = np.iinfo
    elif np.issubdtype(df['distance'].dtype, np.inexact):
        limits = np.finfo
    else:
        limits = None

    if limits is not None:
        # `inf` must remain a local with this exact name: the query string
        # refers to it through `@inf`.
        inf = limits(reachable_df['distance'].dtype).max  # noqa: F841
        assert len(reachable_df.query("distance == @inf")) == 0

    assert len(reachable_df) != 0

Example #16

0

Show file

def getPDF(knnRelation, numBins, numSamples, numberOfNodes):
    """Estimate a histogram-based distribution of shortest-path distances.

    SSSP is run from vertices ``0 .. min(numSamples, numberOfNodes) - 1`` on
    the symmetrized graph. For every source ``i`` only the distances to
    vertices with an id greater than ``i`` are histogrammed, and the
    per-source histograms are accumulated.

    Parameters
    ----------
    knnRelation
        Iterable of three sequences ``(us, vs, ds)``: edge sources, edge
        destinations and edge weights.
    numBins : int
        Number of histogram bins.
    numSamples : int
        Upper bound on the number of SSSP source vertices.
    numberOfNodes : int
        Number of vertices; also caps the number of SSSP sources.

    Returns
    -------
    tuple
        ``(pdfMaxDist, pdf)``: ``pdfMaxDist`` is 1.2 times the largest
        distance kept for source 0 and is the upper edge of the histogram
        range; ``pdf`` is the accumulated histogram divided by its sum.

    Raises
    ------
    ValueError
        If ``min(numSamples, numberOfNodes)`` is not positive. Without this
        check the loop body never runs and ``pdfMaxDist`` is unbound at the
        ``return`` statement.
    """
    numSources = min(numSamples, numberOfNodes)
    if numSources <= 0:
        # Fail before any graph is built rather than with an opaque
        # UnboundLocalError at the end.
        raise ValueError(
            "getPDF needs at least one SSSP source: "
            "min(numSamples, numberOfNodes) must be positive, got "
            + str(numSources))

    us, vs, ds = map(cudf.Series, knnRelation)
    us, vs, ds = cugraph.structure.symmetrize(us, vs, ds)
    df = cudf.DataFrame({'source': us, 'destination': vs, 'weight': ds})

    G = cugraph.Graph()
    G.from_cudf_edgelist(df, edge_attr='weight')

    pdf = np.zeros(numBins)
    for i in range(numSources):
        ssspResult: cudf.DataFrame = cugraph.sssp(G, i)
        distances: cudf.Series = ssspResult['distance']
        vertexIds: cudf.Series = ssspResult['vertex']
        # Keep only targets with a larger id than the source (presumably so
        # each unordered vertex pair is counted once -- confirm).
        distances = distances[vertexIds > i]
        if i == 0:
            # The histogram range is fixed from the first source, with 20%
            # headroom, and reused for all later sources.
            pdfMaxDist = 1.2 * distances.max()
        hist, _ = np.histogram(distances.tolist(),
                               bins=numBins,
                               range=(0, pdfMaxDist))
        pdf += hist
    # NOTE(review): if no distance falls inside the range, pdf.sum() is 0 and
    # this division yields NaNs -- confirm whether callers need a guard.
    return pdfMaxDist, pdf / pdf.sum()

Example #17

0

Show file

def sssp(G, start):
    """Return the result of ``cugraph.sssp`` on ``G`` with ``start`` as source."""
    result = cugraph.sssp(G, source=start)
    return result

Example #18

0

Show file

import cugraph
import cudf
import json

# Read the edge list from a header-less CSV into three named, typed columns.
M = cudf.read_csv('simple_test_sssp.csv', names=["src","dst",'value'], dtype=['int32', 'int32', 'float32'], header=None)
G = cugraph.Graph()
# No edge_attr is passed here, so the 'value' column is not attached to the
# graph.
G.from_cudf_edgelist(M, source='src', destination='dst')
# Run single-source shortest paths from vertex 0.
distances = cugraph.sssp(G, 0)
print(distances)
print(type(distances))
# Serialise the result to a JSON string, then parse it back into Python
# objects.
distances_json = distances.to_json()
json_object = json.loads(distances_json)

print(json_object)

# Pull the 'distance' and 'vertex' entries out of the parsed JSON.
vertex_distance_dic = json_object['distance']
vertex_dic = json_object['vertex']
print(vertex_distance_dic,type(vertex_distance_dic))
print(vertex_dic)