Python における sssp の使用例（cugraph.sssp のコード例）

コード例 #1

0

ファイルを表示

def cugraph_Call(M, source):
    """Run cuGraph SSSP over the edges of ``M`` and time the solve.

    Parameters
    ----------
    M :
        Object exposing ``row`` and ``col`` attributes holding the edge
        source and destination vertex ids (presumably a SciPy COO matrix;
        verify against the caller).
    source :
        Vertex id handed to ``cugraph.sssp`` as the starting vertex.

    Returns
    -------
    list of tuple
        ``(position, distance)`` pairs, one per entry of the ``distance``
        column of the SSSP result, in result order.
    """
    # Wrap the edge endpoints in cuDF series.
    src_col = cudf.Series(M.row)
    dst_col = cudf.Series(M.col)

    print(f'sources size = {len(src_col)}')
    print(f'destinations size = {len(dst_col)}')

    # Build the graph without edge weights (third argument is None).
    G = cugraph.Graph()
    G.add_edge_list(src_col, dst_col, None)

    print('cugraph Solving... ')
    started = time.time()

    sssp_result = cugraph.sssp(G, source)

    elapsed = time.time() - started
    print(f'Time : {elapsed}')

    # Pair every distance with its position in the result column.
    return list(enumerate(sssp_result['distance']))

コード例 #2

0

ファイルを表示

def cugraph_call(cu_M, source, edgevals=False):
    """Run cuGraph SSSP on a directed graph built from ``cu_M``.

    Parameters
    ----------
    cu_M :
        Edge list with columns ``'0'`` (source), ``'1'`` (destination) and,
        when ``edgevals`` is True, ``'2'`` (edge attribute).
    source :
        Starting vertex passed to ``cugraph.sssp``.
    edgevals : bool, optional
        Column ``'2'`` is attached as the edge attribute only when this is
        exactly ``True``.

    Returns
    -------
    tuple
        ``(result, max_val)`` where ``result`` maps each vertex to a
        ``(distance, predecessor)`` pair and ``max_val`` is the largest
        representable value of the distance dtype.
    """
    G = cugraph.DiGraph()
    edgelist_kwargs = {'edge_attr': '2'} if edgevals is True else {}
    G.from_cudf_edgelist(cu_M, source='0', destination='1',
                         **edgelist_kwargs)

    print(f"sources size = {len(cu_M['0'])}")
    print(f"destinations size = {len(cu_M['1'])}")

    print('cugraph Solving... ')
    started = time.time()

    df = cugraph.sssp(G, source)

    elapsed = time.time() - started
    print(f'Cugraph Time : {elapsed}')

    # Pick the numeric-limits helper that matches the distance dtype.
    dist_dtype = df['distance'].dtype
    if np.issubdtype(dist_dtype, np.integer):
        limits = np.iinfo(dist_dtype)
    else:
        limits = np.finfo(dist_dtype)
    max_val = limits.max

    vertices = df['vertex'].to_array()
    dists = df['distance'].to_array()
    preds = df['predecessor'].to_array()
    result = {v: (d, p) for v, d, p in zip(vertices, dists, preds)}
    return result, max_val

コード例 #3

0

ファイルを表示

ファイル: routing_dsh.py プロジェクト: cjber/ahah

    def get_shortest_dists(self, poi: namedtuple) -> None:
        """
        Use `cugraph.sssp` to route from one POI and update the distance log.

        When `self.buffer` is truthy the road graph is first replaced by the
        sub graph returned from `self.create_sub_graph`. SSSP is then run
        from `poi.node_id`, unreachable vertices are filtered out and only
        rows whose vertex is in `self.postcode_ids` are kept. Those rows are
        tagged with an incremented `self.idx`, combined with any rows already
        stored in `self.log_file`, reduced to the first row per vertex after
        sorting by distance, and written back to `self.log_file` as CSV so a
        later run can continue from the file.

        Parameters
        ----------
        poi : namedtuple
            Single POI created from `df.itertuples()`
        """
        if self.buffer:
            self.graph = self.create_sub_graph(poi=poi)

        sssp_result = cugraph.sssp(self.graph, source=poi.node_id)
        shortest_paths: cudf.DataFrame = cugraph.filter_unreachable(
            sssp_result)
        is_postcode = shortest_paths.vertex.isin(self.postcode_ids)
        pc_dist = shortest_paths[is_postcode]

        # Tag this POI's rows with a running counter.
        self.idx += 1
        pc_dist["idx"] = self.idx

        if self.log_file.exists():
            previous = cudf.read_csv(self.log_file)
            self.distances = previous.append(pc_dist)
        else:
            self.distances = pc_dist[["vertex", "distance", "idx"]]

        # Sorting by distance first means the row kept per vertex is the one
        # with the smallest distance seen so far.
        by_distance = self.distances.sort_values("distance")
        unique_vertices = by_distance.drop_duplicates("vertex").reset_index()
        self.distances = unique_vertices[["vertex", "distance", "idx"]]
        self.distances.to_csv(self.log_file, index=False)

コード例 #4

0

ファイルを表示

ファイル: test_sssp.py プロジェクト: stjordanis/cugraph

def cugraph_call(cu_M, source, edgevals=False):
    """Run cuGraph SSSP on a graph built from ``cu_M`` and time the solve.

    Parameters
    ----------
    cu_M :
        Edge list with columns ``'0'`` (source), ``'1'`` (destination) and
        ``'2'`` (edge values, read unless ``edgevals`` is exactly False).
    source :
        Starting vertex passed to ``cugraph.sssp``.
    edgevals : bool, optional
        When exactly ``False`` the graph is built without edge values.

    Returns
    -------
    list of tuple
        ``(position, distance)`` pairs in the order of the result's
        ``distance`` column.
    """
    src_col = cu_M['0']
    dst_col = cu_M['1']
    weights = None if edgevals is False else cu_M['2']

    print(f'sources size = {len(src_col)}')
    print(f'destinations size = {len(dst_col)}')

    # Build the graph the SSSP solver will traverse.
    G = cugraph.Graph()
    G.add_edge_list(src_col, dst_col, weights)

    print('cugraph Solving... ')
    started = time.time()

    sssp_result = cugraph.sssp(G, source)

    elapsed = time.time() - started
    print(f'Time : {elapsed}')

    # Convert the distance column to a host array, then index each entry.
    host_distances = sssp_result['distance'].to_array()
    return list(enumerate(host_distances))

コード例 #5

0

ファイルを表示

def cugraph_call(cu_M, source, edgevals=False):
    """Run cuGraph SSSP and return distances and predecessors per vertex.

    Parameters
    ----------
    cu_M :
        Edge list with columns ``'0'`` (source), ``'1'`` (destination) and
        ``'2'`` (edge values, read unless ``edgevals`` is exactly False).
    source :
        Starting vertex passed to ``cugraph.sssp``.
    edgevals : bool, optional
        When exactly ``False`` the graph is built without edge values.

    Returns
    -------
    dict
        Maps each vertex of the result to a ``(distance, predecessor)``
        pair.
    """
    src_col = cu_M['0']
    dst_col = cu_M['1']
    weights = None if edgevals is False else cu_M['2']

    print(f'sources size = {len(src_col)}')
    print(f'destinations size = {len(dst_col)}')

    # Build the graph the SSSP solver will traverse.
    G = cugraph.Graph()
    G.add_edge_list(src_col, dst_col, weights)

    print('cugraph Solving... ')
    started = time.time()

    df = cugraph.sssp(G, source)

    elapsed = time.time() - started
    print(f'Time : {elapsed}')

    vertices = df['vertex'].to_array()
    dists = df['distance'].to_array()
    preds = df['predecessor'].to_array()
    return {v: (d, p) for v, d, p in zip(vertices, dists, preds)}

コード例 #6

0

ファイルを表示

def test_filter_unreachable(graph_file, source):
    """Check that ``cugraph.filter_unreachable`` drops max-distance rows.

    SSSP is run from ``source`` on the weighted directed graph read from
    ``graph_file``. After filtering, no row may carry the maximum value of
    the distance dtype, and the filtered frame must not be empty.
    """
    gc.collect()

    cu_M = utils.read_csv_file(graph_file)

    # Both messages report the number of rows in the edge list.
    print(f"sources size = {len(cu_M)}")
    print(f"destinations size = {len(cu_M)}")

    # Build the weighted directed graph for the SSSP call.
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2")

    print("cugraph Solving... ")
    started = time.time()

    df = cugraph.sssp(G, source)

    elapsed = time.time() - started
    print(f"Time : {elapsed}")

    reachable_df = cugraph.filter_unreachable(df)

    # Choose the numeric-limits helper for the distance dtype; dtypes that
    # are neither integer nor inexact skip the max-value check.
    dist_dtype = df["distance"].dtype
    if np.issubdtype(dist_dtype, np.integer):
        limits = np.iinfo
    elif np.issubdtype(dist_dtype, np.inexact):
        limits = np.finfo
    else:
        limits = None

    if limits is not None:
        # `inf` must stay a local of this function: the query string below
        # refers to it as @inf.
        inf = limits(reachable_df["distance"].dtype).max  # noqa: F841
        assert len(reachable_df.query("distance == @inf")) == 0

    assert len(reachable_df) != 0
コード例 #7

0

ファイルを表示

ファイル: test_sssp.py プロジェクト: hyperbolic2346/cugraph

def cugraph_call(cu_M, source, edgevals=False):
    """Solve SSSP with cuGraph on a directed graph built from ``cu_M``.

    Parameters
    ----------
    cu_M :
        Edge list with columns ``"0"`` (source), ``"1"`` (destination) and,
        when ``edgevals`` is True, ``"2"`` (edge attribute).
    source :
        Starting vertex passed to ``cugraph.sssp``.
    edgevals : bool, optional
        Column ``"2"`` is used as the edge attribute only when this is
        exactly ``True``.

    Returns
    -------
    tuple
        ``(result, max_val)``: ``result`` maps each vertex to its
        ``(distance, predecessor)`` pair; ``max_val`` is the maximum value
        of the distance dtype.
    """
    G = cugraph.DiGraph()
    extra = {"edge_attr": "2"} if edgevals is True else {}
    G.from_cudf_edgelist(cu_M, source="0", destination="1", **extra)

    src_count = len(cu_M["0"])
    print(f"sources size = {src_count}")
    dst_count = len(cu_M["1"])
    print(f"destinations size = {dst_count}")

    print("cugraph Solving... ")
    tic = time.time()

    df = cugraph.sssp(G, source)

    toc = time.time() - tic
    print(f"Cugraph Time : {toc}")

    # Integer and floating distance dtypes need different limit lookups.
    is_integer = np.issubdtype(df["distance"].dtype, np.integer)
    info = np.iinfo if is_integer else np.finfo
    max_val = info(df["distance"].dtype).max

    vertex_ids = df["vertex"].to_array()
    distances = df["distance"].to_array()
    predecessors = df["predecessor"].to_array()
    result = {
        v: (d, p) for v, d, p in zip(vertex_ids, distances, predecessors)
    }
    return result, max_val

コード例 #8

0

ファイルを表示

def test_sssp_data_type_conversion(managed, pool, graph_file, source):
    """Compare cuGraph SSSP against NetworkX Dijkstra with int32 weights.

    RMM is re-initialised from ``managed`` and ``pool``. The edge weights
    are then cast to int32 on both sides. Every vertex cuGraph reports as
    reachable must match the NetworkX distance and satisfy
    ``dist(pred) + weight(pred, v) == dist(v)``. Every vertex cuGraph
    reports at the dtype maximum must be absent from the NetworkX result.
    """
    gc.collect()

    # 2 << 27 == 2**28
    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)

    # cuGraph side: cast the weight column to int32 before building.
    cu_M['2'] = cu_M['2'].astype(np.int32)
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='2')
    # The graph must have kept the int32 weights.
    assert G.edgelist.edgelist_df['weights'].dtype == np.int32
    df = cugraph.sssp(G, source)
    max_val = np.finfo(df['distance'].dtype).max
    vertices = df['vertex'].to_array()
    dists = df['distance'].to_array()
    preds = df['predecessor'].to_array()
    cu_paths = {v: (d, p) for v, d, p in zip(vertices, dists, preds)}

    # NetworkX side: same int32 cast on the weight column.
    M['weight'] = M['weight'].astype(np.int32)
    Gnx = nx.from_pandas_edgelist(M,
                                  source='0',
                                  target='1',
                                  edge_attr='weight',
                                  create_using=nx.DiGraph())
    # The first NetworkX edge must carry a plain Python int weight.
    first_edge_attrs = list(Gnx.edges(data=True))[0][2]
    assert type(first_edge_attrs['weight']) is int
    nx_paths = nx.single_source_dijkstra_path_length(Gnx, source)

    # Count every disagreement between the two results.
    err = 0
    for vid, (dist, pred) in cu_paths.items():
        # NOTE : If distance type is float64 then the distance should be
        # compared against np.finfo(np.float64).max
        if dist == max_val:
            # cuGraph says unreachable, so NetworkX must not list it.
            if vid in nx_paths:
                err += 1
            continue

        if dist != nx_paths[vid]:
            err += 1
        # Predecessor distance plus edge weight must equal this distance.
        if vid != source:
            edge_weight = Gnx[pred][vid]['weight']
            if cu_paths[pred][0] + edge_weight != dist:
                err += 1

    assert err == 0

コード例 #9

0

ファイルを表示

ファイル: test_sssp.py プロジェクト: hyperbolic2346/cugraph

def test_sssp_data_type_conversion(graph_file, source):
    """Compare cuGraph SSSP against NetworkX Dijkstra with int32 weights.

    The edge weights are cast to int32 on both sides. Every vertex cuGraph
    reports as reachable must match the NetworkX distance and satisfy
    ``dist(pred) + weight(pred, v) == dist(v)``. Every vertex cuGraph
    reports at the dtype maximum must be absent from the NetworkX result.
    """
    gc.collect()

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)

    # cuGraph side: cast the weight column to int32 before building.
    cu_M["2"] = cu_M["2"].astype(np.int32)
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2")
    # The graph must have kept the int32 weights.
    assert G.edgelist.edgelist_df["weights"].dtype == np.int32
    df = cugraph.sssp(G, source)
    max_val = np.finfo(df["distance"].dtype).max
    vertices = df["vertex"].to_array()
    dists = df["distance"].to_array()
    preds = df["predecessor"].to_array()
    cu_paths = {v: (d, p) for v, d, p in zip(vertices, dists, preds)}

    # NetworkX side: same int32 cast on the weight column.
    M["weight"] = M["weight"].astype(np.int32)
    Gnx = nx.from_pandas_edgelist(
        M,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.DiGraph(),
    )
    # The first NetworkX edge must carry a plain Python int weight.
    first_edge_attrs = list(Gnx.edges(data=True))[0][2]
    assert type(first_edge_attrs["weight"]) is int
    nx_paths = nx.single_source_dijkstra_path_length(Gnx, source)

    # Count every disagreement between the two results.
    err = 0
    for vid, (dist, pred) in cu_paths.items():
        # NOTE : If distance type is float64 then the distance should be
        # compared against np.finfo(np.float64).max
        if dist == max_val:
            # cuGraph says unreachable, so NetworkX must not list it.
            if vid in nx_paths:
                err += 1
            continue

        if dist != nx_paths[vid]:
            err += 1
        # Predecessor distance plus edge weight must equal this distance.
        if vid != source:
            edge_weight = Gnx[pred][vid]["weight"]
            if cu_paths[pred][0] + edge_weight != dist:
                err += 1

    assert err == 0

コード例 #10

0

ファイルを表示

ファイル: test_mg_sssp.py プロジェクト: rapidsai/cugraph

def test_dask_sssp(dask_client):
    """Check that multi-GPU (dask) SSSP matches single-GPU SSSP.

    The netscience edge list is read once with ``dask_cudf`` and once with
    ``cudf``, using the same reader options. SSSP is run from vertex 0 on
    both graphs. The two results are merged on ``vertex`` and every merged
    row must carry the same distance from both runs.

    ``dask_client`` is not used in the body; it is presumably a pytest
    fixture that provides the dask cluster.
    """
    gc.collect()

    input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH /
                       "netscience.csv").as_posix()
    print(f"dataset={input_data_path}")
    chunksize = dcg.get_chunksize(input_data_path)

    # Shared reader options so both edge lists are parsed identically.
    read_opts = {
        "delimiter": " ",
        "names": ["src", "dst", "value"],
        "dtype": ["int32", "int32", "float32"],
    }
    ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize,
                             **read_opts)
    df = cudf.read_csv(input_data_path, **read_opts)

    g = cugraph.DiGraph()
    g.from_cudf_edgelist(df, "src", "dst", "value", renumber=True)

    dg = cugraph.DiGraph()
    dg.from_dask_cudf_edgelist(ddf, "src", "dst", "value")

    expected_dist = cugraph.sssp(g, 0)
    print(expected_dist)
    result_dist = dcg.sssp(dg, 0)
    result_dist = result_dist.compute()

    compare_dist = expected_dist.merge(
        result_dist, on="vertex", suffixes=["_local", "_dask"]
    )

    # One vectorized column comparison instead of a Python loop doing two
    # scalar `.iloc` reads per row.
    mismatched = (
        compare_dist["distance_local"] != compare_dist["distance_dask"]
    )
    err = int(mismatched.sum())
    assert err == 0

コード例 #11

0

ファイルを表示

ファイル: test_mg_sssp.py プロジェクト: goncaloperes/cugraph

def test_dask_sssp(client_connection):
    """Check that multi-GPU (dask) SSSP matches single-GPU SSSP.

    The netscience edge list is read once with ``dask_cudf`` and once with
    ``cudf``, using the same reader options. SSSP is run from vertex 0 on
    both graphs. The two results are merged on ``vertex`` and every merged
    row must carry the same distance from both runs.

    ``client_connection`` is not used in the body; it is presumably a
    pytest fixture that provides the dask cluster.
    """
    gc.collect()

    # FIXME: update this to allow dataset to be parameterized and have dataset
    # part of test param id (see other tests)
    input_data_path = r"../datasets/netscience.csv"
    print(f"dataset={input_data_path}")
    chunksize = dcg.get_chunksize(input_data_path)

    # Shared reader options so both edge lists are parsed identically.
    read_opts = {
        "delimiter": " ",
        "names": ["src", "dst", "value"],
        "dtype": ["int32", "int32", "float32"],
    }
    ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize,
                             **read_opts)
    df = cudf.read_csv(input_data_path, **read_opts)

    g = cugraph.DiGraph()
    g.from_cudf_edgelist(df, "src", "dst", "value", renumber=True)

    dg = cugraph.DiGraph()
    dg.from_dask_cudf_edgelist(ddf, "src", "dst", "value")

    expected_dist = cugraph.sssp(g, 0)
    print(expected_dist)
    result_dist = dcg.sssp(dg, 0)
    result_dist = result_dist.compute()

    compare_dist = expected_dist.merge(result_dist,
                                       on="vertex",
                                       suffixes=["_local", "_dask"])

    # One vectorized column comparison instead of a Python loop doing two
    # scalar `.iloc` reads per row.
    mismatched = (
        compare_dist["distance_local"] != compare_dist["distance_dask"]
    )
    err = int(mismatched.sum())
    assert err == 0

コード例 #12

0

ファイルを表示

def test_get_traversed_cost(graph_file):
    """Check ``get_traversed_cost`` against the SSSP distances.

    A random integer in ``[0, 10)`` is added to each edge weight to form the
    ``info`` column, which is used as the edge attribute of the graph. The
    cost that ``get_traversed_cost`` accumulates from ``info`` along the
    SSSP result must be close to the SSSP distance for every vertex.
    """
    cu_M = utils.read_csv_file(graph_file)

    edge_count = cu_M.shape[0]
    noise = cudf.Series(np.random.randint(10, size=edge_count))
    cu_M['info'] = cu_M['2'] + noise

    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='info')

    # SSSP and the cost retrieval must start from the same vertex id.
    source_vertex = 16
    df = cugraph.sssp(G, source_vertex)

    answer = cugraph.utilities.path_retrieval.get_traversed_cost(
        df, source_vertex, cu_M['0'], cu_M['1'], cu_M['info'])

    # Align both frames by vertex before comparing them row by row.
    df = df.sort_values(by='vertex').reset_index()
    answer = answer.sort_values(by='vertex').reset_index()

    assert df.shape[0] == answer.shape[0]
    assert np.allclose(df['distance'], answer['info'])

コード例 #13

0

ファイルを表示

ファイル: test_filter_unreachable.py プロジェクト: hieuqtran/cugraph

def test_filter_unreachable(managed, pool, graph_file, source):
    """Check that ``cugraph.filter_unreachable`` drops max-distance rows.

    RMM is restarted with the ``managed`` and ``pool`` settings. SSSP is
    then run from ``source`` on the graph read from ``graph_file``, built
    without edge weights. After filtering, no row may carry the maximum
    value of the distance dtype, and the filtered frame must not be empty.
    """
    gc.collect()

    # Restart RMM with the requested allocator configuration.
    rmm.finalize()
    rmm_config.use_managed_memory = managed
    rmm_config.use_pool_allocator = pool
    rmm_config.initial_pool_size = 2 << 27  # 2 << 27 == 2**28
    rmm.initialize()

    assert rmm.is_initialized()

    cu_M = utils.read_csv_file(graph_file)
    src_col = cu_M['0']
    dst_col = cu_M['1']

    print(f'sources size = {len(src_col)}')
    print(f'destinations size = {len(dst_col)}')

    # Build the graph for the SSSP call.
    G = cugraph.Graph()
    G.add_edge_list(src_col, dst_col)

    print('cugraph Solving... ')
    started = time.time()

    df = cugraph.sssp(G, source)

    elapsed = time.time() - started
    print(f'Time : {elapsed}')

    reachable_df = cugraph.filter_unreachable(df)

    # Choose the numeric-limits helper for the distance dtype; dtypes that
    # are neither integer nor inexact skip the max-value check.
    dist_dtype = df['distance'].dtype
    if np.issubdtype(dist_dtype, np.integer):
        limits = np.iinfo
    elif np.issubdtype(dist_dtype, np.inexact):
        limits = np.finfo
    else:
        limits = None

    if limits is not None:
        # `inf` must stay a local of this function: the query string below
        # refers to it as @inf.
        inf = limits(reachable_df['distance'].dtype).max  # noqa: F841
        assert len(reachable_df.query("distance == @inf")) == 0

    assert len(reachable_df) != 0

コード例 #14

0

ファイルを表示

def test_multigraph_sssp(graph_file):
    """Compare cuGraph SSSP on a MultiDiGraph with NetworkX Dijkstra.

    SSSP is run from vertex 0 on both libraries. cuGraph rows whose distance
    equals the floating-point maximum of the distance dtype are dropped.
    The remaining distances, ordered by vertex, must equal the NetworkX
    path lengths ordered by vertex.
    """
    # FIXME: Migrate to new test fixtures for Graph setup once available
    cuM = utils.read_csv_file(graph_file)
    G = cugraph.MultiDiGraph()
    G.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2")
    cu_paths = cugraph.sssp(G, 0)
    max_val = np.finfo(cu_paths["distance"].dtype).max
    cu_paths = cu_paths[cu_paths["distance"] != max_val]

    # NOTE(review): read_weights_in_sp presumably selects single-precision
    # weights to match the cuGraph side; confirm in utils.
    nxM = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)
    Gnx = nx.from_pandas_edgelist(
        nxM,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.MultiDiGraph(),
    )
    nx_paths = nx.single_source_dijkstra_path_length(Gnx, 0)

    cu_dist = cu_paths.sort_values(by="vertex")["distance"].to_numpy()
    nx_dist = [dist for _, dist in sorted(nx_paths.items())]

    # array_equal also covers differing lengths: it returns False, where an
    # element-wise `==` followed by `.all()` would fail with an unrelated
    # error.
    assert np.array_equal(cu_dist, nx_dist)

コード例 #15

0

ファイルを表示

ファイル: test_filter_unreachable.py プロジェクト: zeta1999/cugraph

def test_filter_unreachable(managed, pool, graph_file, source):
    """Check that ``cugraph.filter_unreachable`` drops max-distance rows.

    RMM is re-initialised from ``managed`` and ``pool``. SSSP is then run
    from ``source`` on the weighted directed graph read from ``graph_file``.
    After filtering, no row may carry the maximum value of the distance
    dtype, and the filtered frame must not be empty.
    """
    gc.collect()

    # 2 << 27 == 2**28
    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27
    )

    assert rmm.is_initialized()

    cu_M = utils.read_csv_file(graph_file)

    # Both messages report the number of rows in the edge list.
    print(f'sources size = {len(cu_M)}')
    print(f'destinations size = {len(cu_M)}')

    # Build the weighted directed graph for the SSSP call.
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='2')

    print('cugraph Solving... ')
    started = time.time()

    df = cugraph.sssp(G, source)

    elapsed = time.time() - started
    print(f'Time : {elapsed}')

    reachable_df = cugraph.filter_unreachable(df)

    # Choose the numeric-limits helper for the distance dtype; dtypes that
    # are neither integer nor inexact skip the max-value check.
    dist_dtype = df['distance'].dtype
    if np.issubdtype(dist_dtype, np.integer):
        limits = np.iinfo
    elif np.issubdtype(dist_dtype, np.inexact):
        limits = np.finfo
    else:
        limits = None

    if limits is not None:
        # `inf` must stay a local of this function: the query string below
        # refers to it as @inf.
        inf = limits(reachable_df['distance'].dtype).max  # noqa: F841
        assert len(reachable_df.query("distance == @inf")) == 0

    assert len(reachable_df) != 0

コード例 #16

0

ファイルを表示

def getPDF(knnRelation, numBins, numSamples, numberOfNodes):
    """Estimate a histogram-based PDF of shortest-path distances.

    The three sequences in ``knnRelation`` are symmetrized and turned into a
    weighted graph. SSSP is then run from each of the first
    ``min(numSamples, numberOfNodes)`` vertex ids, and the resulting
    distances are accumulated into one histogram.

    Parameters
    ----------
    knnRelation :
        Iterable of three sequences: edge sources, edge destinations and
        edge weights.
    numBins : int
        Number of histogram bins.
    numSamples : int
        Upper bound on the number of SSSP source vertices.
    numberOfNodes : int
        Number of vertices; also bounds the number of SSSP runs.

    Returns
    -------
    tuple
        ``(pdfMaxDist, pdf)`` where ``pdfMaxDist`` is the upper histogram
        edge (1.2 times the largest distance seen in the first run) and
        ``pdf`` is the histogram normalised to sum to 1.

    Raises
    ------
    ValueError
        If ``min(numSamples, numberOfNodes)`` is not positive. Without at
        least one SSSP run the histogram range is undefined.
    """
    sampleCount = min(numSamples, numberOfNodes)
    if sampleCount <= 0:
        # Fail early with a clear message; otherwise the return statement
        # would hit an unbound `pdfMaxDist`.
        raise ValueError(
            "getPDF needs at least one sample: "
            f"min(numSamples, numberOfNodes) = {sampleCount}"
        )

    us, vs, ds = map(cudf.Series, knnRelation)
    us, vs, ds = cugraph.structure.symmetrize(us, vs, ds)
    df = cudf.DataFrame({'source': us, 'destination': vs, 'weight': ds})

    G = cugraph.Graph()
    G.from_cudf_edgelist(df, edge_attr='weight')

    pdf = np.zeros(numBins)
    for i in range(sampleCount):
        ssspResult: cudf.DataFrame = cugraph.sssp(G, i)
        distances: cudf.Series = ssspResult['distance']
        vertexIds: cudf.Series = ssspResult['vertex']
        # Original author's note: the sorted distances look somewhat like
        # an inverse sigmoid.
        # Keep only vertices with an id above the current source
        # (presumably so each vertex pair is counted once).
        distances = distances[vertexIds > i]
        if i == 0:
            # The first run fixes the histogram range for all later runs.
            pdfMaxDist = 1.2 * distances.max()
        hist, _ = np.histogram(distances.tolist(),
                               bins=numBins,
                               range=(0, pdfMaxDist))
        pdf += hist
    return pdfMaxDist, pdf / pdf.sum()

コード例 #17

0

ファイルを表示

def sssp(G, start):
    """Run ``cugraph.sssp`` on ``G`` with ``start`` as the source vertex.

    Returns whatever ``cugraph.sssp`` returns, unchanged.
    """
    result = cugraph.sssp(G, source=start)
    return result

コード例 #18

0

ファイルを表示

import cugraph
import cudf
import json

# Read the edge list: three unnamed columns, given names and dtypes here.
M = cudf.read_csv(
    "simple_test_sssp.csv",
    names=["src", "dst", "value"],
    dtype=["int32", "int32", "float32"],
    header=None,
)

# Build the graph from the src/dst columns only; the "value" column is not
# passed as an edge attribute.
G = cugraph.Graph()
G.from_cudf_edgelist(M, source="src", destination="dst")

# Run SSSP from vertex 0 and show the result and its type.
distances = cugraph.sssp(G, 0)
print(distances)
print(type(distances))

# Round-trip the result through JSON to get plain Python dicts.
distances_json = distances.to_json()
json_object = json.loads(distances_json)

print(json_object)

vertex_distance_dic = json_object["distance"]
vertex_dic = json_object["vertex"]
print(vertex_distance_dic, type(vertex_distance_dic))
print(vertex_dic)