Beispiel #1
0
def test_renumber_ips():
    """Renumber IPv4 addresses and check the numbering maps back to them.

    The dotted-quad strings are converted to integers with ``str.ip2int()``
    before being handed to ``cugraph.renumber``. For every row, looking up
    the renumbered source/destination id in the returned ``numbering`` must
    yield the original integer address.
    """
    ip_frame = pd.DataFrame({
        'source_list': [
            '192.168.1.1',
            '172.217.5.238',
            '216.228.121.209',
            '192.16.31.23',
        ],
        'dest_list': [
            '172.217.5.238',
            '216.228.121.209',
            '192.16.31.23',
            '192.168.1.1',
        ],
    })
    gdf = cudf.from_pandas(ip_frame)

    # Add the integer form of each address column (source first, then dest).
    for text_col, int_col in (('source_list', 'source_as_int'),
                              ('dest_list', 'dest_as_int')):
        gdf[int_col] = gdf[text_col].str.ip2int()

    expected_src = gdf['source_as_int']
    expected_dst = gdf['dest_as_int']

    src, dst, numbering = cugraph.renumber(expected_src, expected_dst)

    # numbering[new_id] must give back the original (integer) address.
    for row in range(len(gdf)):
        assert numbering[src[row]] == expected_src[row]
        assert numbering[dst[row]] == expected_dst[row]
Beispiel #2
0
def test_renumber_files(managed, pool, graph_file):
    """Check that renumbering the edges of ``graph_file`` is reversible.

    RMM is re-initialized with ``managed`` / ``pool`` as its
    ``managed_memory`` / ``pool_allocator`` settings. The edge list's vertex
    ids are shifted by a fixed offset, renumbered, and then every renumbered
    id is looked up in ``numbering`` and un-shifted; the result must equal
    the id read from the file.
    """
    gc.collect()

    # 2 << 27 == 2**28 (256 MiB if the size is interpreted in bytes).
    pool_size = 2 << 27
    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=pool_size)
    assert rmm.is_initialized()

    edges = utils.read_csv_for_nx(graph_file)
    sources = cudf.Series(edges['0'])
    destinations = cudf.Series(edges['1'])

    # Shift every vertex id so the inputs to renumber() are not the
    # ids stored in the file.
    offset = 1000
    shifted_src = cudf.Series([vertex + offset for vertex in sources])
    shifted_dst = cudf.Series([vertex + offset for vertex in destinations])

    src, dst, numbering = cugraph.renumber(shifted_src, shifted_dst)

    edge_count = len(sources)
    for edge in range(edge_count):
        assert sources[edge] == (numbering[src[edge]] - offset)
        assert destinations[edge] == (numbering[dst[edge]] - offset)
Beispiel #3
0
def test_renumber_negative():
    """Check that renumbering round-trips when a vertex id is negative.

    The source list contains ``-20``. After ``cugraph.renumber``, looking up
    each renumbered id in ``numbering`` must return the original value.
    """
    source_list = [4, 6, 8, -20, 1]
    dest_list = [1, 29, 35, 0, 77]

    columns = ['source_list', 'dest_list']
    host_frame = pd.DataFrame({
        'source_list': source_list,
        'dest_list': dest_list,
    })
    gdf = cudf.DataFrame.from_pandas(host_frame[columns])

    src, dst, numbering = cugraph.renumber(gdf['source_list'],
                                           gdf['dest_list'])

    # The expected values live in plain Python lists, so walk them directly
    # and only index into the renumbering results.
    for row, (want_src, want_dst) in enumerate(zip(source_list, dest_list)):
        assert want_src == numbering[src[row]]
        assert want_dst == numbering[dst[row]]
Beispiel #4
0
def test_renumber_files(graph_file):
    """Check that renumbering the edges of ``graph_file`` is reversible.

    Vertex ids read from the file are shifted by a fixed offset and passed
    to ``cugraph.renumber``. Mapping each renumbered id through
    ``numbering`` and removing the offset must give back the id from the
    file.
    """
    gc.collect()

    edge_table = utils.read_csv_for_nx(graph_file)
    sources = cudf.Series(edge_table['0'])
    destinations = cudf.Series(edge_table['1'])

    shift = 1000

    # Build the shifted id columns that renumber() will actually see.
    src_in = cudf.Series([vid + shift for vid in sources])
    dst_in = cudf.Series([vid + shift for vid in destinations])

    src, dst, numbering = cugraph.renumber(src_in, dst_in)

    for k in range(len(sources)):
        original_src = numbering[src[k]] - shift
        original_dst = numbering[dst[k]] - shift
        assert sources[k] == (original_src)
        assert destinations[k] == (original_dst)
Beispiel #5
0
def load_cugraph(lang,
                 year,
                 usecols=(0, 2),
                 dtype=("int32", "str", "int32", "str")):
    """Load a tab-separated edge list into a renumbered cugraph DiGraph.

    The input path is produced by ``generate_file_name(lang, year)``. The
    ``page_id_from`` / ``page_id_to`` columns of that file are renumbered
    with ``cugraph.renumber`` and the renumbered columns become the
    ``src`` / ``dst`` edge list of the returned graph.

    Parameters
    ----------
    lang, year
        Forwarded unchanged to ``generate_file_name`` to select the file.
    usecols : sequence, optional
        Column selection forwarded to ``cudf.read_csv``. The selected
        columns must include ``page_id_from`` and ``page_id_to``.
    dtype : sequence, optional
        Column dtypes forwarded to ``cudf.read_csv``.

    Returns
    -------
    tuple
        ``(G, renumbering_map)``: the ``cugraph.DiGraph`` built from the
        renumbered edges, and the third value returned by
        ``cugraph.renumber`` (needed to translate ids back).
    """
    import cugraph
    import cudf

    # The defaults are immutable tuples so no list object is shared between
    # calls; read_csv still receives lists, exactly as with the old defaults.
    # Anything else the caller passes (None, dict, list) goes through as-is.
    if isinstance(usecols, tuple):
        usecols = list(usecols)
    if isinstance(dtype, tuple):
        dtype = list(dtype)

    file_name = generate_file_name(lang, year)

    gdf = cudf.read_csv(file_name, usecols=usecols, dtype=dtype, sep="\t")

    sources = cudf.Series(gdf["page_id_from"])
    destinations = cudf.Series(gdf["page_id_to"])
    source_col, dest_col, renumbering_map = cugraph.renumber(
        sources, destinations)

    # Plain column assignment instead of DataFrame.add_column, which newer
    # cuDF releases no longer provide.
    edge_df = cudf.DataFrame()
    edge_df["src"] = source_col
    edge_df["dst"] = dest_col

    G = cugraph.DiGraph()
    G.from_cudf_edgelist(edge_df, source="src", target="dst")

    return G, renumbering_map
Beispiel #6
0
def test_renumber_files(managed, pool, graph_file):
    """Check that renumbering the edges of an mtx ``graph_file`` round-trips.

    RMM is finalized, reconfigured through ``rmm_cfg`` with ``managed`` and
    ``pool``, and initialized again before the data is loaded. The row/col
    ids from the file are shifted by a fixed offset, renumbered, and each
    renumbered id is mapped back through ``numbering`` and un-shifted; the
    result must equal the id from the file.
    """
    gc.collect()

    # Restart RMM so the requested allocator settings take effect.
    rmm.finalize()
    rmm_cfg.use_managed_memory = managed
    rmm_cfg.use_pool_allocator = pool
    rmm.initialize()
    assert rmm.is_initialized()

    matrix = utils.read_mtx_file(graph_file)
    sources = cudf.Series(matrix.row)
    destinations = cudf.Series(matrix.col)

    delta = 1000
    moved_sources = cudf.Series([node + delta for node in sources])
    moved_destinations = cudf.Series([node + delta for node in destinations])

    src, dst, numbering = cugraph.renumber(moved_sources, moved_destinations)

    total = len(sources)
    for pos in range(total):
        assert sources[pos] == (numbering[src[pos]] - delta)
        assert destinations[pos] == (numbering[dst[pos]] - delta)