Exemple #1
0
def test_add_edge_data_prop_columns(df_type):
    """
    Load the "transactions" table as edge data while requesting only a subset
    of its columns as properties, then verify the resulting vertex count,
    edge count and edge property names.
    """
    from cugraph.experimental import PropertyGraph

    txn_table = dataset1["transactions"]
    txn_columns, txn_rows = txn_table[0], txn_table[1]
    edge_df = df_type(columns=txn_columns, data=txn_rows)

    # Only these columns are requested as edge properties.
    wanted_props = ["card_num", "card_type"]

    graph = PropertyGraph()
    graph.add_edge_data(
        edge_df,
        type_name="transactions",
        vertex_id_columns=("user_id", "merchant_id"),
        property_columns=wanted_props,
    )

    assert graph.num_vertices == 7
    assert graph.num_edges == 4
    # Compare order-insensitively: both sides are sorted before comparing.
    assert sorted(graph.edge_property_names) == sorted(wanted_props)
Exemple #2
0
def test_add_vertex_data_prop_columns(df_type):
    """
    Load the "merchants" table as vertex data while requesting only a subset
    of its columns as properties, then verify the resulting vertex count,
    edge count and vertex property names.
    """
    from cugraph.experimental import PropertyGraph

    merchant_table = dataset1["merchants"]
    merchant_columns, merchant_rows = merchant_table[0], merchant_table[1]
    vertex_df = df_type(columns=merchant_columns, data=merchant_rows)

    # Only these columns are requested as vertex properties.
    wanted_props = ["merchant_name", "merchant_sales", "merchant_location"]

    graph = PropertyGraph()
    graph.add_vertex_data(
        vertex_df,
        type_name="merchants",
        vertex_id_column="merchant_id",
        property_columns=wanted_props,
    )

    assert graph.num_vertices == 5
    # No edge data was added, so no edges are expected.
    assert graph.num_edges == 0
    assert sorted(graph.vertex_property_names) == sorted(wanted_props)
Exemple #3
0
def test_add_vertex_data(df_type):
    """
    Load the "merchants" table as vertex data with property_columns=None and
    verify the vertex count, edge count and that the vertex property names
    match the table's column names.
    """
    from cugraph.experimental import PropertyGraph

    merchant_table = dataset1["merchants"]
    merchant_columns, merchant_rows = merchant_table[0], merchant_table[1]
    vertex_df = df_type(columns=merchant_columns, data=merchant_rows)

    graph = PropertyGraph()
    graph.add_vertex_data(
        vertex_df,
        type_name="merchants",
        vertex_id_column="merchant_id",
        property_columns=None,
    )

    assert graph.num_vertices == 5
    # No edge data was added, so no edges are expected.
    assert graph.num_edges == 0

    # The expectation is the full list of column names of the input table.
    all_columns = merchant_columns.copy()
    assert sorted(graph.vertex_property_names) == sorted(all_columns)
Exemple #4
0
def test_null_data(df_type):
    """
    A PropertyGraph that has had no data added must report zero vertices,
    zero edges and no vertex property names.

    Note: df_type is accepted but not used in the body of this test.
    """
    from cugraph.experimental import PropertyGraph

    empty_graph = PropertyGraph()

    assert empty_graph.num_vertices == 0
    assert empty_graph.num_edges == 0
    assert sorted(empty_graph.vertex_property_names) == []
Exemple #5
0
def read_reddit(raw_path, self_loop=False):
    """
    Load the Reddit graph and node data found under raw_path and wrap them
    in a CuGraphStorage.

    Reads "reddit_graph.npz" (sparse adjacency) and "reddit_data.npz"
    (features, labels and node types) from raw_path.

    Returns a tuple (gstore, labels, train_mask, val_mask, test_mask), where
    the masks are boolean comparisons of the "node_types" array against 1, 2
    and 3 respectively.

    Note: self_loop is accepted but not used in the body of this function.
    """
    # Edge list: one column each for source, destination and weight, taken
    # from the row/col/data attributes of the loaded sparse matrix.
    adjacency = sp.load_npz(os.path.join(raw_path, "reddit_graph.npz"))
    edgelist = cudf.DataFrame()
    edge_columns = (
        ('src', adjacency.row),
        ('dst', adjacency.col),
        ('wt', adjacency.data),
    )
    for column_name, values in edge_columns:
        edgelist[column_name] = cudf.Series(values)

    # Node features and labels.
    node_data = np.load(os.path.join(raw_path, "reddit_data.npz"))
    cu_features = cudf.DataFrame(node_data["feature"])
    # 'name' holds the row position and serves as the vertex id column below.
    cu_features['name'] = np.arange(len(cu_features))
    labels = node_data["label"]

    # train/val/test masks, selected by node type code 1/2/3.
    node_types = node_data["node_types"]
    train_mask, val_mask, test_mask = (
        node_types == type_code for type_code in (1, 2, 3)
    )

    # Attach the edge list and the features to a property graph.
    pg = PropertyGraph()
    pg.add_edge_data(edgelist, vertex_col_names=("src", "dst"))
    pg.add_vertex_data(cu_features, vertex_col_name="name")
    # Remove the helper id column from the stored vertex properties.
    pg._vertex_prop_dataframe.drop(columns=['name'], inplace=True)

    return CuGraphStorage(pg), labels, train_mask, val_mask, test_mask
Exemple #6
0
def read_cora(graph_path, feat_path, self_loop=False):
    """
    Load the Cora graph (graph_path) and its node content table (feat_path),
    both read as header-less tab-separated files, and wrap them in a
    CuGraphStorage.

    Returns a tuple (gstore, labels, idx_train, idx_val, idx_test). The index
    arrays come from shuffling arange(len(labels)) and splitting it at
    positions 1000 and 1500.

    Note: self_loop is accepted but not used in the body of this function.
    """
    label_column = '1434'

    edges = cudf.read_csv(graph_path, sep='\t', header=None)
    content = cudf.read_csv(feat_path, sep='\t', header=None)

    # Column '1434' carries the true label; pull it out of the content table
    # so that it is not stored as a vertex property.
    labels = content[label_column]
    content.drop(columns=label_column, inplace=True)

    # Give every edge a constant weight.
    edges['weight'] = 1.0

    # Attach edges and node content to a property graph.
    pg = PropertyGraph()
    pg.add_edge_data(edges, vertex_col_names=("0", "1"))
    pg.add_vertex_data(content, vertex_col_name="0")

    # Remove the id columns from the stored property tables.
    pg._vertex_prop_dataframe.drop(columns=['0'], inplace=True)
    pg._edge_prop_dataframe.drop(columns=['0', '1'], inplace=True)

    gstore = CuGraphStorage(pg)

    # Train/val/test split: shuffle all positions in place, then cut the
    # shuffled array at 1000 and 1500.
    shuffled = np.arange(len(labels))
    random.shuffle(shuffled)
    idx_train, idx_val, idx_test = np.split(shuffled, [1000, 1500])

    return gstore, labels, idx_train, idx_val, idx_test
Exemple #7
0
def test_extract_subgraph_graph_without_vert_props():
    """
    Check that extract_subgraph() works on a PropertyGraph that only had edge
    data added (no add_vertex_data() calls), and that the extracted directed
    graph contains the expected edges and weights.
    """
    from cugraph.experimental import PropertyGraph

    # (table name in dataset1, columns holding the edge endpoints)
    edge_specs = (
        ("transactions", ("user_id", "merchant_id")),
        ("relationships", ("user_id_1", "user_id_2")),
    )
    tables = {name: dataset1[name] for name, _ in edge_specs}

    graph = PropertyGraph()
    for table_name, id_columns in edge_specs:
        table = tables[table_name]
        edge_df = cudf.DataFrame(columns=table[0], data=table[1])
        graph.add_edge_data(
            edge_df,
            type_name=table_name,
            vertex_id_columns=id_columns,
            property_columns=None,
        )

    # Keep only edges whose source vertex is 89216; edges lacking the
    # "relationship_type" property fall back to the default weight of 0.
    chosen_edges = graph.select_edges("_SRC_ == 89216")
    subgraph = graph.extract_subgraph(
        selection=chosen_edges,
        create_using=DiGraph_inst,
        edge_weight_property="relationship_type",
        default_edge_weight=0,
    )

    expected_edgelist = cudf.DataFrame({
        "src": [89216, 89216, 89216],
        "dst": [4, 89021, 32431],
        "weights": [0, 9, 9]
    })

    # Map both endpoint columns back to the original vertex ids.
    actual_edgelist = subgraph.edgelist.edgelist_df
    for endpoint in ("src", "dst"):
        actual_edgelist = subgraph.unrenumber(actual_edgelist,
                                              endpoint,
                                              preserve_order=True)

    assert subgraph.is_directed()
    assert_frame_equal(expected_edgelist, actual_edgelist, check_like=True)
def create_pg():
    """
    Build and return a PropertyGraph populated with every vertex and edge
    table of dataset1, using cudf.DataFrame as the input DataFrame type.
    """
    dataframe_type = cudf.DataFrame

    (merchants, users, taxpayers, transactions, relationships,
     referrals) = dataset1.values()

    # For dataset1: vertices are merchants, users and taxpayers; edges are
    # transactions, relationships and referrals.
    # Each entry: (table, type name, vertex id column).
    vertex_tables = (
        (merchants, "merchants", "merchant_id"),
        (users, "users", "user_id"),
        (taxpayers, "taxpayers", "payer_id"),
    )
    # Each entry: (table, type name, (source column, destination column)).
    edge_tables = (
        (transactions, "transactions", ("user_id", "merchant_id")),
        (relationships, "relationships", ("user_id_1", "user_id_2")),
        (referrals, "referrals", ("user_id_1", "user_id_2")),
    )

    pG = PropertyGraph()

    # property_columns=None is passed everywhere so that no explicit subset
    # of property columns is requested.
    for table, type_name, id_column in vertex_tables:
        pG.add_vertex_data(
            dataframe_type(columns=table[0], data=table[1]),
            type_name=type_name,
            vertex_col_name=id_column,
            property_columns=None,
        )

    for table, type_name, id_columns in edge_tables:
        pG.add_edge_data(
            dataframe_type(columns=table[0], data=table[1]),
            type_name=type_name,
            vertex_col_names=id_columns,
            property_columns=None,
        )

    return pG
Exemple #9
0
def test_different_vertex_edge_input_dataframe_types():
    """
    Once a PropertyGraph has received data of one DataFrame type (cudf or
    pandas), adding data of the other type must raise TypeError, regardless
    of which add_*_data() method is called first or second.
    """
    from cugraph.experimental import PropertyGraph

    cudf_frame = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    pandas_frame = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

    # Keyword arguments used for each method throughout the test.
    kwargs_for = {
        "add_vertex_data": {"type_name": "foo", "vertex_id_column": "a"},
        "add_edge_data": {"type_name": "bar",
                          "vertex_id_columns": ("a", "b")},
    }

    # Each scenario: (accepted call, its frame, rejected call, its frame).
    scenarios = (
        # vertex data first, then edge data of the other type
        ("add_vertex_data", cudf_frame, "add_edge_data", pandas_frame),
        ("add_vertex_data", pandas_frame, "add_edge_data", cudf_frame),
        # different order: edge data first
        ("add_edge_data", cudf_frame, "add_vertex_data", pandas_frame),
        # same API call, different types
        ("add_vertex_data", cudf_frame, "add_vertex_data", pandas_frame),
        ("add_edge_data", cudf_frame, "add_edge_data", pandas_frame),
    )

    for first_call, first_frame, second_call, second_frame in scenarios:
        # A fresh graph per scenario so earlier calls cannot interfere.
        graph = PropertyGraph()
        getattr(graph, first_call)(first_frame, **kwargs_for[first_call])
        with pytest.raises(TypeError):
            getattr(graph, second_call)(second_frame,
                                        **kwargs_for[second_call])
Exemple #10
0
def test_add_edge_data_bad_args():
    """
    Call add_edge_data() with a series of invalid arguments and check that
    each one raises the expected exception type.
    """
    from cugraph.experimental import PropertyGraph

    txn_table = dataset1["transactions"]
    transactions_df = cudf.DataFrame(columns=txn_table[0],
                                     data=txn_table[1])
    valid_ids = ("user_id", "merchant_id")

    # Each case: (expected exception, dataframe, type_name,
    #             vertex_id_columns, property_columns)
    bad_cases = (
        # dataframe argument is not a DataFrame
        (TypeError, 42, "transactions", valid_ids, None),
        # type_name is not a string
        (TypeError, transactions_df, 42, valid_ids, None),
        # one of the vertex id columns does not exist
        (ValueError, transactions_df, "transactions",
         ("user_id", "bad_column"), None),
        # one of the requested property columns does not exist
        (ValueError, transactions_df, "transactions", valid_ids,
         ["bad_column_name", "time"]),
        # property_columns is a bare string rather than a list
        (TypeError, transactions_df, "transactions", valid_ids, "time"),
    )

    pG = PropertyGraph()
    for expected_error, frame, type_name, id_columns, props in bad_cases:
        with pytest.raises(expected_error):
            pG.add_edge_data(frame,
                             type_name=type_name,
                             vertex_id_columns=id_columns,
                             property_columns=props)
Exemple #11
0
def test_add_vertex_data_bad_args():
    """
    Call add_vertex_data() with a series of invalid arguments and check that
    each one raises the expected exception type.
    """
    from cugraph.experimental import PropertyGraph

    merchant_table = dataset1["merchants"]
    merchants_df = cudf.DataFrame(columns=merchant_table[0],
                                  data=merchant_table[1])

    # Each case: (expected exception, dataframe, type_name,
    #             vertex_id_column, property_columns)
    bad_cases = (
        # dataframe argument is not a DataFrame
        (TypeError, 42, "merchants", "merchant_id", None),
        # type_name is not a string
        (TypeError, merchants_df, 42, "merchant_id", None),
        # the vertex id column does not exist
        (ValueError, merchants_df, "merchants", "bad_column_name", None),
        # one of the requested property columns does not exist
        (ValueError, merchants_df, "merchants", "merchant_id",
         ["bad_column_name", "merchant_name"]),
        # property_columns is a bare string rather than a list
        (TypeError, merchants_df, "merchants", "merchant_id",
         "merchant_name"),
    )

    pG = PropertyGraph()
    for expected_error, frame, type_name, id_column, props in bad_cases:
        with pytest.raises(expected_error):
            pG.add_vertex_data(frame,
                               type_name=type_name,
                               vertex_id_column=id_column,
                               property_columns=props)
Exemple #12
0
def property_graph_instance(request):
    """
    Build and return a PropertyGraph populated with every vertex and edge
    table of dataset1. The DataFrame type used to wrap each table is taken
    from request.param[0].
    """
    dataframe_type = request.param[0]
    from cugraph.experimental import PropertyGraph

    (merchants, users, taxpayers, transactions, relationships,
     referrals) = dataset1.values()

    # For dataset1: vertices are merchants, users and taxpayers; edges are
    # transactions, relationships and referrals.
    # Each entry: (table, type name, vertex id column).
    vertex_tables = (
        (merchants, "merchants", "merchant_id"),
        (users, "users", "user_id"),
        (taxpayers, "taxpayers", "payer_id"),
    )
    # Each entry: (table, type name, (source column, destination column)).
    edge_tables = (
        (transactions, "transactions", ("user_id", "merchant_id")),
        (relationships, "relationships", ("user_id_1", "user_id_2")),
        (referrals, "referrals", ("user_id_1", "user_id_2")),
    )

    pG = PropertyGraph()

    # property_columns=None is passed everywhere so that no explicit subset
    # of property columns is requested.
    for table, type_name, id_column in vertex_tables:
        pG.add_vertex_data(
            dataframe_type(columns=table[0], data=table[1]),
            type_name=type_name,
            vertex_id_column=id_column,
            property_columns=None,
        )

    for table, type_name, id_columns in edge_tables:
        pG.add_edge_data(
            dataframe_type(columns=table[0], data=table[1]),
            type_name=type_name,
            vertex_id_columns=id_columns,
            property_columns=None,
        )

    return pG