def test_hyperedges_direct_manual_shaping():
    """Check edge counts when direct edges are shaped by an explicit EDGES map.

    The expected counts are tied to the module-level ``hyper_df`` fixture.
    """
    narrow_shape = {"aa": ["cc"], "cc": ["cc"]}
    narrow = cugraph.hypergraph(hyper_df, direct=True, EDGES=narrow_shape)
    assert len(narrow["edges"]) == 6

    wide_shape = {"aa": ["cc", "bb", "aa"], "cc": ["cc"]}
    wide = cugraph.hypergraph(hyper_df, direct=True, EDGES=wide_shape)
    assert len(wide["edges"]) == 12
def test_hyper_to_pa_all_direct():
    """Nodes and edges of a direct hypergraph keep their length in Arrow form."""
    result = cugraph.hypergraph(simple_df, ["id", "a1", "🙈"], direct=True)
    graph = result["graph"]

    nodes_arrow = graph.nodes().to_arrow()
    assert len(graph.nodes()) == 9
    assert len(nodes_arrow) == 9

    edges_arrow = graph.edges().to_arrow()
    assert len(graph.edges()) == 9
    assert len(edges_arrow) == 9
def test_skip_na_hyperedge():
    """A null cell yields no hyperedge, with or without edge attributes."""
    host_frame = pd.DataFrame({
        "x": ["a", "b", "c"],
        "y": ["aa", None, "cc"],
    })
    nans_df = cudf.DataFrame.from_pandas(host_frame)

    # One edge per non-null cell of the frame above.
    expected_hits = ["a", "b", "c", "aa", "cc"]
    expected_count = len(expected_hits)

    # First with edge attributes dropped, then with default options.
    for options in ({"drop_edge_attrs": True}, {}):
        h_edges = cugraph.hypergraph(nans_df, **options)["edges"]
        assert_eq(len(h_edges), expected_count)
def test_complex_df():
    """Smoke test: a frame mixing many dtypes can be turned into a hypergraph."""
    host_df = pd.DataFrame({
        "src": [0, 1, 2, 3],
        "dst": [1, 2, 3, 0],
        "colors": [1, 1, 2, 2],
        "bool": [True, False, True, True],
        "char": ["a", "b", "c", "d"],
        "str": ["a", "b", "c", "d"],
        "ustr": [u"a", u"b", u"c", u"d"],
        "emoji": ["😋", "😋😋", "😋", "😋"],
        "int": [0, 1, 2, 3],
        "num": [0.5, 1.5, 2.5, 3.5],
        "date_str": [
            "2018-01-01 00:00:00",
            "2018-01-02 00:00:00",
            "2018-01-03 00:00:00",
            "2018-01-05 00:00:00",
        ],
        "date": [dt.datetime(2018, 1, 1)] * 4,
        "time": [pd.Timestamp("2018-01-05")] * 4,
    })

    # Add a categorical twin for every original column. The column index is
    # captured once before the loop, so the new "*_cat" columns are not
    # themselves revisited.
    original_columns = host_df.columns
    for name in original_columns:
        try:
            host_df[name + "_cat"] = host_df[name].astype("category")
        except Exception:
            # lists aren't categorical; skip columns that cannot convert
            pass

    device_df = cudf.DataFrame.from_pandas(host_df)
    cugraph.hypergraph(device_df)
def test_drop_na_direct():
    """With dropna=True, a direct hypergraph over nulls has 2 nodes, 1 edge."""
    host_frame = pd.DataFrame({"a": ["a", None, "a"], "i": [1, 1, None]})
    frame = cudf.DataFrame.from_pandas(host_frame)

    graph = cugraph.hypergraph(frame, dropna=True, direct=True)["graph"]

    assert len(graph.nodes()) == 2
    assert len(graph.edges()) == 1
def test_drop_na_hyper():
    """With dropna=True, the hypergraph over nulls has 7 nodes and 4 edges."""
    host_frame = pd.DataFrame({"a": ["a", None, "c"], "i": [1, 2, None]})
    frame = cudf.DataFrame.from_pandas(host_frame)

    graph = cugraph.hypergraph(frame, dropna=True)["graph"]

    assert len(graph.nodes()) == 7
    assert len(graph.edges()) == 4
def test_hyper_to_pa_mixed():
    """A frame with string and integer columns converts to Arrow tables."""
    host_frame = pd.DataFrame({
        "x": ["a", "b", "c"],
        "y": [1, 2, 3],
    })
    frame = cudf.DataFrame.from_pandas(host_frame)
    graph = cugraph.hypergraph(frame)["graph"]

    nodes_arrow = graph.nodes().to_arrow()
    assert len(nodes_arrow) == 9

    edges_arrow = graph.edges().to_arrow()
    assert len(edges_arrow) == 6
def test_skip_drop_na_direct():
    """SKIP combined with dropna=True in direct mode: 4 nodes and 2 edges."""
    host_frame = pd.DataFrame({
        "a": ["a", None, "b"],
        "b": ["a", "b", "c"],
        "c": [1, 2, 3],
    })
    frame = cudf.DataFrame.from_pandas(host_frame)

    result = cugraph.hypergraph(frame, SKIP=["c"], dropna=True, direct=True)
    graph = result["graph"]

    assert len(graph.nodes()) == 4
    assert len(graph.edges()) == 2
def test_skip_hyper():
    """SKIP combined with dropna=False: the graph has 9 nodes and 6 edges."""
    host_frame = pd.DataFrame({
        "a": ["a", None, "b"],
        "b": ["a", "b", "c"],
        "c": [1, 2, 3],
    })
    frame = cudf.DataFrame.from_pandas(host_frame)

    result = cugraph.hypergraph(frame, SKIP=["c"], dropna=False)
    graph = result["graph"]

    assert len(graph.nodes()) == 9
    assert len(graph.edges()) == 6
def test_hyperedges_direct_categories():
    """Mapping every column to category "N" gives 9 edges and 6 nodes."""
    shared_category = dict.fromkeys(("aa", "bb", "cc"), "N")

    h = cugraph.hypergraph(hyper_df, direct=True, categories=shared_category)

    assert_eq(len(h["edges"]), 9)
    assert_eq(len(h["nodes"]), 6)
def test_hyper_to_pa_na():
    """With dropna=False, null cells still count in the Arrow node/edge tables.

    The input frame has one null per column; the graph is expected to report
    9 nodes and 6 edges, both directly and after conversion to Arrow.
    """
    df = cudf.DataFrame.from_pandas(pd.DataFrame({
        "x": ["a", None, "c"],
        "y": [1, 2, None],
    }))
    hg = cugraph.hypergraph(df, dropna=False)

    # The leftover debugging print of the nodes frame was removed: it only
    # added noise to test output and asserted nothing.
    nodes_arr = hg["graph"].nodes().to_arrow()
    assert len(hg["graph"].nodes()) == 9
    assert len(nodes_arr) == 9

    edges_arr = hg["graph"].edges().to_arrow()
    assert len(hg["graph"].edges()) == 6
    assert len(edges_arr) == 6
def test_drop_edge_attrs(categorical_metadata):
    """drop_edge_attrs=True leaves only the three structural edge columns."""
    h = cugraph.hypergraph(
        simple_df,
        columns=["id", "a1", "🙈"],
        drop_edge_attrs=True,
        categorical_metadata=categorical_metadata,
    )

    expected_keys = ["entities", "nodes", "edges", "events", "graph"]
    assert len(h.keys()) == len(expected_keys)

    # Three events repeated once per edge type (a1, id, 🙈).
    event_ids = ["event_id::0", "event_id::1", "event_id::2"] * 3
    edge_types = ["a1"] * 3 + ["id"] * 3 + ["🙈"] * 3
    attrib_ids = [
        "a1::1", "a1::2", "a1::3",
        "id::a", "id::b", "id::c",
        "🙈::æski ēˈmōjē", "🙈::😋", "🙈::s",
    ]
    host_edges = pd.DataFrame({
        "event_id": event_ids,
        "edge_type": edge_types,
        "attrib_id": attrib_ids,
    })
    edges = cudf.DataFrame.from_pandas(host_edges)

    if categorical_metadata:
        edges = edges.astype({"edge_type": "category"})

    assert_frame_equal(edges, h["edges"], check_dtype=False)

    expected_sizes = {"entities": 9, "nodes": 12, "edges": 9, "events": 3}
    for key, size in expected_sizes.items():
        assert len(h[key]) == size
def make_and_shape_hypergraph(df, **kwargs):
    """Build a hypergraph from ``df`` and shape it into graph, nodes and edges.

    ``kwargs`` are forwarded unchanged to ``cugraph.hypergraph``. The
    ``SOURCE``, ``TARGET``, ``NODEID`` and ``CATEGORY`` entries (defaulting to
    ``"src"``, ``"dst"``, ``"node_id"`` and ``"category"``) also name the
    columns selected from the hypergraph result here.

    Returns:
        The ``(graph, nodes, edges)`` triple after annotation.
    """
    hyper = cugraph.hypergraph(df, **kwargs)
    del hyper["events"]
    del hyper["entities"]

    SOURCE = kwargs.get("SOURCE", "src")
    TARGET = kwargs.get("TARGET", "dst")
    NODEID = kwargs.get("NODEID", "node_id")
    CATEGORY = kwargs.get("CATEGORY", "category")

    # NOTE(review): this projection is rebound by the from_cudf_edgelist call
    # below and never read; it is kept because the column selection still
    # fails loudly if the expected node columns are missing.
    nodes = hyper["nodes"][[NODEID, CATEGORY]]
    edges = hyper["edges"][[SOURCE, TARGET]]

    # Create graph
    # NOTE(review): from_cudf_edgelist, annotate_nodes and annotate_edges are
    # not defined in the visible part of this file; confirm they are imported.
    graph, nodes, edges = from_cudf_edgelist(edges, SOURCE, TARGET)
    nodes["name"] = nodes["node"]

    # Add vis components
    nodes = annotate_nodes(graph, nodes, edges)
    edges = annotate_edges(graph, nodes, edges)
    return graph, nodes, edges
def test_drop_edge_attrs_direct(categorical_metadata):
    """Direct mode with drop_edge_attrs=True keeps only structural columns."""
    edge_shape = {"id": ["a1"], "a1": ["🙈"]}
    h = cugraph.hypergraph(
        simple_df,
        ["id", "a1", "🙈"],
        direct=True,
        drop_edge_attrs=True,
        EDGES=edge_shape,
        categorical_metadata=categorical_metadata,
    )

    expected_keys = ["entities", "nodes", "edges", "events", "graph"]
    assert_eq(len(h.keys()), len(expected_keys))

    # Three events repeated once per edge type (a1 -> 🙈, id -> a1).
    host_edges = pd.DataFrame({
        "event_id": ["event_id::0", "event_id::1", "event_id::2"] * 2,
        "edge_type": ["a1::🙈"] * 3 + ["id::a1"] * 3,
        "src": ["a1::1", "a1::2", "a1::3", "id::a", "id::b", "id::c"],
        "dst": [
            "🙈::æski ēˈmōjē", "🙈::😋", "🙈::s",
            "a1::1", "a1::2", "a1::3",
        ],
    })
    edges = cudf.DataFrame.from_pandas(host_edges)

    if categorical_metadata:
        edges = edges.astype({"edge_type": "category"})

    assert_eq(edges, h["edges"])

    expected_sizes = {"entities": 9, "nodes": 9, "edges": 6, "events": 0}
    for key, size in expected_sizes.items():
        assert_eq(len(h[key]), size)
def test_hyperedges_direct():
    """A direct hypergraph over ``hyper_df`` has 9 edges and 9 nodes."""
    result = cugraph.hypergraph(hyper_df, direct=True)

    edge_count = len(result["edges"])
    assert_eq(edge_count, 9)

    node_count = len(result["nodes"])
    assert_eq(node_count, 9)
def test_hyperedges(categorical_metadata):
    """The default hypergraph of ``simple_df`` yields the expected edge table."""
    h = cugraph.hypergraph(simple_df, categorical_metadata=categorical_metadata)

    expected_keys = ["entities", "nodes", "edges", "events", "graph"]
    assert_eq(len(h.keys()), len(expected_keys))

    # Three events repeated once per edge type (a1, a2, id, 🙈); the original
    # attribute columns ride along on every edge row.
    event_ids = ["event_id::0", "event_id::1", "event_id::2"] * 4
    edge_types = ["a1"] * 3 + ["a2"] * 3 + ["id"] * 3 + ["🙈"] * 3
    attrib_ids = [
        "a1::1", "a1::2", "a1::3",
        "a2::red", "a2::blue", "a2::green",
        "id::a", "id::b", "id::c",
        "🙈::æski ēˈmōjē", "🙈::😋", "🙈::s",
    ]
    edges = pd.DataFrame({
        "event_id": event_ids,
        "edge_type": edge_types,
        "attrib_id": attrib_ids,
        "id": ["a", "b", "c"] * 4,
        "a1": [1, 2, 3] * 4,
        "a2": ["red", "blue", "green"] * 4,
        "🙈": ["æski ēˈmōjē", "😋", "s"] * 4,
    })

    if categorical_metadata:
        edges = edges.astype({"edge_type": "category"})

    assert_eq(edges, h["edges"])

    expected_sizes = {"entities": 12, "nodes": 15, "edges": 12, "events": 3}
    for key, size in expected_sizes.items():
        assert_eq(len(h[key]), size)