Esempio n. 1
0
def test_drop_singletons_slow():
    """Exercise ``in_2core_graph_slow`` on two random integer columns.

    Three properties of the returned row mask are asserted:

    * no retained row carries a value that occurs exactly once in the full
      column, for either column;
    * the retained rows equal the result of repeatedly filtering each
      column on its own until the row count stops changing;
    * the dummy-matrix rows of the dropped observations sum to twice the
      number of dropped observations.
    """
    nobs = 40000
    rs = np.random.RandomState(0)
    c1 = rs.randint(0, 10000, (nobs, 1))
    c2 = rs.randint(0, 20000, (nobs, 1))
    cats = np.hstack([c1, c2])
    retain = in_2core_graph_slow(cats)
    nonsingletons = cats[retain]
    for column in (c1, c2):
        levels, freq = np.unique(column, return_counts=True)
        seen_once = levels[freq == 1]
        assert not np.isin(column[retain], seen_once).any()

    # Reference result: filter one column at a time, applying each mask to
    # every column and to the running index, until a full pass removes nothing.
    idx = np.arange(nobs)
    remaining = {"c1": c1.copy(), "c2": c2.copy()}
    for _ in range(nobs):
        rows_before = remaining["c1"].shape[0]
        for name in tuple(remaining):
            keep = in_2core_graph_slow(remaining[name])
            remaining = {key: values[keep] for key, values in remaining.items()}
            idx = idx[keep]
        if remaining["c1"].shape[0] == rows_before:
            break

    # The surviving index and the surviving columns must both reproduce
    # the rows selected by the single call on the full array.
    from_index = np.hstack([c1[idx], c2[idx]])
    assert_array_equal(nonsingletons, from_index)
    from_columns = np.hstack([remaining["c1"], remaining["c2"]])
    assert_array_equal(nonsingletons, from_columns)

    dummies, _ = dummy_matrix(cats, output_format="csr", precondition=False)
    dropped = ~retain
    # Presumably each dropped row has one indicator per column, hence the
    # factor of two -- confirm against dummy_matrix.
    assert dummies[dropped].sum() == 2 * dropped.sum()
Esempio n. 2
0
def test_drop_singletons():
    """Assert ``in_2core_graph`` matches ``in_2core_graph_slow``.

    The input is a two-column array of random integers drawn with a fixed
    seed, so the comparison is reproducible.
    """
    nobs = 40000
    rs = np.random.RandomState(0)
    first = rs.randint(0, 10000, (nobs, 1))
    second = rs.randint(0, 20000, (nobs, 1))
    cats = np.hstack([first, second])
    # Evaluate the function under test first, then the reference version.
    assert_array_equal(in_2core_graph(cats), in_2core_graph_slow(cats))
Esempio n. 3
0
def test_drop_singletons_large():
    """Compare ``in_2core_graph`` with ``in_2core_graph_slow`` on 2M rows.

    Both columns are 1-d random integer draws with a fixed seed; the first
    column draws from ``m // 3`` values and the second from ``m // 20``.
    """
    nobs = 2000000
    rs = np.random.RandomState(1234)
    first = rs.randint(0, nobs // 3, nobs)
    second = rs.randint(0, nobs // 20, nobs)
    # Join the two 1-d draws as the columns of an (nobs, 2) array.
    cats = np.stack([first, second], axis=1)
    assert_array_equal(in_2core_graph(cats), in_2core_graph_slow(cats))
Esempio n. 4
0
def test_drop_singletons_single():
    """Check ``in_2core_graph_slow`` on a single random integer column.

    The retained rows are compared against pandas ``value_counts``: they
    must contain exactly the values seen more than once, account for every
    occurrence of those values, and contain no value seen exactly once.
    """
    nobs = 40000
    rs = np.random.RandomState(0)
    cats = rs.randint(0, 10000, (nobs, 1))
    retain = in_2core_graph_slow(cats)
    nonsingletons = cats[retain]

    counts = pd.Series(cats[:, 0]).value_counts()
    repeated = counts > 1

    # The distinct retained values are exactly the repeated values.
    expected = np.sort(np.asarray(counts.index[repeated]))
    assert_array_equal(np.unique(nonsingletons), expected)
    # Every occurrence of a repeated value is retained.
    assert counts[repeated].sum() == nonsingletons.shape[0]

    singletons = np.asarray(counts.index[counts == 1])
    assert nonsingletons.shape[0] == (nobs - singletons.shape[0])
    assert not np.isin(nonsingletons, singletons).any()
Esempio n. 5
0
def test_drop_singletons_pandas():
    """Assert ``in_2core_graph`` matches ``in_2core_graph_slow`` on a DataFrame.

    The input is a two-column DataFrame of categorical strings: random
    integer draws prefixed with ``"A"`` (first column) or ``"B"`` (second
    column), built with a fixed seed.
    """
    rs = np.random.RandomState(0)
    c1 = rs.randint(0, 10000, (40000, 1))
    c2 = rs.randint(0, 20000, (40000, 1))
    columns = [
        pd.Series(["{0}{1}".format(let, c) for c in cat.ravel()], dtype="category")
        for let, cat in zip("AB", (c1, c2))
    ]
    # ``axis`` must be passed by keyword: pandas 2.0 made every argument of
    # ``concat`` after ``objs`` keyword-only, so ``pd.concat(objs, 1)`` raises.
    cats = pd.concat(columns, axis=1)
    cats.columns = ["cat1", "cat2"]
    remain = in_2core_graph(cats)
    expected = in_2core_graph_slow(cats)
    assert_array_equal(remain, expected)