def test_dist_samples(cluster4):
    """Check ``groups`` sample limits when run on the ``cluster4`` context.

    Values are grouped by themselves; every group must hold at most the
    requested number of samples, or all of its members when it has fewer.
    """
    population = (("A", 200), ("B", 100), ("C", 3), ("D", 10))
    items = []
    for label, count in population:
        items += [label] * count
    random.shuffle(items)

    sampled = hd.Values(items).groups(lambda x: x, 1).run(cluster4.ctx)
    assert {"A": ["A"], "B": ["B"], "C": ["C"], "D": ["D"]} == sampled

    # Limits below, between and above the individual group sizes.
    for limit in (10, 150, 1500):
        domain = hd.Values(items)
        sampled = domain.groups(lambda x: x, limit).run(cluster4.ctx)
        expected = dict(
            (label, [label] * min(count, limit))
            for label, count in population)
        assert sampled == expected
def test_domain_repr():
    """Check ``repr`` of domains: size, element preview, and custom name."""
    # (input values, expected repr) pairs for plain ``Values`` domains.
    cases = [
        ([], "<Values size=0 {}>"),
        (["a"], "<Values size=1 {'a'}>"),
        (["a" * 1000], "<Values size=1 {'aaaaaaaaa ... aaaaaaaa'}>"),
        (["a", "b"], "<Values size=2 {'a', 'b'}>"),
        (["a" * 1000, "b" * 1000],
         "<Values size=2 {'aaaaaaaaa ... aaaaaaaa', "
         "'bbbbbbbb ... bbbbbbb'}>"),
        (["a", "b"] * 1000,
         "<Values size=2000 {'a', 'b', 'a', 'b', 'a', ...}>"),
        (["a" * 1000, "b" * 1000] * 1000,
         "<Values size=2000 {'aaaaaaaaa ... aaaaaaaa', "
         "'bbbbbbbb ... bbbbbbb', ...}>"),
    ]
    for values, expected in cases:
        assert repr(hd.Values(values)) == expected

    # A product of three domains reports its own type name and tuples.
    product = (hd.Range(2)
               * hd.Values(["a", "b", "c"])
               * hd.Values(["x", "y"]))
    expected_product = ("<Product size=12 {(0, 'a', 'x'), (0, 'a', 'y'), "
                        "(0, 'b', 'x'), ...}>")
    assert repr(product) == expected_product

    # An explicit name replaces the type name in the repr.
    named = hd.Values([], name="MyName")
    assert repr(named) == "<MyName size=0 {}>"
def test_join_int_generate2():
    """Check how often the small joined domains show up in generated items.

    Out of 3000 items taken from ``generate()`` on the join, each of the
    values 5000, 5001 and 5002 must occur fewer than 100 times.
    """
    single = hd.Values([5000])
    numbers = hd.Range(1000)
    pair = hd.Values([5001, 5002])
    joined = single + numbers + pair

    sample = list(joined.generate().take(3000))
    for rare in (5000, 5001, 5002):
        assert sample.count(rare) < 100
def test_join_iter_set():
    """Check ``create_iter(i)`` on a join against plain iteration.

    For every start index from 0 up to ``size + 10`` the first item of
    ``create_iter(i)`` is collected (when the iterator yields anything).
    The collected items must equal ``list(p)``, so iterators started past
    the end must be empty.
    """
    r1 = hd.Values([5000])
    r2 = hd.Range(10)
    r3 = hd.Values([5001, 5002])
    p = r1 + r2 + r3

    a = list(p)
    b = []
    # ``range`` rather than the Python 2-only ``xrange``: the loop is tiny,
    # so the eager Python 2 list costs nothing and Python 3 no longer needs
    # a compatibility shim for the name.
    for i in range(p.size + 10):
        result = list(p.create_iter(i))
        if result:
            b.append(result[0])
    assert a == b
def test_values_iterate():
    """Check flags, size, iteration and generation of ``Values``."""
    letters = hd.Values(["a", "b", "c"])
    assert not letters.filtered
    assert letters.step_jumps
    assert letters.size == 3
    assert list(letters) == ["a", "b", "c"]
    # Every generated item must come from the original values.
    assert all(item in ("a", "b", "c") for item in letters.generate(100))

    empty = hd.Values([])
    assert empty.size == 0
    assert list(empty) == []
def test_cnfs_exception():
    """Check that ``cnfs()`` raises for values of a plain local class."""
    class Opaque(object):
        pass

    domain = hd.Values(tuple(Opaque() for _ in range(3)))
    with pytest.raises(Exception):
        domain.cnfs()
def test_join_to_values():
    """Check that ``to_values()`` on a join gives an equivalent ``Values``."""
    joined = hd.Range(5) + hd.Values(("a", "b"))
    converted = joined.to_values()

    assert isinstance(converted, hd.Values)
    assert list(joined) == list(converted)
def test_permutations_iterate():
    """Check size and content of ``Permutations`` over three values."""
    letters = hd.Values(("A", "B", "C"))
    perms = hd.Permutations(letters)
    assert perms.size == 6

    # Compared as sets, so iteration order is not part of the check.
    produced = set(perms)
    wanted = {
        ('A', 'B', 'C'),
        ('A', 'C', 'B'),
        ('B', 'A', 'C'),
        ('B', 'C', 'A'),
        ('C', 'A', 'B'),
        ('C', 'B', 'A'),
    }
    assert produced == wanted
def test_join_to_values_maxsize():
    """Check ``to_values(max_size=5)`` on a join of seven elements.

    The result must remain a ``Join`` whose sub-domains are all ``Values``
    and which iterates to the same items as the original join.
    """
    joined = hd.Range(5) + hd.Values(("a", "b"))
    converted = joined.to_values(max_size=5)

    assert isinstance(converted, hd.Join)
    for part in converted.domains:
        assert isinstance(part, hd.Values)
    assert list(joined) == list(converted)
def test_samples_counts():
    """Check ``groups_counts`` keyed by the second tuple field.

    With a limit of 2, every group is expected as
    ``[count, sample, ...]`` holding at most two samples; no entry is
    expected for the items whose key is ``None``.
    """
    records = [
        ("A", 10), ("A", 10), ("B", 20), ("C", 10),
        ("D", 20), ("E", 5), ("Z", None), ("S", 10),
    ]
    domain = hd.Values(records)
    grouped = domain.groups_counts(lambda x: x[1], 2).run()

    wanted = {
        10: [4, ("A", 10), ("A", 10)],
        20: [2, ("B", 20), ("D", 20)],
        5: [1, ("E", 5)],
    }
    assert wanted == grouped
def test_max():
    """Check ``max`` with a key function, a result limit, and on ranges."""
    records = [("A", 10), ("A", 10), ("B", 20),
               ("C", 10), ("D", 20), ("E", 5)]
    domain = hd.Values(records)

    # (extra positional arguments after the key, expected maxima)
    cases = (
        ((), [("B", 20), ("D", 20)]),
        ((10,), [("B", 20), ("D", 20)]),
        ((1,), [("B", 20)]),
    )
    for extra, wanted in cases:
        found = domain.max(lambda x: x[1], *extra).run()
        assert found == wanted

    # The result of ``max`` on a filtered domain is iterated directly.
    below_twenty = domain.filter(lambda x: x[1] < 20)
    found = list(below_twenty.max(lambda x: x[1]))
    assert found == [("A", 10), ("A", 10), ("C", 10)]

    # Without a key function the items themselves are compared.
    found = hd.Range(10).max().run()
    assert found == [9]

    found = hd.Range(10).iterate().max().run()
    assert found == [9]
def test_values_to_values():
    """Check that ``to_values()`` on ``Values`` equals the original."""
    original = hd.Values(("A", "B", "C"))
    converted = original.to_values()
    assert converted == original
def test_values_repr():
    """Check ``repr`` of ``Values`` holding a str, an int and a tuple."""
    mixed = hd.Values(("abc", 321, (2.2, 1)))
    expected = "<Values size=3 {'abc', 321, (2.2, 1)}>"
    assert repr(mixed) == expected
def test_values_flags():
    """Check the ``filtered``, ``step_jumps`` and ``strict`` flags."""
    domain = hd.Values(["a", "b", "c"])
    assert not domain.filtered
    assert domain.step_jumps
    assert not domain.strict
def test_values_name():
    """Check that the ``name`` keyword is exposed as the ``name`` attribute."""
    named = hd.Values(["a", "b", "c", "d"], name="ListTest")
    assert named.name == "ListTest"
def test_values_set():
    """Check that ``create_iter(2)`` skips the first two values."""
    domain = hd.Values(["a", "b", "c", "d"])
    tail = list(domain.create_iter(2))
    assert tail == ["c", "d"]