Example #1
0
def test_choices_k_bigger_than_smallest_partition_size():
    """Draw k=2 samples from a bag whose smallest partition has one item."""
    population = range(10)
    bag = db.from_sequence(population, partition_size=9)
    sampled = list(random.choices(bag, k=2).compute())
    # Ten items with partition_size=9 yields an unbalanced (9, 1) layout.
    partition_sizes = bag.map_partitions(len).compute()
    assert partition_sizes == (9, 1)
    assert len(sampled) == 2
    # Every drawn value must originate from the source sequence.
    for item in sampled:
        assert item in population
Example #2
0
def test_choices_k_equal_bag_size_with_unbalanced_partitions():
    """Draw as many samples (k=10) as the bag has items, split (9, 1)."""
    source = range(10)
    bag = db.from_sequence(source, partition_size=9)
    picks = list(random.choices(bag, k=10).compute())
    # Confirm the partitions really are unbalanced before checking the draw.
    sizes = bag.map_partitions(len).compute()
    assert sizes == (9, 1)
    assert len(picks) == 10
    # No drawn value may fall outside the source sequence.
    strangers = [value for value in picks if value not in source]
    assert not strangers
Example #3
0
def test_choices_empty_partition():
    """Sample k=2 items from a bag that contains an empty partition."""
    population = range(10)
    bag = db.from_sequence(population, partition_size=9).repartition(3)
    drawn = list(random.choices(bag, k=2).compute())
    # After repartitioning, the middle partition holds no elements.
    partition_sizes = bag.map_partitions(len).compute()
    assert partition_sizes == (9, 0, 1)
    assert len(drawn) == 2
    # Each sampled value has to be a member of the original population.
    for value in drawn:
        assert value in population
Example #4
0
def test_choices_size_over_two_perpartition():
    """
    Request k=10 samples from a ten-element bag built with partition_size=9.
    """
    source = range(10)
    bag = db.from_sequence(source, partition_size=9)
    result = random.choices(bag, k=10).compute()
    picks = list(result)
    assert len(picks) == 10
    # All picks must be values that exist in the source sequence.
    outsiders = [pick for pick in picks if pick not in source]
    assert not outsiders
Example #5
0
def test_choices_size():
    """
    Sampling with k=2 returns exactly two elements of the source sequence.
    """
    population = range(20)
    bag = db.from_sequence(population, npartitions=3)
    sample = random.choices(bag, k=2)
    drawn = list(sample.compute())
    assert len(drawn) == 2
    # Verify membership one element at a time.
    for element in drawn:
        assert element in population
Example #6
0
def test_choices_size_over():
    """
    Sampling with k=4 from a three-element bag still returns four elements.
    """
    source = range(3)
    bag = db.from_sequence(source, npartitions=3)
    sample = random.choices(bag, k=4)
    picks = list(sample.compute())
    assert len(picks) == 4
    # Nothing outside the source sequence may be drawn.
    unexpected = [pick for pick in picks if pick not in source]
    assert not unexpected
Example #7
0
def test_partitions_are_coerced_to_lists():
    """Zip a four-element bag with a repartitioned sample of equal length.

    See https://github.com/dask/dask/issues/6906
    """
    nested = db.from_sequence([[1, 2], [3, 4, 5], [6], [7]])
    letters = db.from_sequence(["a", "b", "c", "d"])

    # Draw one flattened value per letter, then call repartition(4) on the
    # sample before zipping it against the letters.
    sample_size = letters.count().compute()
    sample = random.choices(nested.flatten(), k=sample_size).repartition(4)

    zipped = db.zip(letters, sample).compute()
    assert len(zipped) == 4
Example #8
0
def test_choices_size_over_repartition():
    """
    Sample k=2 items after repartitioning leaves partitions of 9, 0 and 1.
    """
    source = range(10)
    original = db.from_sequence(source, partition_size=9)
    bag = original.repartition(3)
    picks = list(random.choices(bag, k=2).compute())
    # Two of the three partitions hold fewer than k elements.
    sizes = bag.map_partitions(len).compute()
    assert sizes == (9, 0, 1)
    assert len(picks) == 2
    # Collect any pick that is not part of the source; there must be none.
    foreign = [pick for pick in picks if pick not in source]
    assert not foreign
Example #9
0
def test_choices_size_exactly_k():
    """Check that choices(k=2) on a 20-element bag yields two elements."""
    values = range(20)
    bag = db.from_sequence(values, npartitions=3)
    computed = random.choices(bag, k=2).compute()
    chosen = list(computed)
    assert len(chosen) == 2
    # Any value missing from the input sequence would be a failure.
    missing = [value for value in chosen if value not in values]
    assert not missing
Example #10
0
def test_choices_k_bigger_than_bag_size():
    """Check that choices(k=4) on a three-element bag yields four elements."""
    values = range(3)
    bag = db.from_sequence(values, npartitions=3)
    computed = random.choices(bag, k=4).compute()
    chosen = list(computed)
    assert len(chosen) == 4
    # Each chosen element must be one of the bag's original values.
    for element in chosen:
        assert element in values