from pysparkling import Context

# Build an RDD from the files matching the S3 glob below.
by_subject_rdd = Context().textFile(
    's3n://human-microbiome-project/DEMO/HM16STR/46333/by_subject/*'
)
# pysparkling's takeSample() accepts only the sample size; unlike PySpark
# it has no leading ``withReplacement`` flag, so pass just the count.
print(by_subject_rdd.takeSample(1))
from pysparkling import Context

# Create a context, point it at the files matching the S3 glob, and
# print a sample of one element from the resulting RDD.
sc = Context()
by_subject_rdd = sc.textFile(
    's3n://human-microbiome-project/DEMO/HM16STR/46333/by_subject/*',
)
print(by_subject_rdd.takeSample(1))
def test_takeSample():
    """Check that a one-element sample is drawn from the RDD's own data."""
    values = [4, 7, 2]
    sample = Context().parallelize(values).takeSample(1)
    assert sample[0] in values
def test_takeSample_partitions():
    """Sample one element from an RDD split into three partitions.

    The assertion itself only checks membership. What this test is really
    about is that a single partition should be computed while the other
    two are left alone; that is only visible in the debug logs.
    """
    values = [4, 9, 7, 3, 2, 5]
    partitioned_rdd = Context().parallelize(values, 3)
    picked = partitioned_rdd.takeSample(1)
    assert picked[0] in values
from pysparkling import Context

# Build the RDD from the S3 glob, then print a one-element sample of it.
by_subject_rdd = Context().textFile(
    's3n://human-microbiome-project/DEMO/HM16STR/46333/by_subject/*',
)
sampled_lines = by_subject_rdd.takeSample(1)
print(sampled_lines)