from pysparkling import Context

# Build an RDD of text lines from the S3 path below; the trailing "*" is
# passed through to textFile() as part of the path.
by_subject_rdd = Context().textFile(
    's3n://human-microbiome-project/DEMO/HM16STR/46333/by_subject/*'
)

# NOTE(review): the (True, 1) arguments look like PySpark's
# takeSample(withReplacement, num) form -- confirm the installed
# pysparkling version accepts this signature.
sampled_lines = by_subject_rdd.takeSample(True, 1)
print(sampled_lines)
from pysparkling import Context

# Build an RDD of text lines from the S3 path below; the trailing "*" is
# passed through to textFile() as part of the path.
by_subject_rdd = Context().textFile(
    's3n://human-microbiome-project/DEMO/HM16STR/46333/by_subject/*'
)

# Request a sample of size 1 from the RDD and print whatever comes back.
sampled_lines = by_subject_rdd.takeSample(1)
print(sampled_lines)
def test_takeSample():
    """Check that a sample of size 1 yields one of the source elements."""
    source_values = [4, 7, 2]
    sample = Context().parallelize(source_values).takeSample(1)
    # Sampling is random, so only membership in the input can be asserted.
    assert sample[0] in source_values
def test_takeSample_partitions():
    """Sample one element from an RDD created with a partition count of 3.

    The point of this test is that only a single partition should be
    computed, leaving the other two untouched. That is not asserted here;
    it is only visible in the debug logs.
    """
    source_values = [4, 9, 7, 3, 2, 5]
    partitioned_rdd = Context().parallelize(source_values, 3)
    sample = partitioned_rdd.takeSample(1)
    # Sampling is random, so only membership in the input can be asserted.
    assert sample[0] in source_values
from pysparkling import Context

# Build an RDD of text lines from the S3 path below; the trailing "*" is
# passed through to textFile() as part of the path.
by_subject_rdd = Context().textFile(
    's3n://human-microbiome-project/DEMO/HM16STR/46333/by_subject/*'
)

# Request a sample of size 1 from the RDD and print whatever comes back.
one_sample = by_subject_rdd.takeSample(1)
print(one_sample)