import os
import random
import tempfile

from fast_pyspark_tester import Context

# Paths used by the tests below. LOCAL_TEST_PATH points at the tests/
# directory; the HDFS and S3 targets are assumed to come from the
# environment and need to be adjusted for your own setup.
LOCAL_TEST_PATH = os.path.dirname(__file__)
HDFS_TEST_PATH = os.getenv('HDFS_TEST_PATH', '')
S3_TEST_PATH = os.getenv('S3_TEST_PATH', '')


def test_read_7z():
    # file was created with:
    # 7z a tests/data.7z tests/readme_example.py
    # (brew install p7zip)
    rdd = Context().textFile('{}/data.7z'.format(LOCAL_TEST_PATH))
    print(rdd.collect())
    assert 'from fast_pyspark_tester import Context' in rdd.collect()


def test_saveAsTextFile_zip():
    tempFile = tempfile.NamedTemporaryFile(delete=True)
    tempFile.close()
    Context().parallelize(range(10)).saveAsTextFile(tempFile.name + '.zip')
    read_rdd = Context().textFile(tempFile.name + '.zip')
    print(read_rdd.collect())
    assert '5' in read_rdd.collect()
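

# A minimal sketch of the same round trip with gzip output. It assumes,
# like the '.zip' case above, that the output codec is picked from the file
# extension; the '.gz' suffix here is illustrative.
def test_saveAsTextFile_gz():
    tempFile = tempfile.NamedTemporaryFile(delete=True)
    tempFile.close()
    Context().parallelize(range(10)).saveAsTextFile(tempFile.name + '.gz')
    read_rdd = Context().textFile(tempFile.name + '.gz')
    print(read_rdd.collect())
    assert '5' in read_rdd.collect()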


def test_cache():
    # this crashes in version 0.2.28
    lines = Context().textFile('{}/*textFil*.py'.format(LOCAL_TEST_PATH))
    lines = lines.map(lambda l: '-' + l).cache()
    print(len(lines.collect()))
    lines = lines.map(lambda l: '+' + l)
    lines = lines.map(lambda l: '-' + l).cache()
    lines = lines.collect()
    print(lines)
    assert '-+-from fast_pyspark_tester import Context' in lines


def test_hdfs_textFile_loop():
    random.seed()

    fn = '{}/pysparkling_test_{:d}.txt'.format(HDFS_TEST_PATH, int(random.random() * 999999.0))
    print('HDFS test file: {0}'.format(fn))

    rdd = Context().parallelize('Hello World {0}'.format(x) for x in range(10))
    rdd.saveAsTextFile(fn)
    read_rdd = Context().textFile(fn)
    print(rdd.collect())
    print(read_rdd.collect())
    assert rdd.count() == read_rdd.count()
    assert all(r1 == r2 for r1, r2 in zip(rdd.collect(), read_rdd.collect()))


def test_s3_textFile():
    myrdd = Context().textFile(
        's3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2015-11/warc.paths.*'
    )
    assert (
        'common-crawl/crawl-data/CC-MAIN-2015-11/segments/1424937481488.49/'
        'warc/CC-MAIN-20150226075801-00329-ip-10-28-5-156.ec2.'
        'internal.warc.gz' in myrdd.collect()
    )


def test_s3_textFile_loop():
    random.seed()

    fn = '{}/pysparkling_test_{:d}.txt'.format(S3_TEST_PATH, int(random.random() * 999999.0))

    rdd = Context().parallelize('Line {0}'.format(n) for n in range(200))
    rdd.saveAsTextFile(fn)
    rdd_check = Context().textFile(fn)

    assert rdd.count() == rdd_check.count()
    assert all(e1 == e2 for e1, e2 in zip(rdd.collect(), rdd_check.collect()))


# Standalone example script (apparently tests/readme_example.py, the file
# packed into tests/data.7z for test_read_7z above):
from __future__ import print_function

from fast_pyspark_tester import Context

# read all the paths of warc and wat files of the latest Common Crawl
paths_rdd = Context().textFile(
    's3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2015-11/'
    'warc.paths.*,'
    's3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2015-11/'
    'wat.paths.gz',
)

print(paths_rdd.collect())
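
# A minimal follow-up sketch (an assumption, not part of the original
# example): the listed entries are relative to the bucket root, so they can
# be turned into full URLs and passed back to textFile() if needed.
full_urls = ['s3n://aws-publicdatasets/' + p for p in paths_rdd.collect()]
print(full_urls[:3])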


def test_local_textFile_1():
    lines = Context().textFile('{}/*textFil*.py'.format(LOCAL_TEST_PATH))
    lines = lines.collect()
    print(lines)
    assert 'from fast_pyspark_tester import Context' in lines


def test_read_tar_gz():
    # file was created with:
    # tar -cvzf data.tar.gz hello.txt
    rdd = Context().textFile('{}/data.tar.gz'.format(LOCAL_TEST_PATH))
    print(rdd.collect())
    assert 'Hello fast_pyspark_tester!' in rdd.collect()


def test_http_textFile():
    myrdd = Context().textFile(
        'https://s3-us-west-2.amazonaws.com/human-microbiome-project/DEMO/HM16STR/46333/by_subject/1139.fsa'
    )
    assert 'TGCTGCGGTGAATGCGTTCCCGGGTCT' in myrdd.collect()