Example #1
import os
from dpark import DparkContext
from dpark.bagel import Bagel, Vertex, Edge, Message


def parse_vertex(line, numV):
    # assumed input format: "<title> <outlink_1> <outlink_2> ...", one page per
    # line; Edge (with a target_id field) is assumed to come from dpark.bagel
    fields = line.split()
    title, refs = fields[0], fields[1:]
    outEdges = [Edge(ref) for ref in refs]
    return (title, Vertex(title, 1.0/numV, outEdges, True))

def gen_compute(num, epsilon):
    def compute(self, messageSum, agg, superstep):
        if messageSum and messageSum[0]:
            newValue = 0.15 / num + 0.85 * messageSum[0]
        else:
            newValue = self.value
        terminate = (superstep >= 10 and abs(newValue-self.value) < epsilon) or superstep > 30
        outbox = [Message(edge.target_id, newValue / len(self.outEdges))
                for edge in self.outEdges] if not terminate else []
        return Vertex(self.id, newValue, self.outEdges, not terminate), outbox
    return compute

if __name__ == '__main__':
    inputFile = 'wikipedia.txt'
    threshold = 0.01

    dpark = DparkContext()
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), inputFile)
    input = dpark.textFile(path)
    numVertex = input.count()
    vertices = input.map(lambda line: parse_vertex(line, numVertex)).cache()
    epsilon = 0.01 / numVertex
    messages = dpark.parallelize([])
    result = Bagel.run(dpark, vertices, messages,
        gen_compute(numVertex, epsilon))

    for v in result.filter(lambda x: x.value > threshold).collect():
        print v.id, v.value
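
# Input-format note (an assumption, matching the parse_vertex sketch above):
# each line of wikipedia.txt is expected to be an adjacency list of the form
#
#   Article_A Article_B Article_C
#
# i.e. a page title followed by the titles it links to. Every page then starts
# with rank 1/numVertex, and on each superstep compute() keeps the 0.15/num
# damping term and spreads 85% of its current rank evenly over its out-links:
#
#   parse_vertex("A B C", 4)  # -> ("A", Vertex("A", 0.25, [Edge("B"), Edge("C")], True))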
Example #2
import math
import random
import os, sys
from pprint import pprint
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from dpark import DparkContext

ctx = DparkContext()

# range
nums = ctx.parallelize(range(100), 4)
print nums.count()
print nums.reduce(lambda x, y: x + y)

# text search
f = ctx.textFile("./", ext='py').map(lambda x: x.strip())
log = f.filter(lambda line: 'logging' in line).cache()
print 'logging', log.count()
print 'error', log.filter(lambda line: 'error' in line).count()
for line in log.filter(lambda line: 'error' in line).collect():
    print line

# word count
counts = f.flatMap(lambda x: x.split()).map(lambda x: (x, 1)).reduceByKey(
    lambda x, y: x + y).cache()
pprint(counts.filter(lambda (_, v): v > 50).collectAsMap())
pprint(
    sorted(
        counts.filter(lambda (_, v): v > 20).map(
            lambda (x, y): (y, x)).groupByKey().collect()))
pprint(counts.map(lambda v: "%s:%s" % v).saveAsTextFile("wc/"))
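
# Read-back sketch (an assumption, not part of the original example): the
# output written by saveAsTextFile("wc/") above is one "word:count" line per
# record, so it can be loaded again with textFile and split on the last colon.
wc = ctx.textFile("wc/").map(lambda line: line.rsplit(":", 1))
pprint(wc.take(5))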
Example #3
import numpy as np
from subprocess import call
from dpark import DparkContext
# fs, RetrievalMusic and rearrange are assumed to be project-specific helpers
# defined elsewhere in the original source.


def calculate_single(id, dptable, mode):
    # assumed signature, matching the calls in batchprocess and __main__ below
    # download the track and write it to a local temporary file
    data = fs.get('/song/small/%s.mp3' % id)
    binfile = open("output/%s.mp3" % id, "wb")
    binfile.write(data)
    binfile.close()

    # run the retrieval step against the loaded/broadcast table
    m = RetrievalMusic(dptable, mode)
    m.retrieving('output/%s.mp3' % id)

    # remove the temporary file unless running in filter mode (mode == 2)
    if mode != 2:
        call("rm output/%s.mp3" % id, shell=True)


def batchprocess(song_id, loaded, mode):

#    dpark = DparkContext()
#    dptable = dpark.broadcast(loaded)
#    dpark.parallelize(song_id, 80).foreach(lambda(id):calculate_single(id, dptable, mode))
    for id in song_id:
        calculate_single(id, loaded, mode)
    
    if mode == 2:
        rearrange()

if __name__ == '__main__':
    song_id = np.load("track_temp.npy")
    mode = 1    # 1 for save, 2 for filter, and 0 for regular work
    dpark = DparkContext()
    dpark.parallelize(song_id, 50).foreach(lambda id: calculate_single(id, 0, mode))
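
# Distributed variant (a sketch based on the commented-out lines inside
# batchprocess above, not part of the original script): broadcast the loaded
# table once and fan the per-song work out over the cluster instead of
# looping locally.
def batchprocess_distributed(song_id, loaded, mode):
    dpark = DparkContext()
    dptable = dpark.broadcast(loaded)
    dpark.parallelize(song_id, 80).foreach(
        lambda id: calculate_single(id, dptable, mode))
    if mode == 2:
        rearrange()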


Example #4
from __future__ import print_function
import os
from dpark import DparkContext
from dpark.bagel import Bagel, Vertex
# parse_vertex is the same line-parsing helper shown in Example #1 and is
# assumed to be defined above this point in the original file.


def gen_compute(num, epsilon):
    def compute(self, messageSum, agg, superstep):
        if messageSum and messageSum[0]:
            newValue = 0.15 / num + 0.85 * messageSum[0]
        else:
            newValue = self.value
        terminate = (superstep >= 10 and
                     abs(newValue - self.value) < epsilon) or superstep > 30
        outbox = [(edge.target_id, newValue / len(self.outEdges))
                  for edge in self.outEdges] if not terminate else []
        return Vertex(self.id, newValue, self.outEdges, not terminate), outbox

    return compute


if __name__ == '__main__':
    inputFile = 'wikipedia.txt'
    threshold = 0.01

    dpark = DparkContext()
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), inputFile)
    input = dpark.textFile(path)
    numVertex = input.count()
    vertices = input.map(lambda line: parse_vertex(line, numVertex)).cache()
    epsilon = 0.01 / numVertex
    messages = dpark.parallelize([])
    result = Bagel.run(dpark, vertices, messages,
                       gen_compute(numVertex, epsilon))

    for id, v in result.filter(
            lambda id_v: id_v[1].value > threshold).collect():
        print(id, v)
Example #5
from __future__ import print_function
import math
import random
import os, sys
from pprint import pprint
from six.moves import map
from six.moves import range
from six.moves import zip

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from dpark import DparkContext

dpark = DparkContext()

# range
nums = dpark.parallelize(list(range(100)), 4)
print(nums.count())
print(nums.reduce(lambda x, y: x + y))

# text search
f = dpark.textFile("./", ext='py').map(lambda x: x.strip())
log = f.filter(lambda line: 'logging' in line).cache()
print('logging', log.count())
print('error', log.filter(lambda line: 'error' in line).count())
for line in log.filter(lambda line: 'error' in line).collect():
    print(line)

# word count
counts = f.flatMap(lambda x: x.split()).map(lambda x: (x, 1)).reduceByKey(lambda x, y: x + y).cache()
pprint(counts.filter(lambda __v1: __v1[1] > 50).collectAsMap())
pprint(sorted(counts.filter(lambda __v: __v[1] > 20).map(lambda x_y: (x_y[1], x_y[0])).groupByKey().collect()))
Example #6
import math
import random
import os, sys
from pprint import pprint

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from dpark import DparkContext

ctx = DparkContext()

# range
nums = ctx.parallelize(range(100), 4)
print nums.count()
print nums.reduce(lambda x, y: x + y)

# text search
f = ctx.textFile("./", ext="py").map(lambda x: x.strip())
log = f.filter(lambda line: "logging" in line).cache()
print "logging", log.count()
print "error", log.filter(lambda line: "error" in line).count()
for line in log.filter(lambda line: "error" in line).collect():
    print line

# word count
counts = f.flatMap(lambda x: x.split()).map(lambda x: (x, 1)).reduceByKey(lambda x, y: x + y).cache()
pprint(counts.filter(lambda (_, v): v > 50).collectAsMap())
pprint(sorted(counts.filter(lambda (_, v): v > 20).map(lambda (x, y): (y, x)).groupByKey().collect()))
pprint(counts.map(lambda v: "%s:%s" % v).saveAsTextFile("wc/"))

# Pi
import random
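
# The "# Pi" section above is truncated in this snippet; a minimal Monte Carlo
# sketch (an assumption, not the original code) reusing the same ctx could be:
def sample_point(_):
    x, y = random.random(), random.random()
    return 1 if x * x + y * y < 1.0 else 0

samples = 100000
hits = ctx.parallelize(range(samples), 4).map(sample_point).reduce(lambda a, b: a + b)
print 'Pi is roughly', 4.0 * hits / samples
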
#coding:utf-8
from random import shuffle, random, sample
import traceback

from dpark import DparkContext

dp = DparkContext('mesos')
'''random.shuffle does not return the shuffled data: it shuffles in place and returns None'''
rdd1 = dp.parallelize([[(1, 2), (3, 4), (5, 6)]]).map(lambda x: shuffle(x))
print 'rdd1:'
print rdd1.take(1)

rdd2 = dp.parallelize([((1, 2), (3, 4), (5, 6))]).map(lambda x: shuffle(x))
print 'rdd2:'
try:
    print rdd2.take(1)
except Exception:
    traceback.print_exc()
'''O(N log N): shuffle by sorting on a random key'''
rdd3 = dp.parallelize([[(1, 2), (3, 4), (5, 6)]
                       ]).map(lambda x: sorted(x, key=lambda k: random()))
print 'rdd3:'
print rdd3.take(1)
'''O(N): shuffle via random.sample'''
rdd4 = dp.parallelize([[(1, 2), (3, 4),
                        (5, 6)]]).map(lambda x: sample(x, len(x)))
print 'rdd4:'
print rdd4.take(1)

rdd5 = dp.parallelize([((1, 2), (3, 4), (5, 6))
                       ]).map(lambda x: sample(x, len(x)))
print 'rdd5:'
print rdd5.take(1)