def sendBroadcast(self):
    """Serialize self.value and dump it uncompressed to self.path.

    Uses marshal when the value is marshalable (faster), falling back to
    cPickle with the highest protocol otherwise.
    """
    # Large (6.4 MB) buffer to reduce write syscalls for big values.
    f = open(self.path, 'wb', 65536 * 100)
    try:
        if marshalable(self.value):
            marshal.dump(self.value, f)
        else:
            cPickle.dump(self.value, f, -1)
    finally:
        # Close even when serialization raises, so the fd is not leaked.
        f.close()
    logger.debug("dump to %s", self.path)
def sendBroadcast(self):
    """Serialize self.value gzip-compressed to self.path.

    Records the serialized (uncompressed) byte count in self.bytes;
    GzipFile.tell() reports the position in the uncompressed stream.
    """
    f = gzip.open(self.path, 'wb')
    try:
        if marshalable(self.value):
            marshal.dump(self.value, f)
        else:
            cPickle.dump(self.value, f, -1)
        f.flush()
        # Must read tell() before close(); it is the uncompressed size.
        self.bytes = f.tell()
    finally:
        # Close even when serialization raises, so the fd is not leaked.
        f.close()
    logger.debug("dump to %s", self.path)
def blockifyObject(self, obj):
    """Serialize obj and split the buffer into BlockSize-sized blocks.

    Returns a VariableInfo holding the BroadcastBlock list, the block
    count, and the total serialized length in bytes.
    """
    if marshalable(obj):
        buf = marshal.dumps(obj)
    else:
        buf = cPickle.dumps(obj, -1)
    N = self.BlockSize
    # Ceiling division (// is explicit integer division, same result as
    # the old Py2 `/` on ints but safe under `from __future__ import division`).
    blockNum = (len(buf) + N - 1) // N
    val = [BroadcastBlock(idx, buf[off:off + N])
           for idx, off in enumerate(range(0, len(buf), N))]
    vi = VariableInfo(val, blockNum, len(buf))
    # The source node already holds every block.
    vi.has_blocks = blockNum
    return vi
def blockifyObject(self, obj):
    """Serialize obj, compress it, and split it into BlockSize-sized blocks.

    Returns a VariableInfo holding the BroadcastBlock list, the block
    count, and the total compressed length in bytes.
    """
    if marshalable(obj):
        buf = marshal.dumps(obj)
    else:
        buf = cPickle.dumps(obj, -1)
    # Level 1: favor speed over ratio; broadcast data is latency-sensitive.
    buf = zlib.compress(buf, 1)
    N = self.BlockSize
    # Ceiling division (// is explicit integer division, same result as
    # the old Py2 `/` on ints but safe under `from __future__ import division`).
    blockNum = (len(buf) + N - 1) // N
    val = [BroadcastBlock(idx, buf[off:off + N])
           for idx, off in enumerate(range(0, len(buf), N))]
    vi = VariableInfo(val, blockNum, len(buf))
    # The source node already holds every block.
    vi.has_blocks = blockNum
    return vi
def run(self, attempId):
    """Partition and combine this RDD split, then spill shuffle outputs.

    For every output partition the combined bucket is serialized (marshal
    when possible, else cPickle), compressed, written to a temp file, and
    atomically renamed into place so concurrent attempts cannot corrupt
    the output.  Returns the URI of the local shuffle file server.
    """
    logger.debug("shuffling %d of %s", self.partition, self.rdd)
    numOutputSplits = self.partitioner.numPartitions
    # Bind hot-loop attribute lookups to locals.
    getPartition = self.partitioner.getPartition
    mergeValue = self.aggregator.mergeValue
    createCombiner = self.aggregator.createCombiner
    buckets = [{} for _ in range(numOutputSplits)]
    for k, v in self.rdd.iterator(self.split):
        bucket = buckets[getPartition(k)]
        r = bucket.get(k, None)
        if r is not None:
            bucket[k] = mergeValue(r, v)
        else:
            bucket[k] = createCombiner(v)
    for i in range(numOutputSplits):
        path = LocalFileShuffle.getOutputFile(self.shuffleId, self.partition, i)
        if os.path.exists(path):
            # An earlier attempt already produced this output file.
            continue
        # Host/pid-unique temp name, renamed into place when complete.
        tpath = path + ".%s.%s" % (socket.gethostname(), os.getpid())
        if marshalable(buckets[i]):
            flag, d = 'm', marshal.dumps(buckets[i])
        else:
            flag, d = 'p', cPickle.dumps(buckets[i], -1)
        cd = comp.compress(d, 1)
        f = open(tpath, 'wb', 1024 * 4096)
        try:
            # Header: 1-byte format flag + 4-byte total length (header included).
            f.write(flag + struct.pack("I", 5 + len(cd)))
            f.write(cd)
        finally:
            # Close even if a write fails, so the fd is not leaked.
            f.close()
        if not os.path.exists(path):
            os.rename(tpath, path)
        else:
            # Lost the race to another attempt; discard our copy.
            os.unlink(tpath)
    return LocalFileShuffle.getServerUri()
def run(self, attempId):
    """Partition and combine this RDD split, then spill shuffle outputs.

    For every output partition the combined bucket is serialized (marshal
    when possible, else cPickle), compressed, written to a temp file, and
    atomically renamed into place so concurrent attempts cannot corrupt
    the output.  Returns the URI of the local shuffle file server.
    """
    logger.debug("shuffling %d of %s", self.partition, self.rdd)
    numOutputSplits = self.partitioner.numPartitions
    # Bind hot-loop attribute lookups to locals.
    getPartition = self.partitioner.getPartition
    mergeValue = self.aggregator.mergeValue
    createCombiner = self.aggregator.createCombiner
    buckets = [{} for _ in range(numOutputSplits)]
    for k, v in self.rdd.iterator(self.split):
        bucket = buckets[getPartition(k)]
        r = bucket.get(k, None)
        if r is not None:
            bucket[k] = mergeValue(r, v)
        else:
            bucket[k] = createCombiner(v)
    for i in range(numOutputSplits):
        path = LocalFileShuffle.getOutputFile(self.shuffleId, self.partition, i)
        if os.path.exists(path):
            # An earlier attempt already produced this output file.
            continue
        # Host/pid-unique temp name, renamed into place when complete.
        tpath = path + ".%s.%s" % (socket.gethostname(), os.getpid())
        if marshalable(buckets[i]):
            flag, d = 'm', marshal.dumps(buckets[i])
        else:
            flag, d = 'p', cPickle.dumps(buckets[i], -1)
        cd = compress(d)
        f = open(tpath, 'wb', 1024 * 4096)
        try:
            # Header: 1-byte format flag + 4-byte total length (header included).
            f.write(flag + struct.pack("I", 5 + len(cd)))
            f.write(cd)
        finally:
            # Close even if a write fails, so the fd is not leaked.
            f.close()
        if not os.path.exists(path):
            os.rename(tpath, path)
        else:
            # Lost the race to another attempt; discard our copy.
            os.unlink(tpath)
    return LocalFileShuffle.getServerUri()