Beispiel #1
0
 def sendBroadcast(self):
     """Serialize ``self.value`` into the file at ``self.path``.

     The value is written with ``marshal`` when ``marshalable`` accepts it,
     and with ``cPickle`` (protocol ``-1``, i.e. the highest available)
     otherwise. A debug message is logged once the file has been written.
     """
     # The third argument is the buffer size in bytes (65536 * 100).
     f = open(self.path, 'wb', 65536*100)
     try:
         if marshalable(self.value):
             marshal.dump(self.value, f)
         else:
             cPickle.dump(self.value, f, -1)
     finally:
         # Close the handle even when serialization raises, so a failed
         # dump does not leak the open file.
         f.close()
     logger.debug("dump to %s", self.path)
Beispiel #2
0
 def sendBroadcast(self):
     """Serialize ``self.value`` into a gzip file at ``self.path``.

     The value is encoded with ``marshal`` when ``marshalable`` accepts it,
     and with ``cPickle`` (protocol ``-1``, i.e. the highest available)
     otherwise. After flushing, the stream position is stored in
     ``self.bytes``.
     """
     f = gzip.open(self.path, 'wb')
     try:
         if marshalable(self.value):
             # Python 2's marshal.dump() insists on a real file object and
             # rejects GzipFile; writing the dumps() result produces the
             # same bytes and works with any file-like object.
             f.write(marshal.dumps(self.value))
         else:
             cPickle.dump(self.value, f, -1)
         f.flush()
         # NOTE(review): for a GzipFile opened for writing, tell() appears
         # to track the uncompressed bytes written rather than the on-disk
         # size -- confirm that this is what self.bytes should hold.
         self.bytes = f.tell()
     finally:
         # Always release the handle, even if serialization fails.
         f.close()
     logger.debug("dump to %s", self.path)
Beispiel #3
0
 def sendBroadcast(self):
     """Write ``self.value`` to ``self.path`` as a gzip-compressed dump.

     ``marshal`` is used when ``marshalable(self.value)`` is true; otherwise
     the value is pickled with ``cPickle`` at protocol ``-1`` (the highest
     available). The stream position after flushing is recorded in
     ``self.bytes``.
     """
     f = gzip.open(self.path, 'wb')
     try:
         if marshalable(self.value):
             # marshal.dump() in Python 2 only accepts genuine file
             # objects, not GzipFile, so serialize to a string first and
             # write that; the emitted bytes are the same.
             payload = marshal.dumps(self.value)
             f.write(payload)
         else:
             cPickle.dump(self.value, f, -1)
         f.flush()
         # NOTE(review): GzipFile.tell() in write mode looks like it counts
         # uncompressed bytes, not the compressed file size -- verify.
         self.bytes = f.tell()
     finally:
         # Close on every path so an exception does not leak the handle.
         f.close()
     logger.debug("dump to %s", self.path)
Beispiel #4
0
 def blockifyObject(self, obj):
     """Serialize ``obj`` and split the result into fixed-size blocks.

     ``obj`` is encoded with ``marshal`` when ``marshalable`` accepts it and
     with ``cPickle`` (protocol ``-1``) otherwise. The serialized buffer is
     cut into slices of at most ``self.BlockSize`` bytes, each wrapped in a
     ``BroadcastBlock`` together with its zero-based block index.

     Returns a ``VariableInfo`` constructed from the block list, the number
     of blocks and the total buffer length, with ``has_blocks`` set to the
     number of blocks.
     """
     if marshalable(obj):
         buf = marshal.dumps(obj)
     else:
         buf = cPickle.dumps(obj, -1)
     N = self.BlockSize
     total = len(buf)
     # divmod uses floor division, so the block count stays an integer even
     # when true division is in effect (Python 3 / __future__ division).
     blockNum, remainder = divmod(total, N)
     if remainder != 0:
         # A trailing partial block still needs its own slot.
         blockNum += 1
     # i // N is the zero-based index of the block starting at offset i.
     val = [BroadcastBlock(i // N, buf[i:i + N])
            for i in range(0, total, N)]
     vi = VariableInfo(val, blockNum, total)
     vi.has_blocks = blockNum
     return vi
Beispiel #5
0
 def blockifyObject(self, obj):
     """Serialize and compress ``obj``, then split it into fixed-size blocks.

     ``obj`` is encoded with ``marshal`` when ``marshalable`` accepts it and
     with ``cPickle`` (protocol ``-1``) otherwise, and the result is
     compressed with ``zlib`` at level 1. The compressed buffer is cut into
     slices of at most ``self.BlockSize`` bytes, each wrapped in a
     ``BroadcastBlock`` together with its zero-based block index.

     Returns a ``VariableInfo`` constructed from the block list, the number
     of blocks and the compressed length, with ``has_blocks`` set to the
     number of blocks.
     """
     if marshalable(obj):
         buf = marshal.dumps(obj)
     else:
         buf = cPickle.dumps(obj, -1)
     buf = zlib.compress(buf, 1)
     N = self.BlockSize
     size = len(buf)
     # Explicit floor division keeps the count and the block ids integral
     # regardless of whether true division is active.
     blockNum = size // N
     if size % N != 0:
         # Account for the final, shorter block.
         blockNum += 1
     val = [
         BroadcastBlock(i // N, buf[i:i + N]) for i in range(0, size, N)
     ]
     vi = VariableInfo(val, blockNum, size)
     vi.has_blocks = blockNum
     return vi
Beispiel #6
0
    def run(self, attempId):
        """Combine this split's records per output partition and write them out.

        Every ``(key, value)`` pair from ``self.rdd.iterator(self.split)`` is
        routed to a bucket chosen by the partitioner and folded into that
        bucket with the aggregator's ``createCombiner`` / ``mergeValue``.
        Each bucket is then serialized, compressed and written to the file
        named by ``LocalFileShuffle.getOutputFile``; buckets whose output
        file already exists are skipped.

        ``attempId`` is accepted but not used by this method.

        Returns the value of ``LocalFileShuffle.getServerUri()``.
        """
        logger.debug("shuffling %d of %s", self.partition, self.rdd)
        numOutputSplits = self.partitioner.numPartitions
        getPartition = self.partitioner.getPartition
        mergeValue = self.aggregator.mergeValue
        createCombiner = self.aggregator.createCombiner

        buckets = [{} for i in range(numOutputSplits)]
        for k, v in self.rdd.iterator(self.split):
            bucketId = getPartition(k)
            bucket = buckets[bucketId]
            r = bucket.get(k, None)
            # NOTE: a stored combiner equal to None is treated the same as a
            # missing key, so createCombiner is called again in that case.
            if r is not None:
                bucket[k] = mergeValue(r, v)
            else:
                bucket[k] = createCombiner(v)

        for i in range(numOutputSplits):
            path = LocalFileShuffle.getOutputFile(self.shuffleId,
                                                  self.partition, i)
            if os.path.exists(path):
                continue
            # Write to a host/pid-specific temporary name first and rename
            # afterwards, so the final path never holds a partial file.
            tpath = path + ".%s.%s" % (socket.gethostname(), os.getpid())
            if marshalable(buckets[i]):
                flag, d = 'm', marshal.dumps(buckets[i])
            else:
                flag, d = 'p', cPickle.dumps(buckets[i], -1)
            cd = comp.compress(d, 1)
            f = open(tpath, 'wb', 1024 * 4096)
            try:
                # Header: one flag byte ('m' or 'p') followed by a native
                # unsigned int holding 5 + len(cd); the 5 presumably counts
                # the header itself (1 flag byte + 4-byte int).
                f.write(flag + struct.pack("I", 5 + len(cd)))
                f.write(cd)
            finally:
                # Close even if a write fails so the handle is not leaked.
                # The data is not fsync'ed before the rename below.
                f.close()
            # Another writer may have produced the file in the meantime;
            # keep the existing one and discard ours in that case.
            if not os.path.exists(path):
                os.rename(tpath, path)
            else:
                os.unlink(tpath)
        return LocalFileShuffle.getServerUri()
Beispiel #7
0
    def run(self, attempId):
        """Bucket and combine this split's records, then write one file per bucket.

        Pairs yielded by ``self.rdd.iterator(self.split)`` are assigned to a
        bucket via the partitioner's ``getPartition`` and merged using the
        aggregator's ``createCombiner`` / ``mergeValue``. Each bucket is
        serialized (``marshal`` if ``marshalable``, else ``cPickle``),
        passed through ``compress`` and written to the path returned by
        ``LocalFileShuffle.getOutputFile``; existing output files are left
        untouched.

        ``attempId`` is accepted but not used by this method.

        Returns the value of ``LocalFileShuffle.getServerUri()``.
        """
        logger.debug("shuffling %d of %s", self.partition, self.rdd)
        numOutputSplits = self.partitioner.numPartitions
        getPartition = self.partitioner.getPartition
        mergeValue = self.aggregator.mergeValue
        createCombiner = self.aggregator.createCombiner

        buckets = [{} for i in range(numOutputSplits)]
        for k, v in self.rdd.iterator(self.split):
            bucketId = getPartition(k)
            bucket = buckets[bucketId]
            r = bucket.get(k, None)
            # NOTE: a combiner value of None cannot be told apart from an
            # absent key here and causes createCombiner to run again.
            if r is not None:
                bucket[k] = mergeValue(r, v)
            else:
                bucket[k] = createCombiner(v)

        for i in range(numOutputSplits):
            path = LocalFileShuffle.getOutputFile(self.shuffleId, self.partition, i)
            if os.path.exists(path):
                continue
            # Temporary name is made unique per host and process; the file
            # is renamed into place only after it has been fully written.
            tpath = path + ".%s.%s" % (socket.gethostname(), os.getpid())
            if marshalable(buckets[i]):
                flag, d = 'm', marshal.dumps(buckets[i])
            else:
                flag, d = 'p', cPickle.dumps(buckets[i], -1)
            cd = compress(d)
            f = open(tpath, 'wb', 1024*4096)
            try:
                # Header is the flag byte plus a native unsigned int with
                # value 5 + len(cd); the 5 presumably covers the header
                # (1 flag byte + 4-byte int).
                f.write(flag + struct.pack("I", 5 + len(cd)))
                f.write(cd)
            finally:
                # Guarantee the handle is released when a write raises.
                f.close()
            # If the target appeared while we were writing, keep it and
            # drop our temporary copy.
            if not os.path.exists(path):
                os.rename(tpath, path)
            else:
                os.unlink(tpath)
        return LocalFileShuffle.getServerUri()