def run_without_sorted(self, it):
    """Serialize, compress and write each shuffle bucket to a local file.

    For every ``(bucket_index, bucket)`` pair yielded by *it*, the bucket is
    serialized with marshal when possible (flag ``b'm'``), falling back to
    cPickle (flag ``b'p'``), compressed, and written atomically to the
    shuffle output file.  Each write is retried up to 3 times with a larger
    size hint.  Returns the URI of the local shuffle server.
    """
    for i, bucket in it:
        try:
            if marshalable(bucket):
                flag, d = b'm', marshal.dumps(bucket)
            else:
                flag, d = b'p', six.moves.cPickle.dumps(bucket, -1)
        except ValueError:
            # marshal raises ValueError on unsupported values; fall back
            flag, d = b'p', six.moves.cPickle.dumps(bucket, -1)
        cd = compress(d)
        env.task_stats.bytes_shuffle_write += len(cd)
        last_exc = None
        for tried in range(1, 4):
            try:
                path = LocalFileShuffle.getOutputFile(
                    self.shuffleId, self.partition, i, len(cd) * tried)
                with atomic_file(path, bufsize=1024 * 4096) as f:
                    f.write(flag + struct.pack("I", 5 + len(cd)))
                    f.write(cd)
                break
            except IOError as e:
                last_exc = e
                logger.warning("write %s failed: %s, try again (%d)",
                               path, e, tried)
        else:
            # BUG FIX: on Python 3 the except-clause name `e` is deleted
            # when the handler exits, so `raise e` here raised NameError;
            # re-raise the saved exception instead.
            raise last_exc
    return LocalFileShuffle.getServerUri()
def run_task(task_data):
    """Decode and execute one serialized task.

    Returns ``(mesos_state, payload)`` where payload pickles
    ``(Success(), (flag, data), accUpdate)`` on success.  ``flag`` is
    0 (marshal) or 1 (pickle); +2 means ``data`` is a URI to a spilled
    result file rather than the inline bytes (used when the compressed
    result exceeds TASK_RESULT_LIMIT).
    """
    try:
        gc.disable()  # avoid GC pauses while the task body runs
        task, ntry = cPickle.loads(decompress(task_data))
        setproctitle('dpark worker %s: run task %s' % (Script, task))
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        if marshalable(result):
            flag, data = 0, marshal.dumps(result)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)
        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            # BUG FIX: the compressed payload is binary; open in 'wb'
            # (text mode corrupts it on Python 3 / Windows) and ensure the
            # handle is closed even if the write fails.
            with open(path, 'wb') as f:
                f.write(data)
            data = '/'.join([LocalFileShuffle.getServerUri()]
                            + path.split('/')[-3:])
            flag += 2
        return mesos_pb2.TASK_FINISHED, cPickle.dumps(
            (Success(), (flag, data), accUpdate), -1)
    except FetchFailed as e:
        # `except ... as e` works on Python 2.6+ and 3 alike
        return mesos_pb2.TASK_FAILED, cPickle.dumps((e, None, None), -1)
def run(self, attempId):
    """Shuffle this partition: serialize each output bucket and publish it
    as a local file, using write-to-temp-then-rename for atomicity."""
    logger.debug("shuffling %d of %s", self.partition, self.rdd)
    for i, bucket in self.rdd._prepare_shuffle(self.split, self.partitioner,
                                               self.aggregator):
        try:
            if marshalable(bucket):
                flag, d = 'm', marshal.dumps(bucket)
            else:
                flag, d = 'p', cPickle.dumps(bucket, -1)
        except ValueError:
            # marshal raises ValueError on unsupported values; fall back
            flag, d = 'p', cPickle.dumps(bucket, -1)
        cd = compress(d)
        last_exc = None
        for tried in range(1, 4):
            try:
                path = LocalFileShuffle.getOutputFile(
                    self.shuffleId, self.partition, i, len(cd) * tried)
                tpath = path + ".%s.%s" % (socket.gethostname(), os.getpid())
                f = open(tpath, 'wb', 1024 * 4096)
                f.write(flag + struct.pack("I", 5 + len(cd)))
                f.write(cd)
                f.close()
                os.rename(tpath, path)  # atomic publish
                break
            except IOError as e:
                last_exc = e
                logger.warning("write %s failed: %s, try again (%d)",
                               path, e, tried)
                try:
                    os.remove(tpath)
                except OSError:
                    pass
        else:
            # BUG FIX: a bare `raise` here is outside any active except
            # handler (invalid on Python 3); re-raise the saved exception.
            raise last_exc
def run(self, attempId):
    """Shuffle this partition: serialize each bucket and write it
    atomically to the local shuffle output file, retrying up to 3 times."""
    logger.debug("shuffling %d of %s", self.partition, self.rdd)
    for i, bucket in self._prepare_shuffle(self.rdd):
        try:
            if marshalable(bucket):
                flag, d = 'm', marshal.dumps(bucket)
            else:
                flag, d = 'p', cPickle.dumps(bucket, -1)
        except ValueError:
            # marshal raises ValueError on unsupported values; fall back
            flag, d = 'p', cPickle.dumps(bucket, -1)
        cd = compress(d)
        last_exc = None
        for tried in range(1, 4):
            try:
                path = LocalFileShuffle.getOutputFile(
                    self.shuffleId, self.partition, i, len(cd) * tried)
                with atomic_file(path, bufsize=1024 * 4096) as f:
                    f.write(flag + struct.pack("I", 5 + len(cd)))
                    f.write(cd)
                break
            except IOError as e:
                last_exc = e
                logger.warning("write %s failed: %s, try again (%d)",
                               path, e, tried)
        else:
            # BUG FIX: a bare `raise` after the loop is outside the except
            # handler; re-raise the saved exception explicitly.
            raise last_exc
def run_task(task, ntry):
    """Execute *task* (attempt *ntry*) and report the outcome.

    Returns ``(mesos_state, payload)`` where payload pickles
    ``(task.id, Success(), (flag, data), accUpdate)`` on success.  ``flag``
    is 0 (marshal) or 1 (pickle); +2 means ``data`` is a URI to a result
    file spilled to WORKDIR because it exceeded TASK_RESULT_LIMIT.
    """
    try:
        setproctitle('dpark worker %s: run task %s' % (Script, task))
        Accumulator.clear()
        gc.disable()  # avoid GC pauses while the task body runs
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        if marshalable(result):
            flag, data = 0, marshal.dumps(result)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)
        if len(data) > TASK_RESULT_LIMIT:
            workdir = env.get('WORKDIR')
            name = 'task_%s_%s.result' % (task.id, ntry)
            path = os.path.join(workdir, name)
            # BUG FIX: the compressed payload is binary; write in 'wb'
            # mode and close the handle even if the write fails.
            with open(path, 'wb') as f:
                f.write(data)
            data = LocalFileShuffle.getServerUri() + '/' + name
            flag += 2
        return mesos_pb2.TASK_FINISHED, cPickle.dumps(
            (task.id, Success(), (flag, data), accUpdate), -1)
    except Exception:
        import traceback
        msg = traceback.format_exc()
        return mesos_pb2.TASK_FAILED, cPickle.dumps(
            (task.id, OtherFailure(msg), None, None), -1)
def test(): from dpark.util import compress logging.basicConfig(level=logging.DEBUG) from dpark.env import env env.start(True) l = [] for i in range(10): d = zip(range(10000), range(10000)) random.shuffle(d) l.append(SortedItems(d)) hl = heap_merged(l, lambda x, y: x + y, MAX_SHUFFLE_MEMORY) for i in range(10): print i, hl.next() path = LocalFileShuffle.getOutputFile(1, 0, 0) d = compress(cPickle.dumps({'key': 'value'}, -1)) f = open(path, 'w') f.write('p' + struct.pack('I', 5 + len(d)) + d) f.close() uri = LocalFileShuffle.getServerUri() env.mapOutputTracker.registerMapOutputs(1, [uri]) fetcher = SimpleShuffleFetcher() def func(it): k, v = next(it) assert k == 'key' assert v == 'value' fetcher.fetch(1, 0, func) tracker = MapOutputTracker() tracker.registerMapOutputs(2, [None, uri, None, None, None]) assert tracker.getServerUris(2) == [None, uri, None, None, None] tracker.stop()
def test():
    """Smoke test: write one shuffle output, fetch it back, and exercise
    MapOutputTracker registration."""
    from dpark.util import compress
    import logging
    logging.basicConfig(level=logging.DEBUG)
    from dpark.env import env
    env.start()
    path = LocalFileShuffle.getOutputFile(1, 0, 0)
    d = compress(six.moves.cPickle.dumps({'key': 'value'}, -1))
    # BUG FIX: header + compressed payload are bytes; text mode ('w')
    # fails outright on Python 3 — open in binary mode.
    f = open(path, 'wb')
    f.write(pack_header(len(d), False, False) + d)
    f.close()
    uri = LocalFileShuffle.getServerUri()
    env.mapOutputTracker.registerMapOutputs(1, [uri])
    fetcher = SimpleShuffleFetcher()

    def func(it):
        # the single record written above must round-trip unchanged
        k, v = next(it)
        assert k == 'key'
        assert v == 'value'

    fetcher.fetch(1, 0, func)
    tracker = MapOutputTracker()
    tracker.registerMapOutputs(2, [None, uri, None, None, None])
    assert tracker.getServerUris(2) == [None, uri, None, None, None]
    tracker.stop()
def createTask(self, o, job, t):
    """Build a Mesos task description (Dict) for task *t* of *job* on
    offer *o*, attaching cpu/mem scalar resources."""
    tid = '%s:%s:%s' % (job.id, t.id, t.tried)
    task = Dict()
    task.name = 'task %s' % tid
    task.task_id.value = tid
    task.agent_id.value = o.agent_id.value
    payload = six.moves.cPickle.dumps((t, t.tried), -1)
    task.data = encode_data(compress(payload))
    task.executor = self.executor
    if len(task.data) > 1000 * 1024:
        # oversized task payloads slow down the scheduler round-trip
        logger.warning('task too large: %s %d', t, len(task.data))
    resources = []
    task.resources = resources
    for res_name, res_value in (('cpus', t.cpus), ('mem', t.mem)):
        res = Dict()
        res.name = res_name
        res.type = 'SCALAR'
        res.scalar.value = res_value
        resources.append(res)
    return task
def createTask(self, o, job, t):
    """Assemble the Mesos task Dict for *t*, including serialized task
    data and cpu/mem scalar resources."""
    tid = '%s:%s:%s' % (job.id, t.id, t.tried)
    task = Dict()
    task.name = 'task %s' % tid
    task.task_id.value = tid
    task.agent_id.value = o.agent_id.value
    task.data = encode_data(compress(cPickle.dumps((t, t.tried), -1)))
    task.executor = self.executor
    size = len(task.data)
    if size > 1000 * 1024:
        # large payloads are a scheduling-latency smell; warn loudly
        logger.warning('task too large: %s %d', t, size)
    res_list = []
    task.resources = res_list
    cpu = Dict()
    cpu.name = 'cpus'
    cpu.type = 'SCALAR'
    cpu.scalar.value = t.cpus
    res_list.append(cpu)
    mem = Dict()
    mem.name = 'mem'
    mem.type = 'SCALAR'
    mem.scalar.value = t.mem
    res_list.append(mem)
    return task
def run_task(task_data):
    """Decode and execute one serialized task; spill oversized results to
    WORKDIR.

    Returns ``(mesos_state, payload)`` where payload pickles
    ``(task.id, Success(), (flag, data), accUpdate)`` on success; ``flag``
    is 0 (marshal) or 1 (pickle), +2 when ``data`` is a spilled-file URI.
    """
    try:
        gc.disable()  # avoid GC pauses while the task body runs
        task, ntry = cPickle.loads(decompress(task_data))
        setproctitle('dpark worker %s: run task %s' % (Script, task))
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        if marshalable(result):
            flag, data = 0, marshal.dumps(result)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)
        if len(data) > TASK_RESULT_LIMIT:
            workdir = env.get('WORKDIR')
            name = 'task_%s_%s.result' % (task.id, ntry)
            path = os.path.join(workdir, name)
            # BUG FIX: compressed payload is binary; write in 'wb' mode
            # and close the handle even if the write fails.
            with open(path, 'wb') as f:
                f.write(data)
            data = LocalFileShuffle.getServerUri() + '/' + name
            flag += 2
        return mesos_pb2.TASK_FINISHED, cPickle.dumps(
            (task.id, Success(), (flag, data), accUpdate), -1)
    except Exception:
        import traceback
        msg = traceback.format_exc()
        return mesos_pb2.TASK_FAILED, cPickle.dumps(
            (task.id, OtherFailure(msg), None, None), -1)
def run(self, attempId):
    """Shuffle this partition: serialize each output bucket and publish it
    via temp-file-then-rename, retrying each write up to 3 times."""
    logger.debug("shuffling %d of %s", self.partition, self.rdd)
    for i, bucket in self.rdd._prepare_shuffle(self.split, self.partitioner,
                                               self.aggregator):
        try:
            if marshalable(bucket):
                flag, d = 'm', marshal.dumps(bucket)
            else:
                flag, d = 'p', cPickle.dumps(bucket, -1)
        except ValueError:
            # marshal raises ValueError on unsupported values; fall back
            flag, d = 'p', cPickle.dumps(bucket, -1)
        cd = compress(d)
        last_exc = None
        for tried in range(1, 4):
            try:
                path = LocalFileShuffle.getOutputFile(
                    self.shuffleId, self.partition, i, len(cd) * tried)
                tpath = path + ".%s.%s" % (socket.gethostname(), os.getpid())
                f = open(tpath, 'wb', 1024 * 4096)
                f.write(flag + struct.pack("I", 5 + len(cd)))
                f.write(cd)
                f.close()
                os.rename(tpath, path)  # atomic publish
                break
            except IOError as e:
                last_exc = e
                logger.warning("write %s failed: %s, try again (%d)",
                               path, e, tried)
                try:
                    os.remove(tpath)
                except OSError:
                    pass
        else:
            # BUG FIX: a bare `raise` here is outside any active except
            # handler; re-raise the saved exception explicitly.
            raise last_exc
def blockifyObject(self, obj):
    """Serialize *obj* and split the buffer into compressed Blocks of at
    most ``self.BlockSize`` bytes.

    Returns ``(blocks, total_serialized_length)``.
    """
    try:
        buf = marshal.dumps(obj)
    except Exception:
        # marshal cannot serialize arbitrary objects; fall back to pickle
        buf = cPickle.dumps(obj, -1)
    N = self.BlockSize
    # BUG FIX: use explicit floor division — on Python 3 `/` yields a
    # float, which breaks range(); behavior on Python 2 is unchanged.
    blockNum = len(buf) // N + 1
    val = [Block(i, compress(buf[i * N:i * N + N])) for i in range(blockNum)]
    return val, len(buf)
def _prepare(self, items):
    """Serialize and compress *items*.

    Returns ``((is_marshal, data), size)`` where *is_marshal* records
    which serializer produced *data* and *size* is the compressed length.
    """
    items = list(items)
    pickle_dumps = six.moves.cPickle.dumps
    try:
        if marshalable(items):
            is_marshal = True
            d = marshal.dumps(items)
        else:
            is_marshal = False
            d = pickle_dumps(items, -1)
    except ValueError:
        # marshal raises ValueError on unsupported values; fall back
        is_marshal = False
        d = pickle_dumps(items, -1)
    data = compress(d)
    return (is_marshal, data), len(data)
def blockifyObject(self, obj):
    """Serialize *obj* and chop the buffer into compressed Blocks.

    Returns ``(blocks, total_serialized_length)``.
    """
    try:
        buf = marshal.dumps(obj)
    except Exception:
        # marshal cannot serialize arbitrary objects; fall back to pickle
        buf = cPickle.dumps(obj, -1)
    N = self.BlockSize
    # BUG FIX: explicit floor division — `/` returns a float on Python 3,
    # which breaks range(); Python 2 behavior is unchanged.
    blockNum = len(buf) // N + 1
    val = [
        Block(i, compress(buf[i * N:i * N + N]))
        for i in range(blockNum)
    ]
    return val, len(buf)
def _flush(self):
    """Persist pending updates of this mutable dict.

    Groups updates by storage key, merges them into the data fetched for
    that key, writes each merged dict to a uniquely named file (temp file
    renamed into place), announces the new file to the tracker, and
    removes files older than the previous generation.  Raises if the
    target filesystem is more than ~34% full.
    """
    if not self.updated:
        return
    updated_keys = {}
    path = self._get_path()
    uri = env.get('SERVER_URI')
    server_uri = '%s/%s' % (uri, os.path.basename(path))
    st = os.statvfs(path)
    ratio = st.f_bfree * 1.0 / st.f_blocks
    if ratio < 0.66:
        raise Exception('Insufficient disk space')
    for k, v in self.updated.items():
        key = self._get_key(k)
        if key in updated_keys:
            updated_keys[key][k] = v
        else:
            updated_keys[key] = {k: v}
    uid = uuid.uuid4().get_hex()
    for key, updated in updated_keys.items():
        new = self._fetch_missing(key)
        for k, v in updated.items():
            if v is None:
                new.pop(k)  # None marks a deletion
            else:
                new[k] = v
        filename = '%s_%s_%s' % (key, self.generation, uid)
        fn = os.path.join(path, filename)
        if os.path.exists(fn):
            raise RuntimeError('conflict uuid for mutable_dict')
        url = '%s/%s' % (server_uri, filename)
        # BUG FIX: close the temp file before renaming it into place so a
        # partially-flushed file can never be published.
        with open(fn + '.tmp', 'wb+') as f:
            data = compress(cPickle.dumps(new))
            f.write(struct.pack('<I', len(data) + 4) + data)
        os.rename(fn + '.tmp', fn)
        env.trackerClient.call(
            AddItemMessage('mutable_dict_new:%s' % key, url))
        files = glob.glob(os.path.join(path, '%s_*' % key))
        for f in files:
            if int(f.split('_')[-2]) < self.generation - 1:
                try:
                    os.remove(f)
                except OSError:
                    pass
def _flush(self):
    """Persist pending updates of this mutable dict.

    Groups updates by storage key, merges them into the data fetched for
    that key, writes each merged dict atomically, announces the new file
    to the tracker, garbage-collects files older than the previous
    generation, then resets the local update buffer and cache.
    """
    if not self.updated:
        return
    updated_keys = {}
    path = self._get_path()
    uri = env.get('SERVER_URI')
    server_uri = '%s/%s' % (uri, os.path.basename(path))
    for k, v in self.updated.items():
        key = self._get_key(k)
        if key in updated_keys:
            updated_keys[key][k] = v
        else:
            updated_keys[key] = {k: v}
    # BUG FIX: UUID.get_hex() does not exist on Python 3; the .hex
    # attribute returns the same value on both Python 2 and 3.
    uid = uuid.uuid4().hex
    for key, updated in updated_keys.items():
        new = self._fetch_missing(key)
        for k, v in updated.items():
            if v is None:
                new.pop(k)  # None marks a deletion
            else:
                new[k] = v
        filename = '%s_%s_%s' % (key, self.generation, uid)
        fn = os.path.join(path, filename)
        if os.path.exists(fn):
            raise RuntimeError('conflict uuid for mutable_dict')
        url = '%s/%s' % (server_uri, filename)
        with atomic_file(fn) as f:
            data = compress(six.moves.cPickle.dumps(new))
            f.write(struct.pack('<I', len(data) + 4) + data)
        env.trackerClient.call(
            AddItemMessage('mutable_dict_new:%s' % key, url))
        files = glob.glob(os.path.join(path, '%s-*' % self.uuid))
        for f in files:
            if int(f.split('_')[-2]) < self.generation - 1:
                try:
                    os.remove(f)
                except OSError:
                    pass
    self.updated.clear()
    self.data = LRUDict(self.cacheLimit)
def run(self, attempId):
    """Partition this split into per-reducer buckets, combine values with
    the aggregator, then serialize, compress and publish each bucket via
    temp-file-then-rename, retrying each write up to 3 times."""
    logger.debug("shuffling %d of %s", self.partition, self.rdd)
    numOutputSplits = self.partitioner.numPartitions
    getPartition = self.partitioner.getPartition
    mergeValue = self.aggregator.mergeValue
    createCombiner = self.aggregator.createCombiner
    buckets = [{} for i in range(numOutputSplits)]
    for k, v in self.rdd.iterator(self.split):
        bucketId = getPartition(k)
        bucket = buckets[bucketId]
        r = bucket.get(k, None)
        if r is not None:
            bucket[k] = mergeValue(r, v)
        else:
            bucket[k] = createCombiner(v)
    for i in range(numOutputSplits):
        try:
            if marshalable(buckets[i]):
                flag, d = 'm', marshal.dumps(buckets[i])
            else:
                flag, d = 'p', cPickle.dumps(buckets[i], -1)
        except ValueError:
            # marshal raises ValueError on unsupported values; fall back
            flag, d = 'p', cPickle.dumps(buckets[i], -1)
        cd = compress(d)
        last_exc = None
        for tried in range(1, 4):
            try:
                path = LocalFileShuffle.getOutputFile(
                    self.shuffleId, self.partition, i, len(cd) * tried)
                tpath = path + ".%s.%s" % (socket.gethostname(), os.getpid())
                f = open(tpath, 'wb', 1024 * 4096)
                f.write(flag + struct.pack("I", 5 + len(cd)))
                f.write(cd)
                f.close()
                os.rename(tpath, path)  # atomic publish
                break
            except IOError as e:
                last_exc = e
                logger.warning("write %s failed: %s, try again (%d)",
                               path, e, tried)
                try:
                    os.remove(tpath)
                except OSError:
                    pass
        else:
            # BUG FIX: a bare `raise` here is outside any active except
            # handler; re-raise the saved exception explicitly.
            raise last_exc
def _flush(self):
    """Persist pending updates of this mutable dict.

    Groups updates by storage key, merges them with fetched data, writes
    each merged dict atomically, announces it to the tracker, removes
    files older than the previous generation, then resets the update
    buffer and cache.
    """
    if not self.updated:
        return
    updated_keys = {}
    path = self._get_path()
    uri = env.get('SERVER_URI')
    server_uri = '%s/%s' % (uri, os.path.basename(path))
    for k, v in self.updated.items():
        key = self._get_key(k)
        if key in updated_keys:
            updated_keys[key][k] = v
        else:
            updated_keys[key] = {k: v}
    # BUG FIX: use the .hex attribute instead of the Python-2-only
    # get_hex() method; the value is identical and it works on Python 3.
    uid = uuid.uuid4().hex
    for key, updated in updated_keys.items():
        new = self._fetch_missing(key)
        for k, v in updated.items():
            if v is None:
                new.pop(k)  # None marks a deletion
            else:
                new[k] = v
        filename = '%s_%s_%s' % (key, self.generation, uid)
        fn = os.path.join(path, filename)
        if os.path.exists(fn):
            raise RuntimeError('conflict uuid for mutable_dict')
        url = '%s/%s' % (server_uri, filename)
        with atomic_file(fn) as f:
            data = compress(cPickle.dumps(new))
            f.write(struct.pack('<I', len(data) + 4) + data)
        env.trackerClient.call(
            AddItemMessage('mutable_dict_new:%s' % key, url))
        files = glob.glob(os.path.join(path, '%s-*' % self.uuid))
        for f in files:
            if int(f.split('_')[-2]) < self.generation - 1:
                try:
                    os.remove(f)
                except OSError:
                    pass
    self.updated.clear()
    self.data = LRUDict(self.cacheLimit)
def to_blocks(self, uuid, obj):
    """Serialize ``(uuid, obj)``, prefix a type+checksum header, and split
    the stream into compressed BLOCK_SIZE chunks."""
    try:
        if marshalable(obj):
            type = MARSHAL_TYPE
            buf = marshal.dumps((uuid, obj))
        else:
            type = PICKLE_TYPE
            buf = cPickle.dumps((uuid, obj), -1)
    except Exception:
        # marshal may fail on values marshalable() accepted; use pickle
        type = PICKLE_TYPE
        buf = cPickle.dumps((uuid, obj), -1)
    checksum = binascii.crc32(buf) & 0xFFFF
    stream = struct.pack(self.header_fmt, type, checksum) + buf
    # ceil(len / BLOCK_SIZE) via shift
    blockNum = (len(stream) + BLOCK_SIZE - 1) >> BLOCK_SHIFT
    return [
        compress(stream[i * BLOCK_SIZE:(i + 1) * BLOCK_SIZE])
        for i in range(blockNum)
    ]
def run_task(task_data):
    """Deserialize and execute one task; return ``('TASK_*', payload)``.

    On success the payload pickles (Success(), (flag, data), accUpdate,
    task_stats); flag 0/1 selects marshal/pickle, +2 means `data` is a
    URI to a spilled result file.
    """
    try:
        gc.disable()  # keep GC out of the task's way
        task, (job_id, ntry) = loads(decompress(task_data))
        tid = '%s:%s:%s' % (job_id, task.id, ntry)
        Accumulator.clear()
        result = task.run(tid)
        env.task_stats.bytes_max_rss = resource.getrusage(
            resource.RUSAGE_SELF).ru_maxrss * 1024
        accUpdate = Accumulator.values()
        MutableDict.flush()
        flag = 1
        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                data = six.moves.cPickle.dumps(result, -1)
        else:
            data = six.moves.cPickle.dumps(result, -1)
        data = compress(data)
        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            out = open(path, 'wb')
            out.write(data)
            out.close()
            data = '/'.join(
                [LocalFileShuffle.getServerUri()] + path.split('/')[-3:])
            flag += 2
        return 'TASK_FINISHED', six.moves.cPickle.dumps(
            (Success(), (flag, data), accUpdate, env.task_stats), -1)
    except FetchFailed as e:
        return 'TASK_FAILED', six.moves.cPickle.dumps(
            (e, None, None, None), -1)
    except:
        # deliberately broad: any failure must be reported to the scheduler
        import traceback
        msg = traceback.format_exc()
        return 'TASK_FAILED', six.moves.cPickle.dumps(
            (OtherFailure(msg), None, None, None), -1)
    finally:
        gc.collect()
        gc.enable()
def run_task(task_data):
    """Deserialize and execute one task; return ``('TASK_*', payload)``.

    On success the payload pickles ``(Success(), (flag, data), accUpdate)``;
    flag 0/1 selects marshal/pickle, +2 means ``data`` is a URI to a
    spilled result file (when the compressed result exceeds
    TASK_RESULT_LIMIT).
    """
    try:
        gc.disable()  # avoid GC pauses while the task body runs
        task, ntry = loads(decompress(task_data))
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        MutableDict.flush()
        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                # marshal may still fail on values marshalable() accepted
                flag, data = 1, cPickle.dumps(result, -1)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)
        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            # BUG FIX: compressed result is binary; write in 'wb' mode and
            # close the handle even if the write fails.
            with open(path, 'wb') as f:
                f.write(data)
            data = '/'.join(
                [LocalFileShuffle.getServerUri()] + path.split('/')[-3:]
            )
            flag += 2
        return 'TASK_FINISHED', cPickle.dumps(
            (Success(), (flag, data), accUpdate), -1)
    except FetchFailed as e:
        return 'TASK_FAILED', cPickle.dumps((e, None, None), -1)
    except:
        # deliberately broad: any failure must be reported to the scheduler
        import traceback
        msg = traceback.format_exc()
        return 'TASK_FAILED', cPickle.dumps(
            (OtherFailure(msg), None, None), -1)
    finally:
        close_mfs()
        gc.collect()
        gc.enable()
def run(self, attempId):
    """Partition this split into per-reducer buckets, combine values with
    the aggregator, then serialize, compress and publish each bucket via
    temp-file-then-rename, retrying each write up to 3 times."""
    logger.debug("shuffling %d of %s", self.partition, self.rdd)
    numOutputSplits = self.partitioner.numPartitions
    getPartition = self.partitioner.getPartition
    mergeValue = self.aggregator.mergeValue
    createCombiner = self.aggregator.createCombiner
    buckets = [{} for i in range(numOutputSplits)]
    for k, v in self.rdd.iterator(self.split):
        bucketId = getPartition(k)
        bucket = buckets[bucketId]
        r = bucket.get(k, None)
        if r is not None:
            bucket[k] = mergeValue(r, v)
        else:
            bucket[k] = createCombiner(v)
    for i in range(numOutputSplits):
        try:
            if marshalable(buckets[i]):
                flag, d = 'm', marshal.dumps(buckets[i])
            else:
                flag, d = 'p', cPickle.dumps(buckets[i], -1)
        except ValueError:
            # marshal raises ValueError on unsupported values; fall back
            flag, d = 'p', cPickle.dumps(buckets[i], -1)
        cd = compress(d)
        last_exc = None
        for tried in range(1, 4):
            try:
                path = LocalFileShuffle.getOutputFile(
                    self.shuffleId, self.partition, i, len(cd) * tried)
                tpath = path + ".%s.%s" % (socket.gethostname(), os.getpid())
                f = open(tpath, 'wb', 1024 * 4096)
                f.write(flag + struct.pack("I", 5 + len(cd)))
                f.write(cd)
                f.close()
                os.rename(tpath, path)  # atomic publish
                break
            except IOError as e:
                last_exc = e
                logger.warning("write %s failed: %s, try again (%d)",
                               path, e, tried)
                try:
                    os.remove(tpath)
                except OSError:
                    pass
        else:
            # BUG FIX: a bare `raise` here is outside any active except
            # handler; re-raise the saved exception explicitly.
            raise last_exc
def createTask(self, o, job, t, available_cpus):
    """Build a mesos TaskInfo for task *t* of *job* on offer *o*,
    capping its cpu request at the offer's available amount."""
    tid = "%s:%s:%s" % (job.id, t.id, t.tried)
    task = mesos_pb2.TaskInfo()
    task.name = "task %s" % tid
    task.task_id.value = tid
    task.slave_id.value = o.slave_id.value
    task.data = compress(cPickle.dumps((t, t.tried), -1))
    task.executor.MergeFrom(self.executor)
    payload_size = len(task.data)
    if payload_size > 1000 * 1024:
        # oversized payloads slow down the scheduler round-trip
        logger.warning("task too large: %s %d", t, payload_size)
    cpu = task.resources.add()
    cpu.name = 'cpus'
    cpu.type = 0  # mesos_pb2.Value.SCALAR
    cpu.scalar.value = min(t.cpus, available_cpus)
    mem = task.resources.add()
    mem.name = 'mem'
    mem.type = 0  # mesos_pb2.Value.SCALAR
    mem.scalar.value = t.mem
    return task
def createTask(self, o, job, t, available_cpus):
    """Construct the mesos TaskInfo for *t*; the cpu resource is limited
    to what the offer currently has available."""
    tid = "%s:%s:%s" % (job.id, t.id, t.tried)
    task = mesos_pb2.TaskInfo()
    task.name = "task %s" % tid
    task.task_id.value = tid
    task.slave_id.value = o.slave_id.value
    serialized = cPickle.dumps((t, t.tried), -1)
    task.data = compress(serialized)
    task.executor.MergeFrom(self.executor)
    if len(task.data) > 1000 * 1024:
        # warn on oversized payloads — they slow scheduling down
        logger.warning("task too large: %s %d", t, len(task.data))
    cpu = task.resources.add()
    cpu.name = 'cpus'
    cpu.type = 0  # mesos_pb2.Value.SCALAR
    cpu.scalar.value = min(t.cpus, available_cpus)
    mem = task.resources.add()
    mem.name = 'mem'
    mem.type = 0  # mesos_pb2.Value.SCALAR
    mem.scalar.value = t.mem
    return task
def to_blocks(self, uuid, obj):
    """Serialize ``(uuid, obj)`` with a type+checksum header and split the
    stream into compressed BLOCK_SIZE chunks.

    Returns ``(blocks, total_compressed_size, block_map)`` where block_map
    pairs each block's cumulative offset with its compressed length.
    """
    pickle_dumps = six.moves.cPickle.dumps
    try:
        if marshalable(obj):
            type = MARSHAL_TYPE
            buf = marshal.dumps((uuid, obj))
        else:
            type = PICKLE_TYPE
            buf = pickle_dumps((uuid, obj), -1)
    except Exception:
        # marshal may fail on values marshalable() accepted; use pickle
        type = PICKLE_TYPE
        buf = pickle_dumps((uuid, obj), -1)
    checksum = binascii.crc32(buf) & 0xFFFF
    stream = struct.pack(self.header_fmt, type, checksum) + buf
    # ceil(len / BLOCK_SIZE) via shift
    blockNum = (len(stream) + BLOCK_SIZE - 1) >> BLOCK_SHIFT
    blocks = [
        compress(stream[i * BLOCK_SIZE:(i + 1) * BLOCK_SIZE])
        for i in range(blockNum)
    ]
    sizes = [len(block) for block in blocks]
    size_l = accumulate_list(sizes)
    block_map = list(izip(size_l[:-1], sizes))
    return blocks, size_l[-1], block_map
def run(self, attempId):
    """Shuffle this partition: serialize each bucket and write it
    atomically to the local shuffle output file, retrying up to 3 times."""
    logger.debug("shuffling %d of %s", self.partition, self.rdd)
    for i, bucket in self._prepare_shuffle(self.rdd):
        try:
            if marshalable(bucket):
                flag, d = 'm', marshal.dumps(bucket)
            else:
                flag, d = 'p', cPickle.dumps(bucket, -1)
        except ValueError:
            # marshal raises ValueError on unsupported values; fall back
            flag, d = 'p', cPickle.dumps(bucket, -1)
        cd = compress(d)
        last_exc = None
        for tried in range(1, 4):
            try:
                path = LocalFileShuffle.getOutputFile(
                    self.shuffleId, self.partition, i, len(cd) * tried)
                with atomic_file(path, bufsize=1024 * 4096) as f:
                    f.write(flag + struct.pack("I", 5 + len(cd)))
                    f.write(cd)
                break
            except IOError as e:
                last_exc = e
                logger.warning("write %s failed: %s, try again (%d)",
                               path, e, tried)
        else:
            # BUG FIX: a bare `raise` after the loop is outside the except
            # handler; re-raise the saved exception explicitly.
            raise last_exc
def test(): from dpark.util import compress import logging logging.basicConfig(level=logging.DEBUG) from dpark.env import env env.start(True) l = [] for i in range(10): d = zip(range(10000), range(10000)) random.shuffle(d) l.append(SortedItems(d)) hl = heap_merged(l, lambda x, y: x + y, MAX_SHUFFLE_MEMORY) for i in range(10): print i, hl.next() path = LocalFileShuffle.getOutputFile(1, 0, 0) d = compress(cPickle.dumps({'key': 'value'}, -1)) f = open(path, 'w') f.write('p' + struct.pack('I', 5 + len(d)) + d) f.close() uri = LocalFileShuffle.getServerUri() env.mapOutputTracker.registerMapOutputs(1, [uri]) fetcher = SimpleShuffleFetcher() def func(it): k, v = next(it) assert k == 'key' assert v == 'value' fetcher.fetch(1, 0, func) tracker = MapOutputTracker() tracker.registerMapOutputs(2, [None, uri, None, None, None]) assert tracker.getServerUris(2) == [None, uri, None, None, None] tracker.stop()
gc.disable() task, ntry = cPickle.loads(decompress(task_data)) Accumulator.clear() result = task.run(ntry) accUpdate = Accumulator.values() MutableDict.flush() if marshalable(result): try: flag, data = 0, marshal.dumps(result) except Exception, e: flag, data = 1, cPickle.dumps(result, -1) else: flag, data = 1, cPickle.dumps(result, -1) data = compress(data) if len(data) > TASK_RESULT_LIMIT: path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data)) f = open(path, 'w') f.write(data) f.close() data = '/'.join([LocalFileShuffle.getServerUri()] + path.split('/')[-3:]) flag += 2 return mesos_pb2.TASK_FINISHED, cPickle.dumps( (Success(), (flag, data), accUpdate), -1) except FetchFailed, e: return mesos_pb2.TASK_FAILED, cPickle.dumps((e, None, None), -1) except:
def write_buf(stream, buf, is_marshal):
    """Compress *buf*, write a 4-byte header followed by the payload to
    *stream*, and return the total number of bytes written."""
    compressed = compress(buf)
    payload_len = len(compressed)
    stream.write(pack_header(payload_len, is_marshal, True))
    stream.write(compressed)
    return payload_len + 4
task, ntry = cPickle.loads(decompress(task_data)) setproctitle('dpark worker %s: run task %s' % (Script, task)) Accumulator.clear() result = task.run(ntry) accUpdate = Accumulator.values() if marshalable(result): try: flag, data = 0, marshal.dumps(result) except Exception, e: flag, data = 1, cPickle.dumps(result, -1) else: flag, data = 1, cPickle.dumps(result, -1) data = compress(data) if len(data) > TASK_RESULT_LIMIT: path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data)) f = open(path, 'w') f.write(data) f.close() data = '/'.join([LocalFileShuffle.getServerUri()] + path.split('/')[-3:]) flag += 2 return mesos_pb2.TASK_FINISHED, cPickle.dumps((Success(), (flag, data), accUpdate), -1) except FetchFailed, e: return mesos_pb2.TASK_FAILED, cPickle.dumps((e, None, None), -1) except : import traceback msg = traceback.format_exc()