def run_task(task_data):
    try:
        gc.disable()
        task, ntry = cPickle.loads(decompress(task_data))
        setproctitle('dpark worker %s: run task %s' % (Script, task))
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        if marshalable(result):
            flag, data = 0, marshal.dumps(result)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)
        if len(data) > TASK_RESULT_LIMIT:
            # spill oversized results to the work dir and return a URI instead
            workdir = env.get('WORKDIR')
            name = 'task_%s_%s.result' % (task.id, ntry)
            path = os.path.join(workdir, name)
            f = open(path, 'wb')
            f.write(data)
            f.close()
            data = LocalFileShuffle.getServerUri() + '/' + name
            flag += 2
        return mesos_pb2.TASK_FINISHED, cPickle.dumps(
            (task.id, Success(), (flag, data), accUpdate), -1)
    except Exception:
        import traceback
        msg = traceback.format_exc()
        return mesos_pb2.TASK_FAILED, cPickle.dumps(
            (task.id, OtherFailure(msg), None, None), -1)
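# Every variant in this file gates on marshalable() before preferring marshal
# over pickle. The real dpark helper is not reproduced here; below is a
# minimal sketch under the assumption that it simply whitelists the scalar
# and container types marshal can encode (marshalable_sketch is a
# hypothetical name). Note the callers still wrap marshal.dumps() in
# try/except, because a type check alone cannot catch dump-time failures
# such as overly deep nesting.
import marshal

_MARSHAL_SCALARS = (type(None), bool, int, float, complex, bytes, str)

def marshalable_sketch(obj):
    # recursively verify obj is built only from marshal-friendly types
    if isinstance(obj, _MARSHAL_SCALARS):
        return True
    if isinstance(obj, (tuple, list, set, frozenset)):
        return all(marshalable_sketch(x) for x in obj)
    if isinstance(obj, dict):
        return all(marshalable_sketch(k) and marshalable_sketch(v)
                   for k, v in obj.items())
    return False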
def save(self, path, items):
    # TODO: purge old cache
    with atomic_file(path) as f:
        c = 0
        f.write(struct.pack("I", c))  # placeholder count, patched at the end
        # check whether items are marshalable and compatible with broadcast
        can_marshal = marshalable(items)
        for v in items:
            if can_marshal:
                try:
                    r = 0, marshal.dumps(v)
                except Exception:
                    r = 1, cPickle.dumps(v, -1)
                    can_marshal = False
            else:
                r = 1, cPickle.dumps(v, -1)
            f.write(msgpack.packb(r))
            c += 1
            yield v

        bytes = f.tell()
        if bytes > 10 << 20:
            logger.warning("cached result is %dMB (larger than 10MB)",
                           bytes >> 20)
        # seek back and write the real record count
        f.seek(0)
        f.write(struct.pack("I", c))
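# save() above writes a placeholder 4-byte record count, streams each item as
# a msgpack-packed (flag, payload) pair, then seeks back to patch the count.
# A hedged reader sketch for that layout (load_cache is a hypothetical name;
# the payloads are stored uncompressed, exactly as dumped):
import marshal
import pickle
import struct
import msgpack

def load_cache(path):
    with open(path, 'rb') as f:
        count, = struct.unpack("I", f.read(4))
        unpacker = msgpack.Unpacker(f, raw=True)
        for _ in range(count):
            flag, payload = next(unpacker)
            yield marshal.loads(payload) if flag == 0 else pickle.loads(payload)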
def run(self, attempId):
    logger.debug("shuffling %d of %s", self.partition, self.rdd)
    for i, bucket in self.rdd._prepare_shuffle(self.split, self.partitioner,
                                               self.aggregator):
        try:
            if marshalable(bucket):
                flag, d = 'm', marshal.dumps(bucket)
            else:
                flag, d = 'p', cPickle.dumps(bucket, -1)
        except ValueError:
            flag, d = 'p', cPickle.dumps(bucket, -1)
        cd = compress(d)

        for tried in range(1, 4):
            try:
                path = LocalFileShuffle.getOutputFile(
                    self.shuffleId, self.partition, i, len(cd) * tried)
                tpath = path + ".%s.%s" % (socket.gethostname(), os.getpid())
                f = open(tpath, 'wb', 1024 * 4096)
                # record = 1-byte codec flag + total record size + payload
                f.write(flag + struct.pack("I", 5 + len(cd)))
                f.write(cd)
                f.close()
                os.rename(tpath, path)  # atomic publish
                break
            except IOError as e:
                logger.warning("write %s failed: %s, try again (%d)",
                               path, e, tried)
                try:
                    os.remove(tpath)
                except OSError:
                    pass
        else:
            raise
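# The writer above emits one record per bucket file: a 1-byte codec flag
# ('m' for marshal, 'p' for pickle), a 4-byte size field equal to the whole
# record length (1 + 4 + len(payload), hence the 5), and the compressed
# payload. A hedged reader sketch, assuming compress() is zlib-compatible
# (read_bucket is a hypothetical name):
import marshal
import pickle
import struct
import zlib

def read_bucket(path):
    with open(path, 'rb') as f:
        flag = f.read(1)
        total, = struct.unpack("I", f.read(4))  # 5 + len(payload)
        payload = zlib.decompress(f.read(total - 5))
    return marshal.loads(payload) if flag == b'm' else pickle.loads(payload)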
def run_without_sorted(self, it):
    for i, bucket in it:
        try:
            if marshalable(bucket):
                flag, d = b'm', marshal.dumps(bucket)
            else:
                flag, d = b'p', six.moves.cPickle.dumps(bucket, -1)
        except ValueError:
            flag, d = b'p', six.moves.cPickle.dumps(bucket, -1)
        cd = compress(d)
        env.task_stats.bytes_shuffle_write += len(cd)

        for tried in range(1, 4):
            try:
                path = LocalFileShuffle.getOutputFile(
                    self.shuffleId, self.partition, i, len(cd) * tried)
                with atomic_file(path, bufsize=1024 * 4096) as f:
                    f.write(flag + struct.pack("I", 5 + len(cd)))
                    f.write(cd)
                break
            except IOError as e:
                # keep a reference: `e` is unbound after the except block
                # in Python 3, so a bare `raise e` below would NameError
                last_e = e
                logger.warning("write %s failed: %s, try again (%d)",
                               path, e, tried)
        else:
            raise last_e

    return LocalFileShuffle.getServerUri()
def run_task(task_data):
    try:
        gc.disable()
        task, ntry = cPickle.loads(decompress(task_data))
        setproctitle('dpark worker %s: run task %s' % (Script, task))
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        if marshalable(result):
            flag, data = 0, marshal.dumps(result)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)
        if len(data) > TASK_RESULT_LIMIT:
            # spill the oversized result and hand back a URI instead
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            f = open(path, 'wb')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] +
                            path.split('/')[-3:])
            flag += 2
        return mesos_pb2.TASK_FINISHED, cPickle.dumps(
            (Success(), (flag, data), accUpdate), -1)
    except FetchFailed as e:
        return mesos_pb2.TASK_FAILED, cPickle.dumps((e, None, None), -1)
def save(self, path, items):
    # TODO: purge old cache
    with atomic_file(path) as f:
        c = 0
        f.write(struct.pack("I", c))  # placeholder count, patched at the end
        # check whether items are marshalable and compatible with broadcast
        can_marshal = marshalable(items)
        for v in items:
            if can_marshal:
                try:
                    r = 0, marshal.dumps(v)
                except Exception:
                    r = 1, six.moves.cPickle.dumps(v, -1)
                    can_marshal = False
            else:
                r = 1, six.moves.cPickle.dumps(v, -1)
            f.write(msgpack.packb(r))
            c += 1
            yield v

        bytes = f.tell()
        if bytes > 10 << 20:
            logger.warning("cached result is %dMB (larger than 10MB)",
                           bytes >> 20)
        # seek back and write the real record count
        f.seek(0)
        f.write(struct.pack("I", c))
def run(self, attempId):
    logger.debug("shuffling %d of %s", self.partition, self.rdd)
    for i, bucket in self._prepare_shuffle(self.rdd):
        try:
            if marshalable(bucket):
                flag, d = 'm', marshal.dumps(bucket)
            else:
                flag, d = 'p', cPickle.dumps(bucket, -1)
        except ValueError:
            flag, d = 'p', cPickle.dumps(bucket, -1)
        cd = compress(d)

        for tried in range(1, 4):
            try:
                path = LocalFileShuffle.getOutputFile(
                    self.shuffleId, self.partition, i, len(cd) * tried)
                with atomic_file(path, bufsize=1024 * 4096) as f:
                    f.write(flag + struct.pack("I", 5 + len(cd)))
                    f.write(cd)
                break
            except IOError as e:
                logger.warning("write %s failed: %s, try again (%d)",
                               path, e, tried)
        else:
            raise
def run_task(task, ntry):
    try:
        setproctitle('dpark worker %s: run task %s' % (Script, task))
        Accumulator.clear()
        gc.disable()
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        if marshalable(result):
            flag, data = 0, marshal.dumps(result)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)
        if len(data) > TASK_RESULT_LIMIT:
            workdir = env.get('WORKDIR')
            name = 'task_%s_%s.result' % (task.id, ntry)
            path = os.path.join(workdir, name)
            f = open(path, 'wb')
            f.write(data)
            f.close()
            data = LocalFileShuffle.getServerUri() + '/' + name
            flag += 2
        return mesos_pb2.TASK_FINISHED, cPickle.dumps(
            (task.id, Success(), (flag, data), accUpdate), -1)
    except Exception:
        import traceback
        msg = traceback.format_exc()
        return mesos_pb2.TASK_FAILED, cPickle.dumps(
            (task.id, OtherFailure(msg), None, None), -1)
def _prepare(self, items):
    items = list(items)
    try:
        if marshalable(items):
            is_marshal, d = True, marshal.dumps(items)
        else:
            is_marshal, d = False, six.moves.cPickle.dumps(items, -1)
    except ValueError:
        # marshal can still fail at dump time (e.g. deeply nested data),
        # so fall back to pickle even when the type check passed
        is_marshal, d = False, six.moves.cPickle.dumps(items, -1)
    data = compress(d)
    size = len(data)
    return (is_marshal, data), size
def _prepare(self, items):
    items = list(items)
    try:
        if marshalable(items):
            is_marshal, d = True, marshal.dumps(items)
        else:
            is_marshal, d = False, cPickle.dumps(items, -1)
    except ValueError:
        # marshal can fail at dump time even when the type check passed
        is_marshal, d = False, cPickle.dumps(items, -1)
    data = compress(d)
    size = len(data)
    return (is_marshal, data), size
def run_task(task_data):
    try:
        gc.disable()
        task, task_try_id = loads(decompress(task_data))
        ttid = TTID(task_try_id)
        Accumulator.clear()
        result = task.run(ttid.ttid)
        env.task_stats.bytes_max_rss = resource.getrusage(
            resource.RUSAGE_SELF).ru_maxrss * 1024
        accUpdate = Accumulator.values()
        MutableDict.flush()
        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                flag, data = 1, cPickle.dumps(result, -1)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)
        if len(data) > TASK_RESULT_LIMIT:
            # shuffle_id starts from 1, so 0 is reserved for task results
            swd = ShuffleWorkDir(0, task.id, ttid.task_try)
            tmppath = swd.alloc_tmp(len(data))
            with open(tmppath, 'wb') as f:
                f.write(data)
            path = swd.export(tmppath)
            data = '/'.join([env.server_uri] + path.split('/')[-3:])
            flag += 2
        return TaskState.finished, cPickle.dumps(
            ((flag, data), accUpdate, env.task_stats), -1)
    except FetchFailed as e:
        return TaskState.failed, TaskEndReason.fetch_failed, str(e), \
            cPickle.dumps(e)
    except Exception as e:
        import traceback
        msg = traceback.format_exc()
        ename = e.__class__.__name__
        fatal_exceptions = (DparkUserFatalError, ArithmeticError, ValueError,
                            LookupError, SyntaxError, TypeError,
                            AssertionError)
        prefix = "FATAL" if isinstance(e, fatal_exceptions) else "FAILED"
        return TaskState.failed, '{}_EXCEPTION_{}'.format(prefix, ename), \
            msg, cPickle.dumps(e)
    finally:
        gc.collect()
        gc.enable()
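# Across the run_task() variants the result envelope encodes two things in
# `flag`: the low bit selects the codec (0 = marshal, 1 = pickle), and adding
# 2 marks that `data` is a URI for a spilled result file rather than the
# inline bytes. A hedged decoder sketch (decode_result and the fetch callback
# are hypothetical; decompress() is assumed zlib-compatible):
import marshal
import pickle
import zlib

def decode_result(flag, data, fetch):
    if flag >= 2:
        data = fetch(data)  # result was spilled; data holds its URI
        flag -= 2
    payload = zlib.decompress(data)
    return marshal.loads(payload) if flag == 0 else pickle.loads(payload)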
def blockifyObject(self, obj):
    try:
        if marshalable(obj):
            buf = '0' + marshal.dumps(obj)
        else:
            buf = '1' + cPickle.dumps(obj, -1)
    except Exception:
        buf = '1' + cPickle.dumps(obj, -1)

    N = self.BlockSize
    blockNum = len(buf) // N + 1
    val = [Block(i, compress(buf[i * N:i * N + N]))
           for i in range(blockNum)]
    return val, len(buf)
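# blockifyObject() tags the serialized object with a one-character codec
# prefix ('0' = marshal, '1' = pickle) before slicing it into fixed-size
# compressed Blocks. A hedged inverse, assuming blocks arrive in order,
# Block exposes its payload as .data, and decompress() is zlib-compatible
# (unblockify_sketch is a hypothetical name):
import marshal
import pickle
import zlib

def unblockify_sketch(blocks):
    buf = b''.join(zlib.decompress(b.data) for b in blocks)
    tag, body = buf[:1], buf[1:]
    return marshal.loads(body) if tag == b'0' else pickle.loads(body)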
def run(self, attempId):
    logger.debug("shuffling %d of %s", self.partition, self.rdd)
    numOutputSplits = self.partitioner.numPartitions
    getPartition = self.partitioner.getPartition
    mergeValue = self.aggregator.mergeValue
    createCombiner = self.aggregator.createCombiner

    # combine values locally into one bucket per output partition
    buckets = [{} for i in range(numOutputSplits)]
    for k, v in self.rdd.iterator(self.split):
        bucketId = getPartition(k)
        bucket = buckets[bucketId]
        r = bucket.get(k, None)
        if r is not None:
            bucket[k] = mergeValue(r, v)
        else:
            bucket[k] = createCombiner(v)

    for i in range(numOutputSplits):
        try:
            if marshalable(buckets[i]):
                flag, d = 'm', marshal.dumps(buckets[i])
            else:
                flag, d = 'p', cPickle.dumps(buckets[i], -1)
        except ValueError:
            flag, d = 'p', cPickle.dumps(buckets[i], -1)
        cd = compress(d)

        for tried in range(1, 4):
            try:
                path = LocalFileShuffle.getOutputFile(
                    self.shuffleId, self.partition, i, len(cd) * tried)
                tpath = path + ".%s.%s" % (socket.gethostname(), os.getpid())
                f = open(tpath, 'wb', 1024 * 4096)
                f.write(flag + struct.pack("I", 5 + len(cd)))
                f.write(cd)
                f.close()
                os.rename(tpath, path)
                break
            except IOError as e:
                logger.warning("write %s failed: %s, try again (%d)",
                               path, e, tried)
                try:
                    os.remove(tpath)
                except OSError:
                    pass
        else:
            raise
def run_task(task_data):
    try:
        gc.disable()
        task, ntry = cPickle.loads(decompress(task_data))
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        MutableDict.flush()
        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                flag, data = 1, cPickle.dumps(result, -1)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
def run_task(task_data):
    try:
        gc.disable()
        task, ntry = cPickle.loads(decompress(task_data))
        setproctitle('dpark worker %s: run task %s' % (Script, task))
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                flag, data = 1, cPickle.dumps(result, -1)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
def run_task(task_data):
    try:
        gc.disable()
        task, task_try_id = loads(decompress(task_data))
        ttid = TTID(task_try_id)
        Accumulator.clear()
        result = task.run(ttid.ttid)
        env.task_stats.bytes_max_rss = resource.getrusage(
            resource.RUSAGE_SELF).ru_maxrss * 1024
        accUpdate = Accumulator.values()
        MutableDict.flush()
        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                flag, data = 1, cPickle.dumps(result, -1)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)
        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, task.id, ttid.task_try,
                                                  len(data))
            f = open(path, 'wb')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] +
                            path.split('/')[-3:])
            flag += 2
        return TaskState.finished, cPickle.dumps(
            ((flag, data), accUpdate, env.task_stats), -1)
    except FetchFailed as e:
        return TaskState.failed, TaskEndReason.fetch_failed, str(e), \
            cPickle.dumps(e)
    except Exception as e:
        import traceback
        msg = traceback.format_exc()
        ename = e.__class__.__name__
        return TaskState.failed, 'FAILED_EXCEPTION_{}'.format(ename), \
            msg, cPickle.dumps(e)
    finally:
        gc.collect()
        gc.enable()
def to_blocks(self, uuid, obj):
    try:
        if marshalable(obj):
            buf = marshal.dumps((uuid, obj))
            type_ = MARSHAL_TYPE
        else:
            buf = cPickle.dumps((uuid, obj), -1)
            type_ = PICKLE_TYPE
    except Exception:
        buf = cPickle.dumps((uuid, obj), -1)
        type_ = PICKLE_TYPE

    checksum = binascii.crc32(buf) & 0xFFFF
    stream = struct.pack(self.header_fmt, type_, checksum) + buf
    blockNum = (len(stream) + (BLOCK_SIZE - 1)) >> BLOCK_SHIFT
    blocks = [compress(stream[i * BLOCK_SIZE:(i + 1) * BLOCK_SIZE])
              for i in range(blockNum)]
    return blocks
def run_task(task_data):
    try:
        gc.disable()
        task, task_try_id = loads(decompress(task_data))
        ttid = TTID(task_try_id)
        Accumulator.clear()
        result = task.run(ttid.ttid)
        env.task_stats.bytes_max_rss = resource.getrusage(
            resource.RUSAGE_SELF).ru_maxrss * 1024
        accUpdate = Accumulator.values()
        MutableDict.flush()
        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                flag, data = 1, cPickle.dumps(result, -1)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)
        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, task.id, ttid.task_try,
                                                  len(data))
            f = open(path, 'wb')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] +
                            path.split('/')[-3:])
            flag += 2
        return 'TASK_FINISHED', cPickle.dumps(
            (Success(), (flag, data), accUpdate, env.task_stats), -1)
    except FetchFailed as e:
        return 'TASK_FAILED', cPickle.dumps((e, None, None, None), -1)
    except:
        import traceback
        msg = traceback.format_exc()
        return 'TASK_FAILED', cPickle.dumps(
            (OtherFailure(msg), None, None, None), -1)
    finally:
        gc.collect()
        gc.enable()
def run_task(task_data):
    try:
        gc.disable()
        task, ntry = loads(decompress(task_data))
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        MutableDict.flush()
        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                flag, data = 1, cPickle.dumps(result, -1)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)
        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            f = open(path, 'wb')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] +
                            path.split('/')[-3:])
            flag += 2
        return 'TASK_FINISHED', cPickle.dumps(
            (Success(), (flag, data), accUpdate), -1)
    except FetchFailed as e:
        return 'TASK_FAILED', cPickle.dumps((e, None, None), -1)
    except:
        import traceback
        msg = traceback.format_exc()
        return 'TASK_FAILED', cPickle.dumps(
            (OtherFailure(msg), None, None), -1)
    finally:
        close_mfs()
        gc.collect()
        gc.enable()
def to_blocks(self, uuid, obj):
    try:
        if marshalable(obj):
            buf = marshal.dumps((uuid, obj))
            type_ = MARSHAL_TYPE
        else:
            buf = cPickle.dumps((uuid, obj), -1)
            type_ = PICKLE_TYPE
    except Exception:
        buf = cPickle.dumps((uuid, obj), -1)
        type_ = PICKLE_TYPE

    checksum = binascii.crc32(buf) & 0xFFFF
    stream = struct.pack(self.header_fmt, type_, checksum) + buf
    blockNum = (len(stream) + (BLOCK_SIZE - 1)) >> BLOCK_SHIFT
    blocks = [compress(stream[i * BLOCK_SIZE:(i + 1) * BLOCK_SIZE])
              for i in range(blockNum)]
    sizes = [len(block) for block in blocks]
    size_l = accumulate_list(sizes)
    block_map = list(izip(size_l[:-1], sizes))
    return blocks, size_l[-1], block_map
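# to_blocks() depends on accumulate_list() producing a prefix sum that starts
# at 0 and ends at the total: zip(size_l[:-1], sizes) then yields one
# (offset, size) pair per block, and size_l[-1] is the full stream length.
# A minimal sketch consistent with that usage (accumulate_list_sketch is a
# hypothetical stand-in):
def accumulate_list_sketch(sizes):
    offsets = [0]
    for s in sizes:
        offsets.append(offsets[-1] + s)
    return offsets  # e.g. [3, 5, 4] -> [0, 3, 8, 12]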