コード例 #1
0
ファイル: task.py プロジェクト: zhaochl/dpark
    def run(self, attempId):
        logger.debug("shuffling %d of %s", self.partition, self.rdd)
        for i, bucket in self._prepare_shuffle(self.rdd):
            try:
                if marshalable(bucket):
                    flag, d = 'm', marshal.dumps(bucket)
                else:
                    flag, d = 'p', cPickle.dumps(bucket, -1)
            except ValueError:
                flag, d = 'p', cPickle.dumps(bucket, -1)
            cd = compress(d)
            for tried in range(1, 4):
                try:
                    path = LocalFileShuffle.getOutputFile(
                        self.shuffleId, self.partition, i,
                        len(cd) * tried)
                    with atomic_file(path, bufsize=1024 * 4096) as f:
                        f.write(flag + struct.pack("I", 5 + len(cd)))
                        f.write(cd)

                    break
                except IOError, e:
                    logger.warning("write %s failed: %s, try again (%d)", path,
                                   e, tried)
            else:
                raise
コード例 #2
0
ファイル: task.py プロジェクト: foreverjay/dpark
 def run(self, attempId):
     logger.debug("shuffling %d of %s", self.partition, self.rdd)
     for i, bucket in self.rdd._prepare_shuffle(self.split, self.partitioner, self.aggregator):
         try:
             if marshalable(bucket):
                 flag, d = 'm', marshal.dumps(bucket)
             else:
                 flag, d = 'p', cPickle.dumps(bucket, -1)
         except ValueError:
             flag, d = 'p', cPickle.dumps(bucket, -1)
         cd = compress(d)
         for tried in range(1, 4):
             try:
                 path = LocalFileShuffle.getOutputFile(self.shuffleId, self.partition, i, len(cd) * tried)
                 tpath = path + ".%s.%s" % (socket.gethostname(), os.getpid())
                 f = open(tpath, 'wb', 1024*4096)
                 f.write(flag + struct.pack("I", 5 + len(cd)))
                 f.write(cd)
                 f.close()
                 os.rename(tpath, path)
                 break
             except IOError, e:
                 logger.warning("write %s failed: %s, try again (%d)", path, e, tried)
                 try: os.remove(tpath)
                 except OSError: pass
         else:
             raise
コード例 #3
0
ファイル: task.py プロジェクト: yangvipguang/dpark
 def run(self, attempId):
     logger.debug("shuffling %d of %s", self.partition, self.rdd)
     for i, bucket in self.rdd._prepare_shuffle(self.split,
                                                self.partitioner,
                                                self.aggregator):
         try:
             if marshalable(bucket):
                 flag, d = 'm', marshal.dumps(bucket)
             else:
                 flag, d = 'p', cPickle.dumps(bucket, -1)
         except ValueError:
             flag, d = 'p', cPickle.dumps(bucket, -1)
         cd = compress(d)
         for tried in range(1, 4):
             try:
                 path = LocalFileShuffle.getOutputFile(
                     self.shuffleId, self.partition, i,
                     len(cd) * tried)
                 tpath = path + ".%s.%s" % (socket.gethostname(),
                                            os.getpid())
                 f = open(tpath, 'wb', 1024 * 4096)
                 f.write(flag + struct.pack("I", 5 + len(cd)))
                 f.write(cd)
                 f.close()
                 os.rename(tpath, path)
                 break
             except IOError, e:
                 logger.warning("write %s failed: %s, try again (%d)", path,
                                e, tried)
                 try:
                     os.remove(tpath)
                 except OSError:
                     pass
         else:
             raise
コード例 #4
0
ファイル: task.py プロジェクト: weiqiangzheng/dpark
    def run_without_sorted(self, it):
        for i, bucket in it:
            try:
                if marshalable(bucket):
                    flag, d = b'm', marshal.dumps(bucket)
                else:
                    flag, d = b'p', six.moves.cPickle.dumps(bucket, -1)
            except ValueError:
                flag, d = b'p', six.moves.cPickle.dumps(bucket, -1)
            cd = compress(d)
            env.task_stats.bytes_shuffle_write += len(cd)
            for tried in range(1, 4):
                try:
                    path = LocalFileShuffle.getOutputFile(
                        self.shuffleId, self.partition, i,
                        len(cd) * tried)
                    with atomic_file(path, bufsize=1024 * 4096) as f:
                        f.write(flag + struct.pack("I", 5 + len(cd)))
                        f.write(cd)

                    break
                except IOError as e:
                    logger.warning("write %s failed: %s, try again (%d)", path,
                                   e, tried)
            else:
                raise e

        return LocalFileShuffle.getServerUri()
コード例 #5
0
def run_task(task_data):
    try:
        gc.disable()
        task, ntry = cPickle.loads(decompress(task_data))
        setproctitle('dpark worker %s: run task %s' % (Script, task))

        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()

        if marshalable(result):
            flag, data = 0, marshal.dumps(result)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            f = open(path, 'w')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] + path.split('/')[-3:])
            flag += 2

        return mesos_pb2.TASK_FINISHED, cPickle.dumps((Success(), (flag, data), accUpdate), -1)
    except FetchFailed, e:
        return mesos_pb2.TASK_FAILED, cPickle.dumps((e, None, None), -1)
コード例 #6
0
ファイル: executor.py プロジェクト: rohithreddy/dpark
def run_task(task_data):
    try:
        gc.disable()
        task, task_try_id = loads(decompress(task_data))
        ttid = TTID(task_try_id)
        Accumulator.clear()
        result = task.run(ttid.ttid)
        env.task_stats.bytes_max_rss = resource.getrusage(
            resource.RUSAGE_SELF).ru_maxrss * 1024
        accUpdate = Accumulator.values()
        MutableDict.flush()

        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                flag, data = 1, cPickle.dumps(result, -1)

        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, task.id, ttid.task_try,
                                                  len(data))
            f = open(path, 'wb')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] +
                            path.split('/')[-3:])
            flag += 2

        return TaskState.finished, cPickle.dumps(
            ((flag, data), accUpdate, env.task_stats), -1)
    except FetchFailed as e:
        return TaskState.failed, TaskEndReason.fetch_failed, str(
            e), cPickle.dumps(e)
    except Exception as e:
        import traceback
        msg = traceback.format_exc()
        ename = e.__class__.__name__
        fatal_exceptions = (DparkUserFatalError, ArithmeticError, ValueError,
                            LookupError, SyntaxError, TypeError,
                            AssertionError)
        prefix = "FATAL" if isinstance(e, fatal_exceptions) else "FAILED"
        return TaskState.failed, '{}_EXCEPTION_{}'.format(
            prefix, ename), msg, cPickle.dumps(e)
    finally:
        gc.collect()
        gc.enable()
コード例 #7
0
ファイル: task.py プロジェクト: zofuthan/dpark
    def run(self, attempId):
        logger.debug("shuffling %d of %s", self.partition, self.rdd)
        numOutputSplits = self.partitioner.numPartitions
        getPartition = self.partitioner.getPartition
        mergeValue = self.aggregator.mergeValue
        createCombiner = self.aggregator.createCombiner

        buckets = [{} for i in range(numOutputSplits)]
        for k, v in self.rdd.iterator(self.split):
            bucketId = getPartition(k)
            bucket = buckets[bucketId]
            r = bucket.get(k, None)
            if r is not None:
                bucket[k] = mergeValue(r, v)
            else:
                bucket[k] = createCombiner(v)

        for i in range(numOutputSplits):
            try:
                if marshalable(buckets[i]):
                    flag, d = 'm', marshal.dumps(buckets[i])
                else:
                    flag, d = 'p', cPickle.dumps(buckets[i], -1)
            except ValueError:
                flag, d = 'p', cPickle.dumps(buckets[i], -1)
            cd = compress(d)
            for tried in range(1, 4):
                try:
                    path = LocalFileShuffle.getOutputFile(
                        self.shuffleId, self.partition, i,
                        len(cd) * tried)
                    tpath = path + ".%s.%s" % (socket.gethostname(),
                                               os.getpid())
                    f = open(tpath, 'wb', 1024 * 4096)
                    f.write(flag + struct.pack("I", 5 + len(cd)))
                    f.write(cd)
                    f.close()
                    os.rename(tpath, path)
                    break
                except IOError, e:
                    logger.warning("write %s failed: %s, try again (%d)", path,
                                   e, tried)
                    try:
                        os.remove(tpath)
                    except OSError:
                        pass
            else:
                raise
コード例 #8
0
ファイル: executor.py プロジェクト: rohithreddy/dpark
def run_task(task_data):
    try:
        gc.disable()
        task, task_try_id = loads(decompress(task_data))
        ttid = TTID(task_try_id)
        Accumulator.clear()
        result = task.run(ttid.ttid)
        env.task_stats.bytes_max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss * 1024
        accUpdate = Accumulator.values()
        MutableDict.flush()

        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                flag, data = 1, cPickle.dumps(result, -1)

        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, task.id, ttid.task_try, len(data))
            f = open(path, 'wb')
            f.write(data)
            f.close()
            data = '/'.join(
                [LocalFileShuffle.getServerUri()] + path.split('/')[-3:]
            )
            flag += 2

        return TaskState.finished, cPickle.dumps(((flag, data), accUpdate, env.task_stats), -1)
    except FetchFailed as e:
        return TaskState.failed, TaskEndReason.fetch_failed, str(e), cPickle.dumps(e)
    except Exception as e:
        import traceback
        msg = traceback.format_exc()
        ename = e.__class__.__name__
        fatal_exceptions = (DparkUserFatalError, ArithmeticError,
                            ValueError, LookupError, SyntaxError,
                            TypeError, AssertionError)
        prefix = "FATAL" if isinstance(e, fatal_exceptions) else "FAILED"
        return TaskState.failed, '{}_EXCEPTION_{}'.format(prefix, ename), msg, cPickle.dumps(e)
    finally:
        gc.collect()
        gc.enable()
コード例 #9
0
ファイル: executor.py プロジェクト: posens/dpark
def run_task(task_data):
    try:
        gc.disable()
        task, task_try_id = loads(decompress(task_data))
        ttid = TTID(task_try_id)
        Accumulator.clear()
        result = task.run(ttid.ttid)
        env.task_stats.bytes_max_rss = resource.getrusage(
            resource.RUSAGE_SELF).ru_maxrss * 1024
        accUpdate = Accumulator.values()
        MutableDict.flush()

        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                flag, data = 1, cPickle.dumps(result, -1)

        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, task.id, ttid.task_try,
                                                  len(data))
            f = open(path, 'wb')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] +
                            path.split('/')[-3:])
            flag += 2

        return 'TASK_FINISHED', cPickle.dumps(
            (Success(), (flag, data), accUpdate, env.task_stats), -1)
    except FetchFailed as e:
        return 'TASK_FAILED', cPickle.dumps((e, None, None, None), -1)
    except:
        import traceback
        msg = traceback.format_exc()
        return 'TASK_FAILED', cPickle.dumps(
            (OtherFailure(msg), None, None, None), -1)
    finally:
        gc.collect()
        gc.enable()
コード例 #10
0
ファイル: task.py プロジェクト: weiqiangzheng/dpark
 def run_with_sorted(self, it):
     serializer = GroupByAutoBatchedSerializer(
     ) if self.iter_values else AutoBatchedSerializer()
     for i, bucket in it:
         for tried in range(1, 4):
             try:
                 path = LocalFileShuffle.getOutputFile(
                     self.shuffleId, self.partition, i)
                 with atomic_file(path, bufsize=1024 * 4096) as f:
                     items = sorted(bucket.items(), key=lambda x: x[0])
                     serializer.dump_stream(items, f)
                     env.task_stats.bytes_shuffle_write += f.tell()
                 break
             except IOError as e:
                 logger.warning("write %s failed: %s, try again (%d)", path,
                                e, tried)
         else:
             raise e
     return LocalFileShuffle.getServerUri()
コード例 #11
0
ファイル: task.py プロジェクト: 1060460048/dpark
    def run(self, attempId):
        logger.debug("shuffling %d of %s", self.partition, self.rdd)
        numOutputSplits = self.partitioner.numPartitions
        getPartition = self.partitioner.getPartition
        mergeValue = self.aggregator.mergeValue
        createCombiner = self.aggregator.createCombiner

        buckets = [{} for i in range(numOutputSplits)]
        for k,v in self.rdd.iterator(self.split):
            bucketId = getPartition(k)
            bucket = buckets[bucketId]
            r = bucket.get(k, None)
            if r is not None:
                bucket[k] = mergeValue(r, v)
            else:
                bucket[k] = createCombiner(v)

        for i in range(numOutputSplits):
            try:
                if marshalable(buckets[i]):
                    flag, d = 'm', marshal.dumps(buckets[i])
                else:
                    flag, d = 'p', cPickle.dumps(buckets[i], -1)
            except ValueError:
                flag, d = 'p', cPickle.dumps(buckets[i], -1)
            cd = compress(d)
            for tried in range(1, 4):
                try:
                    path = LocalFileShuffle.getOutputFile(self.shuffleId, self.partition, i, len(cd) * tried)
                    tpath = path + ".%s.%s" % (socket.gethostname(), os.getpid())
                    f = open(tpath, 'wb', 1024*4096)
                    f.write(flag + struct.pack("I", 5 + len(cd)))
                    f.write(cd)
                    f.close()
                    os.rename(tpath, path)
                    break
                except IOError, e:
                    logger.warning("write %s failed: %s, try again (%d)", path, e, tried)
                    try: os.remove(tpath)
                    except OSError: pass
            else:
                raise
コード例 #12
0
ファイル: executor.py プロジェクト: wiesel2/dpark
def run_task(task_data):
    try:
        gc.disable()
        task, ntry = loads(decompress(task_data))
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        MutableDict.flush()

        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception as e:
                flag, data = 1, cPickle.dumps(result, -1)

        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            f = open(path, 'w')
            f.write(data)
            f.close()
            data = '/'.join(
                [LocalFileShuffle.getServerUri()] + path.split('/')[-3:]
            )
            flag += 2

        return 'TASK_FINISHED', cPickle.dumps(
            (Success(), (flag, data), accUpdate), -1)
    except FetchFailed as e:
        return 'TASK_FAILED', cPickle.dumps((e, None, None), -1)
    except:
        import traceback
        msg = traceback.format_exc()
        return 'TASK_FAILED', cPickle.dumps(
            (OtherFailure(msg), None, None), -1)
    finally:
        close_mfs()
        gc.collect()
        gc.enable()
コード例 #13
0
ファイル: task.py プロジェクト: windreamer/dpark
    def run(self, attempId):
        logger.debug("shuffling %d of %s", self.partition, self.rdd)
        for i, bucket in self._prepare_shuffle(self.rdd):
            try:
                if marshalable(bucket):
                    flag, d = 'm', marshal.dumps(bucket)
                else:
                    flag, d = 'p', cPickle.dumps(bucket, -1)
            except ValueError:
                flag, d = 'p', cPickle.dumps(bucket, -1)
            cd = compress(d)
            for tried in range(1, 4):
                try:
                    path = LocalFileShuffle.getOutputFile(self.shuffleId, self.partition, i, len(cd) * tried)
                    with atomic_file(path, bufsize=1024*4096) as f:
                        f.write(flag + struct.pack("I", 5 + len(cd)))
                        f.write(cd)

                    break
                except IOError, e:
                    logger.warning("write %s failed: %s, try again (%d)", path, e, tried)
            else:
                raise
コード例 #14
0
ファイル: executor.py プロジェクト: zofuthan/dpark
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        MutableDict.flush()

        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception, e:
                flag, data = 1, cPickle.dumps(result, -1)

        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            f = open(path, 'w')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] +
                            path.split('/')[-3:])
            flag += 2

        return mesos_pb2.TASK_FINISHED, cPickle.dumps(
            (Success(), (flag, data), accUpdate), -1)
    except FetchFailed, e:
        return mesos_pb2.TASK_FAILED, cPickle.dumps((e, None, None), -1)
    except:
        import traceback
        msg = traceback.format_exc()
        return mesos_pb2.TASK_FAILED, cPickle.dumps(
コード例 #15
0
 def _get_path(self, i, size):
     return LocalFileShuffle.getOutputFile(self.shuffle_id, self.map_id, i,
                                           size)
コード例 #16
0
ファイル: executor.py プロジェクト: FashtimeDotCom/dpark
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()

        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception, e:
                flag, data = 1, cPickle.dumps(result, -1)

        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            f = open(path, 'w')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] + path.split('/')[-3:])
            flag += 2

        return mesos_pb2.TASK_FINISHED, cPickle.dumps((Success(), (flag, data), accUpdate), -1)
    except FetchFailed, e:
        return mesos_pb2.TASK_FAILED, cPickle.dumps((e, None, None), -1)
    except :
        import traceback
        msg = traceback.format_exc()
        return mesos_pb2.TASK_FAILED, cPickle.dumps((OtherFailure(msg), None, None), -1)
    finally:
        setproctitle('dpark worker: idle')