Example #1
def run_task(task_data):
    try:
        gc.disable()
        task, ntry = cPickle.loads(decompress(task_data))
        setproctitle('dpark worker %s: run task %s' % (Script, task))

        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()

        if marshalable(result):
            flag, data = 0, marshal.dumps(result)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            f = open(path, 'w')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] + path.split('/')[-3:])
            flag += 2

        return mesos_pb2.TASK_FINISHED, cPickle.dumps((Success(), (flag, data), accUpdate), -1)
    except FetchFailed, e:
        return mesos_pb2.TASK_FAILED, cPickle.dumps((e, None, None), -1)
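
A note on the (flag, data) convention in this example: bit 0 of flag selects the codec (0 = marshal, 1 = cPickle), and bit 1 (the flag += 2 branch) marks an oversized result that was spilled to a local file and replaced by a URI. A minimal sketch of the receiving side, assuming zlib stands in for dpark's decompress and that the spilled file is reachable over HTTP (both are assumptions, not dpark's actual API):

import marshal
import cPickle
import urllib2
from zlib import decompress  # stand-in for dpark's decompress

def load_task_result(flag, data):
    # Hypothetical decoder for the (flag, data) pair produced above.
    if flag & 2:  # oversized result: data is a URI to the spilled file
        data = urllib2.urlopen(data).read()
    data = decompress(data)
    if flag & 1:  # payload was pickled rather than marshaled
        return cPickle.loads(data)
    return marshal.loads(data)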
Example #2
File: env.py Project: npc7/dpark
    def start(self, isMaster, environ={}, isLocal=False):
        if self.started:
            return
        logger.debug("start env in %s: %s %s", os.getpid(), isMaster, environ)
        self.isMaster = isMaster
        self.isLocal = isLocal
        if isMaster:
            roots = conf.DPARK_WORK_DIR
            if isinstance(roots, str):
                roots = roots.split(",")
            if isLocal:
                root = roots[0]  # for local mode
                if not os.path.exists(root):
                    os.mkdir(root, 0777)
                    os.chmod(root, 0777)  # because of umask

            name = "%s-%s-%d" % (time.strftime("%Y%m%d-%H%M%S"), socket.gethostname(), os.getpid())
            self.workdir = [os.path.join(root, name) for root in roots]
            for d in self.workdir:
                if not os.path.exists(d):
                    try:
                        os.makedirs(d)
                    except OSError:
                        pass
            self.environ["WORKDIR"] = self.workdir
            self.environ["COMPRESS"] = util.COMPRESS
        else:
            self.environ.update(environ)
            if self.environ["COMPRESS"] != util.COMPRESS:
                raise Exception("no %s available" % self.environ["COMPRESS"])

        self.ctx = zmq.Context()

        from dpark.cache import CacheTracker, LocalCacheTracker

        if isLocal:
            self.cacheTracker = LocalCacheTracker(isMaster)
        else:
            self.cacheTracker = CacheTracker(isMaster)

        from dpark.shuffle import LocalFileShuffle, MapOutputTracker, LocalMapOutputTracker

        LocalFileShuffle.initialize(isMaster)
        if isLocal:
            self.mapOutputTracker = LocalMapOutputTracker(isMaster)
        else:
            self.mapOutputTracker = MapOutputTracker(isMaster)
        from dpark.shuffle import SimpleShuffleFetcher, ParallelShuffleFetcher

        # self.shuffleFetcher = SimpleShuffleFetcher()
        self.shuffleFetcher = ParallelShuffleFetcher(2)

        from dpark.broadcast import TheBroadcast

        TheBroadcast.initialize(isMaster)

        self.started = True
        logger.debug("env started")
Example #3
    def start(self, isMaster, environ={}):
        if self.started:
            return
        logger.debug("start env in %s: %s %s", os.getpid(), isMaster, environ)
        self.isMaster = isMaster
        if isMaster:
            roots = conf.DPARK_WORK_DIR
            if isinstance(roots, str):
                roots = roots.split(',')
            name = '%s-%s-%d' % (time.strftime("%Y%m%d-%H%M%S"),
                                 socket.gethostname(), os.getpid())
            self.workdir = [os.path.join(root, name) for root in roots]
            try:
                for d in self.workdir:
                    util.mkdir_p(d)
            except OSError as e:
                if environ.get('is_local', False):
                    raise e

            self.environ['SERVER_URI'] = 'file://' + self.workdir[0]
            self.environ['WORKDIR'] = self.workdir
            self.environ['COMPRESS'] = util.COMPRESS
        else:
            self.environ.update(environ)
            if self.environ['COMPRESS'] != util.COMPRESS:
                raise Exception("no %s available" % self.environ['COMPRESS'])

        self.ctx = zmq.Context()

        from dpark.tracker import TrackerServer, TrackerClient
        if isMaster:
            self.trackerServer = TrackerServer()
            self.trackerServer.start()
            addr = self.trackerServer.addr
            env.register('TrackerAddr', addr)
        else:
            addr = env.get('TrackerAddr')

        self.trackerClient = TrackerClient(addr)

        from dpark.cache import CacheTracker
        self.cacheTracker = CacheTracker()

        from dpark.shuffle import LocalFileShuffle, MapOutputTracker
        LocalFileShuffle.initialize(isMaster)
        self.mapOutputTracker = MapOutputTracker()
        from dpark.shuffle import ParallelShuffleFetcher
        self.shuffleFetcher = ParallelShuffleFetcher(2)

        from dpark.broadcast import start_manager
        start_manager(isMaster)

        self.started = True
        logger.debug("env started")
Example #4
File: env.py Project: npc7/dpark
    def start(self, isMaster, environ={}, isLocal=False):
        if self.started:
            return
        logger.debug("start env in %s: %s %s", os.getpid(),
                isMaster, environ)
        self.isMaster = isMaster
        self.isLocal = isLocal
        if isMaster:
            roots = conf.DPARK_WORK_DIR
            if isinstance(roots, str):
                roots = roots.split(',')
            if isLocal:
                root = roots[0] # for local mode 
                if not os.path.exists(root):
                    os.mkdir(root, 0777)
                    os.chmod(root, 0777) # because of umask

            name = '%s-%s-%d' % (time.strftime("%Y%m%d-%H%M%S"),
                socket.gethostname(), os.getpid())
            self.workdir = [os.path.join(root, name) for root in roots]
            for d in self.workdir:
                if not os.path.exists(d):
                    try: os.makedirs(d)
                    except OSError: pass
            self.environ['WORKDIR'] = self.workdir
            self.environ['COMPRESS'] = util.COMPRESS
        else:
            self.environ.update(environ)
            if self.environ['COMPRESS'] != util.COMPRESS:
                raise Exception("no %s available" % self.environ['COMPRESS'])

        self.ctx = zmq.Context()

        from dpark.cache import CacheTracker, LocalCacheTracker
        if isLocal:
            self.cacheTracker = LocalCacheTracker(isMaster)
        else:
            self.cacheTracker = CacheTracker(isMaster)

        from dpark.shuffle import LocalFileShuffle, MapOutputTracker, LocalMapOutputTracker
        LocalFileShuffle.initialize(isMaster)
        if isLocal:
            self.mapOutputTracker = LocalMapOutputTracker(isMaster)
        else:
            self.mapOutputTracker = MapOutputTracker(isMaster)
        from dpark.shuffle import SimpleShuffleFetcher, ParallelShuffleFetcher
        #self.shuffleFetcher = SimpleShuffleFetcher()
        self.shuffleFetcher = ParallelShuffleFetcher(2)

        from dpark.broadcast import TheBroadcast
        TheBroadcast.initialize(isMaster)

        self.started = True
        logger.debug("env started")
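
Examples #2 and #4 are two copies of the same Env.start method: the master builds per-run work directories and publishes WORKDIR and COMPRESS through self.environ, while a worker receives that dict and refuses to start if its compression codec differs. A minimal usage sketch, assuming env is the module-level singleton and that the environ dict travels to workers out of band (for example inside the serialized Mesos task):

# master side: start, then capture the environ to ship to workers
env.start(True)
master_environ = dict(env.environ)  # contains WORKDIR and COMPRESS

# worker side: start with the environ received from the master;
# raises if the local compression codec does not match COMPRESS
env.start(False, environ=master_environ)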
Example #5
def run_task(task_data):
    try:
        gc.disable()
        task, task_try_id = loads(decompress(task_data))
        ttid = TTID(task_try_id)
        Accumulator.clear()
        result = task.run(ttid.ttid)
        env.task_stats.bytes_max_rss = resource.getrusage(
            resource.RUSAGE_SELF).ru_maxrss * 1024
        accUpdate = Accumulator.values()
        MutableDict.flush()

        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                flag, data = 1, cPickle.dumps(result, -1)

        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, task.id, ttid.task_try,
                                                  len(data))
            f = open(path, 'wb')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] +
                            path.split('/')[-3:])
            flag += 2

        return TaskState.finished, cPickle.dumps(
            ((flag, data), accUpdate, env.task_stats), -1)
    except FetchFailed as e:
        return TaskState.failed, TaskEndReason.fetch_failed, str(
            e), cPickle.dumps(e)
    except Exception as e:
        import traceback
        msg = traceback.format_exc()
        ename = e.__class__.__name__
        fatal_exceptions = (DparkUserFatalError, ArithmeticError, ValueError,
                            LookupError, SyntaxError, TypeError,
                            AssertionError)
        prefix = "FATAL" if isinstance(e, fatal_exceptions) else "FAILED"
        return TaskState.failed, '{}_EXCEPTION_{}'.format(
            prefix, ename), msg, cPickle.dumps(e)
    finally:
        gc.collect()
        gc.enable()
Example #6
    def run(self, attempId):
        logger.debug("shuffling %d of %s", self.partition, self.rdd)
        for i, bucket in self.rdd._prepare_shuffle(self.split,
                                                   self.partitioner,
                                                   self.aggregator):
            try:
                if marshalable(bucket):
                    flag, d = 'm', marshal.dumps(bucket)
                else:
                    flag, d = 'p', cPickle.dumps(bucket, -1)
            except ValueError:
                flag, d = 'p', cPickle.dumps(bucket, -1)
            cd = compress(d)
            for tried in range(1, 4):
                try:
                    path = LocalFileShuffle.getOutputFile(
                        self.shuffleId, self.partition, i,
                        len(cd) * tried)
                    tpath = path + ".%s.%s" % (socket.gethostname(),
                                               os.getpid())
                    f = open(tpath, 'wb', 1024 * 4096)
                    f.write(flag + struct.pack("I", 5 + len(cd)))
                    f.write(cd)
                    f.close()
                    os.rename(tpath, path)
                    break
                except IOError, e:
                    logger.warning("write %s failed: %s, try again (%d)", path,
                                   e, tried)
                    try:
                        os.remove(tpath)
                    except OSError:
                        pass
            else:
                raise
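
Each bucket file written in this example is a single record: a one-byte codec flag ('m' or 'p'), a 4-byte native-endian length covering the 5-byte header plus the compressed payload, then the payload itself. A minimal reader sketch for that layout (again with zlib assumed as the stand-in for dpark's decompress):

import marshal
import cPickle
import struct
from zlib import decompress  # stand-in for dpark's decompress

def read_bucket(path):
    # Mirror of the writer above: 1 flag byte, then a 4-byte length
    # that includes the 5 header bytes, then the compressed bucket.
    with open(path, 'rb') as f:
        flag = f.read(1)
        (length,) = struct.unpack("I", f.read(4))
        d = decompress(f.read(length - 5))
    return marshal.loads(d) if flag == 'm' else cPickle.loads(d)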
Example #7
def run_task(task_data):
    try:
        gc.disable()
        task, ntry = cPickle.loads(decompress(task_data))
        setproctitle('dpark worker %s: run task %s' % (Script, task))
        
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()

        if marshalable(result):
            flag, data = 0, marshal.dumps(result)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            workdir = env.get('WORKDIR')
            name = 'task_%s_%s.result' % (task.id, ntry)
            path = os.path.join(workdir, name) 
            f = open(path, 'w')
            f.write(data)
            f.close()
            data = LocalFileShuffle.getServerUri() + '/' + name
            flag += 2

        return mesos_pb2.TASK_FINISHED, cPickle.dumps((task.id, Success(), (flag, data), accUpdate), -1)
    except Exception, e:
        import traceback
        msg = traceback.format_exc()
        return mesos_pb2.TASK_FAILED, cPickle.dumps((task.id, OtherFailure(msg), None, None), -1)
Example #8
    def _run(self, task_id):
        mem_limit = env.meminfo.mem_limit_soft
        t0 = time.time()
        logger.debug("run task with shuffle_flag %r" % (self.rddconf, ))
        rdd = self.rdd
        meminfo = env.meminfo
        n = self.partitioner.numPartitions
        get_partition = self.partitioner.getPartition
        merge_value = self.aggregator.mergeValue
        create_combiner = self.aggregator.createCombiner
        dumper_cls = SortMergeBucketDumper if self.rddconf.sort_merge else BucketDumper
        dumper = dumper_cls(self.shuffleId, self.partition, n, self.rddconf)
        buckets = [{} for _ in range(n)]
        env.meminfo.ratio = min(float(n) / (n + 1), env.meminfo.ratio)

        last_i = 0
        for i, item in enumerate(rdd.iterator(self.split)):
            try:
                try:
                    k, v = item
                except:
                    raise DparkUserFatalError(
                        "item of {} should be (k, v) pair, got: {}".format(
                            rdd.scope.key, item))

                bucket = buckets[get_partition(k)]
                r = bucket.get(k, None)
                if r is not None:
                    bucket[k] = merge_value(r, v)
                else:
                    bucket[k] = create_combiner(v)

                if dpark.conf.MULTI_SEGMENT_DUMP and meminfo.rss > mem_limit:
                    _log = logger.info if dpark.conf.LOG_ROTATE else logger.debug
                    _log(
                        "dump rotate %d with %d kv: mem %d MB, sort limit %d MB, limit %d MB",
                        env.task_stats.num_dump_rotate + 1, i - last_i,
                        int(meminfo.rss) >> 20, mem_limit >> 20,
                        int(meminfo.mem) >> 20)
                    dumper.dump(buckets, False)
                    [buckets[j].clear() for j in range(n)]
                    env.meminfo.after_rotate()
                    mem_limit = env.meminfo.mem_limit_soft
                    last_i = i
            except ValueError as e:
                logger.exception('The ValueError exception: %s at %s', str(e),
                                 str(rdd.scope.api_callsite))
                raise

        t1 = time.time()
        dumper.dump(buckets, True)
        dumper.commit(self.aggregator)
        del buckets
        env.task_stats.bytes_dump += dumper.get_size()
        env.task_stats.num_dump_rotate += 1
        t = time.time()
        env.task_stats.secs_dump += t - t1
        env.task_stats.secs_all = t - t0

        return LocalFileShuffle.getServerUri()
Example #9
    def run(self, attempId):
        logger.debug("shuffling %d of %s", self.partition, self.rdd)
        for i, bucket in self._prepare_shuffle(self.rdd):
            try:
                if marshalable(bucket):
                    flag, d = 'm', marshal.dumps(bucket)
                else:
                    flag, d = 'p', cPickle.dumps(bucket, -1)
            except ValueError:
                flag, d = 'p', cPickle.dumps(bucket, -1)
            cd = compress(d)
            for tried in range(1, 4):
                try:
                    path = LocalFileShuffle.getOutputFile(
                        self.shuffleId, self.partition, i,
                        len(cd) * tried)
                    with atomic_file(path, bufsize=1024 * 4096) as f:
                        f.write(flag + struct.pack("I", 5 + len(cd)))
                        f.write(cd)

                    break
                except IOError, e:
                    logger.warning("write %s failed: %s, try again (%d)", path,
                                   e, tried)
            else:
                raise
Example #10
    def run(self, attempId):
        logger.debug("shuffling %d of %s", self.partition, self.rdd)
        for i, bucket in self.rdd._prepare_shuffle(self.split, self.partitioner, self.aggregator):
            try:
                if marshalable(bucket):
                    flag, d = 'm', marshal.dumps(bucket)
                else:
                    flag, d = 'p', cPickle.dumps(bucket, -1)
            except ValueError:
                flag, d = 'p', cPickle.dumps(bucket, -1)
            cd = compress(d)
            for tried in range(1, 4):
                try:
                    path = LocalFileShuffle.getOutputFile(self.shuffleId, self.partition, i, len(cd) * tried)
                    tpath = path + ".%s.%s" % (socket.gethostname(), os.getpid())
                    f = open(tpath, 'wb', 1024*4096)
                    f.write(flag + struct.pack("I", 5 + len(cd)))
                    f.write(cd)
                    f.close()
                    os.rename(tpath, path)
                    break
                except IOError, e:
                    logger.warning("write %s failed: %s, try again (%d)", path, e, tried)
                    try: os.remove(tpath)
                    except OSError: pass
            else:
                raise
Example #11
def run_task(task, ntry):
    try:
        setproctitle('dpark worker %s: run task %s' % (Script, task))
        Accumulator.clear()
        gc.disable()
        result = task.run(ntry)
        accUpdate = Accumulator.values()

        if marshalable(result):
            flag, data = 0, marshal.dumps(result)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            workdir = env.get('WORKDIR')
            name = 'task_%s_%s.result' % (task.id, ntry)
            path = os.path.join(workdir, name)
            f = open(path, 'w')
            f.write(data)
            f.close()
            data = LocalFileShuffle.getServerUri() + '/' + name
            flag += 2

        return mesos_pb2.TASK_FINISHED, cPickle.dumps(
            (task.id, Success(), (flag, data), accUpdate), -1)
    except Exception, e:
        import traceback
        msg = traceback.format_exc()
        return mesos_pb2.TASK_FAILED, cPickle.dumps(
            (task.id, OtherFailure(msg), None, None), -1)
Example #12
def run_task(task_data):
    try:
        gc.disable()
        task, task_try_id = loads(decompress(task_data))
        ttid = TTID(task_try_id)
        Accumulator.clear()
        result = task.run(ttid.ttid)
        env.task_stats.bytes_max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss * 1024
        accUpdate = Accumulator.values()
        MutableDict.flush()

        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                flag, data = 1, cPickle.dumps(result, -1)

        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, task.id, ttid.task_try, len(data))
            f = open(path, 'wb')
            f.write(data)
            f.close()
            data = '/'.join(
                [LocalFileShuffle.getServerUri()] + path.split('/')[-3:]
            )
            flag += 2

        return TaskState.finished, cPickle.dumps(((flag, data), accUpdate, env.task_stats), -1)
    except FetchFailed as e:
        return TaskState.failed, TaskEndReason.fetch_failed, str(e), cPickle.dumps(e)
    except Exception as e:
        import traceback
        msg = traceback.format_exc()
        ename = e.__class__.__name__
        fatal_exceptions = (DparkUserFatalError, ArithmeticError,
                            ValueError, LookupError, SyntaxError,
                            TypeError, AssertionError)
        prefix = "FATAL" if isinstance(e, fatal_exceptions) else "FAILED"
        return TaskState.failed, '{}_EXCEPTION_{}'.format(prefix, ename), msg, cPickle.dumps(e)
    finally:
        gc.collect()
        gc.enable()
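
The indirect-result URI in Examples #5, #12, and #13 is built by grafting the last three components of the local output path onto the shuffle server's URI, so the scheduler can fetch the spilled result over HTTP. With hypothetical values:

path = '/data1/dpark/20140101-host-123/0/1/2_3'  # hypothetical local path
server_uri = 'http://host:5055'                  # hypothetical server URI
print '/'.join([server_uri] + path.split('/')[-3:])
# -> http://host:5055/0/1/2_3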
Example #13
def run_task(task_data):
    try:
        gc.disable()
        task, task_try_id = loads(decompress(task_data))
        ttid = TTID(task_try_id)
        Accumulator.clear()
        result = task.run(ttid.ttid)
        env.task_stats.bytes_max_rss = resource.getrusage(
            resource.RUSAGE_SELF).ru_maxrss * 1024
        accUpdate = Accumulator.values()
        MutableDict.flush()

        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                flag, data = 1, cPickle.dumps(result, -1)

        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, task.id, ttid.task_try,
                                                  len(data))
            f = open(path, 'wb')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] +
                            path.split('/')[-3:])
            flag += 2

        return 'TASK_FINISHED', cPickle.dumps(
            (Success(), (flag, data), accUpdate, env.task_stats), -1)
    except FetchFailed as e:
        return 'TASK_FAILED', cPickle.dumps((e, None, None, None), -1)
    except:
        import traceback
        msg = traceback.format_exc()
        return 'TASK_FAILED', cPickle.dumps(
            (OtherFailure(msg), None, None, None), -1)
    finally:
        gc.collect()
        gc.enable()
Example #14
class ShuffleMapTask(DAGTask):
    def __init__(self, stageId, rdd, dep, partition, locs):
        DAGTask.__init__(self, stageId)
        self.rdd = rdd
        self.shuffleId = dep.shuffleId
        self.aggregator = dep.aggregator
        self.partitioner = dep.partitioner
        self.partition = partition
        self.split = rdd.splits[partition]
        self.locs = locs

    def __repr__(self):
        return '<ShuffleTask(%d, %d) of %s>' % (self.shuffleId, self.partition, self.rdd)

    def __getstate__(self):
        d = dict(self.__dict__)
        del d['rdd']
        return d, dumps(self.rdd)

    def __setstate__(self, state):
        d, rdd = state
        self.__dict__.update(d)
        self.rdd = loads(rdd)

    def preferredLocations(self):
        return self.locs

    def run(self, attempId):
        logger.debug("shuffling %d of %s", self.partition, self.rdd)
        for i, bucket in self.rdd._prepare_shuffle(self.split, self.partitioner, self.aggregator):
            try:
                if marshalable(bucket):
                    flag, d = 'm', marshal.dumps(bucket)
                else:
                    flag, d = 'p', cPickle.dumps(bucket, -1)
            except ValueError:
                flag, d = 'p', cPickle.dumps(bucket, -1)
            cd = compress(d)
            for tried in range(1, 4):
                try:
                    path = LocalFileShuffle.getOutputFile(self.shuffleId, self.partition, i, len(cd) * tried)
                    tpath = path + ".%s.%s" % (socket.gethostname(), os.getpid())
                    f = open(tpath, 'wb', 1024*4096)
                    f.write(flag + struct.pack("I", 5 + len(cd)))
                    f.write(cd)
                    f.close()
                    os.rename(tpath, path)
                    break
                except IOError, e:
                    logger.warning("write %s failed: %s, try again (%d)", path, e, tried)
                    try: os.remove(tpath)
                    except OSError: pass
            else:
                raise

        return LocalFileShuffle.getServerUri()
Example #15
    def run_with_sorted(self, it):
        serializer = (GroupByAutoBatchedSerializer()
                      if self.iter_values else AutoBatchedSerializer())
        for i, bucket in it:
            for tried in range(1, 4):
                try:
                    path = LocalFileShuffle.getOutputFile(
                        self.shuffleId, self.partition, i)
                    with atomic_file(path, bufsize=1024 * 4096) as f:
                        items = sorted(bucket.items(), key=lambda x: x[0])
                        serializer.dump_stream(items, f)
                        env.task_stats.bytes_shuffle_write += f.tell()
                    break
                except IOError as e:
                    logger.warning("write %s failed: %s, try again (%d)", path,
                                   e, tried)
            else:
                raise e
        return LocalFileShuffle.getServerUri()
Example #16
def run_task(task_data):
    try:
        gc.disable()
        task, ntry = loads(decompress(task_data))
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        MutableDict.flush()

        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception as e:
                flag, data = 1, cPickle.dumps(result, -1)

        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            f = open(path, 'w')
            f.write(data)
            f.close()
            data = '/'.join(
                [LocalFileShuffle.getServerUri()] + path.split('/')[-3:]
            )
            flag += 2

        return 'TASK_FINISHED', cPickle.dumps(
            (Success(), (flag, data), accUpdate), -1)
    except FetchFailed as e:
        return 'TASK_FAILED', cPickle.dumps((e, None, None), -1)
    except:
        import traceback
        msg = traceback.format_exc()
        return 'TASK_FAILED', cPickle.dumps(
            (OtherFailure(msg), None, None), -1)
    finally:
        close_mfs()
        gc.collect()
        gc.enable()
Example #17
def run_task(task_data):
    try:
        gc.disable()
        task, ntry = cPickle.loads(decompress(task_data))
        setproctitle('dpark worker %s: run task %s' % (Script, task))

        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()

        if marshalable(result):
            flag, data = 0, marshal.dumps(result)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            f = open(path, 'w')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] +
                            path.split('/')[-3:])
            flag += 2

        return mesos_pb2.TASK_FINISHED, cPickle.dumps(
            (Success(), (flag, data), accUpdate), -1)
    except:
        import traceback
        msg = traceback.format_exc()
        return mesos_pb2.TASK_FAILED, cPickle.dumps(
            (OtherFailure(msg), None, None), -1)
    finally:
        setproctitle('dpark worker: idle')
        gc.collect()
        gc.enable()
Example #18
    def run(self, attempId):
        logger.debug("shuffling %d of %s", self.partition, self.rdd)
        numOutputSplits = self.partitioner.numPartitions
        getPartition = self.partitioner.getPartition
        mergeValue = self.aggregator.mergeValue
        createCombiner = self.aggregator.createCombiner

        buckets = [{} for i in range(numOutputSplits)]
        for k, v in self.rdd.iterator(self.split):
            bucketId = getPartition(k)
            bucket = buckets[bucketId]
            r = bucket.get(k, None)
            if r is not None:
                bucket[k] = mergeValue(r, v)
            else:
                bucket[k] = createCombiner(v)

        for i in range(numOutputSplits):
            try:
                if marshalable(buckets[i]):
                    flag, d = 'm', marshal.dumps(buckets[i])
                else:
                    flag, d = 'p', cPickle.dumps(buckets[i], -1)
            except ValueError:
                flag, d = 'p', cPickle.dumps(buckets[i], -1)
            cd = compress(d)
            for tried in range(1, 4):
                try:
                    path = LocalFileShuffle.getOutputFile(
                        self.shuffleId, self.partition, i,
                        len(cd) * tried)
                    tpath = path + ".%s.%s" % (socket.gethostname(),
                                               os.getpid())
                    f = open(tpath, 'wb', 1024 * 4096)
                    f.write(flag + struct.pack("I", 5 + len(cd)))
                    f.write(cd)
                    f.close()
                    os.rename(tpath, path)
                    break
                except IOError, e:
                    logger.warning("write %s failed: %s, try again (%d)", path,
                                   e, tried)
                    try:
                        os.remove(tpath)
                    except OSError:
                        pass
            else:
                raise
Example #19
    def run(self, attempId):
        logger.debug("shuffling %d of %s", self.partition, self.rdd)
        numOutputSplits = self.partitioner.numPartitions
        getPartition = self.partitioner.getPartition
        mergeValue = self.aggregator.mergeValue
        createCombiner = self.aggregator.createCombiner

        buckets = [{} for i in range(numOutputSplits)]
        for k,v in self.rdd.iterator(self.split):
            bucketId = getPartition(k)
            bucket = buckets[bucketId]
            r = bucket.get(k, None)
            if r is not None:
                bucket[k] = mergeValue(r, v)
            else:
                bucket[k] = createCombiner(v)

        for i in range(numOutputSplits):
            try:
                if marshalable(buckets[i]):
                    flag, d = 'm', marshal.dumps(buckets[i])
                else:
                    flag, d = 'p', cPickle.dumps(buckets[i], -1)
            except ValueError:
                flag, d = 'p', cPickle.dumps(buckets[i], -1)
            cd = compress(d)
            for tried in range(1, 4):
                try:
                    path = LocalFileShuffle.getOutputFile(self.shuffleId, self.partition, i, len(cd) * tried)
                    tpath = path + ".%s.%s" % (socket.gethostname(), os.getpid())
                    f = open(tpath, 'wb', 1024*4096)
                    f.write(flag + struct.pack("I", 5 + len(cd)))
                    f.write(cd)
                    f.close()
                    os.rename(tpath, path)
                    break
                except IOError, e:
                    logger.warning("write %s failed: %s, try again (%d)", path, e, tried)
                    try: os.remove(tpath)
                    except OSError: pass
            else:
                raise
Example #20
    def run(self, attempId):
        logger.debug("shuffling %d of %s", self.partition, self.rdd)
        for i, bucket in self._prepare_shuffle(self.rdd):
            try:
                if marshalable(bucket):
                    flag, d = 'm', marshal.dumps(bucket)
                else:
                    flag, d = 'p', cPickle.dumps(bucket, -1)
            except ValueError:
                flag, d = 'p', cPickle.dumps(bucket, -1)
            cd = compress(d)
            for tried in range(1, 4):
                try:
                    path = LocalFileShuffle.getOutputFile(self.shuffleId, self.partition, i, len(cd) * tried)
                    with atomic_file(path, bufsize=1024*4096) as f:
                        f.write(flag + struct.pack("I", 5 + len(cd)))
                        f.write(cd)

                    break
                except IOError, e:
                    logger.warning("write %s failed: %s, try again (%d)", path, e, tried)
            else:
                raise
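
Examples #6, #10, #14, and #18 spell out the write-to-temp-then-rename dance by hand; this example and Examples #9 and #24 fold it into an atomic_file context manager. dpark's actual helper may differ, but a minimal sketch with the same temporary-file naming scheme would be:

import os
import socket
from contextlib import contextmanager

@contextmanager
def atomic_file(path, bufsize=-1):
    # Write to a uniquely named sibling file, then rename into place,
    # so readers never observe a partially written shuffle file.
    tpath = path + ".%s.%s" % (socket.gethostname(), os.getpid())
    f = open(tpath, 'wb', bufsize)
    try:
        yield f
        f.close()
        os.rename(tpath, path)  # atomic on POSIX within one filesystem
    except Exception:
        f.close()
        try:
            os.remove(tpath)
        except OSError:
            pass
        raise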
Example #21
class ShuffleMapTask(DAGTask):
    def __init__(self, stageId, rdd, dep, partition, locs):
        DAGTask.__init__(self, stageId)
        self.rdd = rdd
        self.shuffleId = dep.shuffleId
        self.aggregator = dep.aggregator
        self.partitioner = dep.partitioner
        self.partition = partition
        self.split = rdd.splits[partition]
        self.locs = locs

    def __repr__(self):
        return '<ShuffleTask(%d, %d) of %s>' % (self.shuffleId, self.partition,
                                                self.rdd)

    def preferredLocations(self):
        return self.locs

    def run(self, attempId):
        logger.debug("shuffling %d of %s", self.partition, self.rdd)
        numOutputSplits = self.partitioner.numPartitions
        getPartition = self.partitioner.getPartition
        mergeValue = self.aggregator.mergeValue
        createCombiner = self.aggregator.createCombiner

        buckets = [{} for i in range(numOutputSplits)]
        for k, v in self.rdd.iterator(self.split):
            bucketId = getPartition(k)
            bucket = buckets[bucketId]
            r = bucket.get(k, None)
            if r is not None:
                bucket[k] = mergeValue(r, v)
            else:
                bucket[k] = createCombiner(v)

        for i in range(numOutputSplits):
            try:
                if marshalable(buckets[i]):
                    flag, d = 'm', marshal.dumps(buckets[i])
                else:
                    flag, d = 'p', cPickle.dumps(buckets[i], -1)
            except ValueError:
                flag, d = 'p', cPickle.dumps(buckets[i], -1)
            cd = compress(d)
            for tried in range(1, 4):
                try:
                    path = LocalFileShuffle.getOutputFile(
                        self.shuffleId, self.partition, i,
                        len(cd) * tried)
                    tpath = path + ".%s.%s" % (socket.gethostname(),
                                               os.getpid())
                    f = open(tpath, 'wb', 1024 * 4096)
                    f.write(flag + struct.pack("I", 5 + len(cd)))
                    f.write(cd)
                    f.close()
                    os.rename(tpath, path)
                    break
                except IOError, e:
                    logger.warning("write %s failed: %s, try again (%d)", path,
                                   e, tried)
                    try:
                        os.remove(tpath)
                    except OSError:
                        pass
            else:
                raise

        return LocalFileShuffle.getServerUri()
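
The bucket-building loop in Examples #18, #19, and #21 is an in-memory combine: each key is routed to the bucket of its output partition, and values are folded with createCombiner/mergeValue. A self-contained toy run of the same logic, using a word-count style aggregator (the names below are illustrative, not dpark's API):

num_splits = 2
get_partition = lambda k: hash(k) % num_splits
create_combiner = lambda v: v      # first value seen for a key
merge_value = lambda r, v: r + v   # fold each later value in

buckets = [{} for _ in range(num_splits)]
for k, v in [('a', 1), ('b', 1), ('a', 1)]:
    bucket = buckets[get_partition(k)]
    r = bucket.get(k)
    bucket[k] = merge_value(r, v) if r is not None else create_combiner(v)

print buckets  # e.g. [{'a': 2}, {'b': 1}], split depends on hash()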
Example #22
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()

        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception, e:
                flag, data = 1, cPickle.dumps(result, -1)

        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            f = open(path, 'w')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] + path.split('/')[-3:])
            flag += 2

        return mesos_pb2.TASK_FINISHED, cPickle.dumps((Success(), (flag, data), accUpdate), -1)
    except FetchFailed, e:
        return mesos_pb2.TASK_FAILED, cPickle.dumps((e, None, None), -1)
    except:
        import traceback
        msg = traceback.format_exc()
        return mesos_pb2.TASK_FAILED, cPickle.dumps((OtherFailure(msg), None, None), -1)
    finally:
        setproctitle('dpark worker: idle')
Example #23
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        MutableDict.flush()

        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception, e:
                flag, data = 1, cPickle.dumps(result, -1)

        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            f = open(path, 'w')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] +
                            path.split('/')[-3:])
            flag += 2

        return mesos_pb2.TASK_FINISHED, cPickle.dumps(
            (Success(), (flag, data), accUpdate), -1)
    except FetchFailed, e:
        return mesos_pb2.TASK_FAILED, cPickle.dumps((e, None, None), -1)
    except:
        import traceback
        msg = traceback.format_exc()
        return mesos_pb2.TASK_FAILED, cPickle.dumps(
            (OtherFailure(msg), None, None), -1)
Example #24
class ShuffleMapTask(DAGTask):
    def __init__(self, stageId, rdd, dep, partition, locs):
        DAGTask.__init__(self, stageId)
        self.rdd = rdd
        self.shuffleId = dep.shuffleId
        self.aggregator = dep.aggregator
        self.partitioner = dep.partitioner
        self.partition = partition
        self.split = rdd.splits[partition]
        self.locs = locs

    def __repr__(self):
        shuffleId = getattr(self, 'shuffleId', None)
        partition = getattr(self, 'partition', None)
        rdd = getattr(self, 'rdd', None)
        return '<ShuffleTask(%s, %s) of %s>' % (shuffleId, partition, rdd)

    def __getstate__(self):
        d = dict(self.__dict__)
        del d['rdd']
        del d['split']
        return d, dumps(self.rdd), dumps(self.split)

    def __setstate__(self, state):
        d, rdd, split = state
        self.__dict__.update(d)
        self.rdd = loads(rdd)
        self.split = loads(split)

    def preferredLocations(self):
        return self.locs

    def _prepare_shuffle(self, rdd):
        split = self.split
        numOutputSplits = self.partitioner.numPartitions
        getPartition = self.partitioner.getPartition
        mergeValue = self.aggregator.mergeValue
        createCombiner = self.aggregator.createCombiner

        buckets = [{} for i in range(numOutputSplits)]
        for k, v in rdd.iterator(split):
            bucketId = getPartition(k)
            bucket = buckets[bucketId]
            r = bucket.get(k, None)
            if r is not None:
                bucket[k] = mergeValue(r, v)
            else:
                bucket[k] = createCombiner(v)

        return enumerate(buckets)

    def run(self, attempId):
        logger.debug("shuffling %d of %s", self.partition, self.rdd)
        for i, bucket in self._prepare_shuffle(self.rdd):
            try:
                if marshalable(bucket):
                    flag, d = 'm', marshal.dumps(bucket)
                else:
                    flag, d = 'p', cPickle.dumps(bucket, -1)
            except ValueError:
                flag, d = 'p', cPickle.dumps(bucket, -1)
            cd = compress(d)
            for tried in range(1, 4):
                try:
                    path = LocalFileShuffle.getOutputFile(
                        self.shuffleId, self.partition, i,
                        len(cd) * tried)
                    with atomic_file(path, bufsize=1024 * 4096) as f:
                        f.write(flag + struct.pack("I", 5 + len(cd)))
                        f.write(cd)

                    break
                except IOError, e:
                    logger.warning("write %s failed: %s, try again (%d)", path,
                                   e, tried)
            else:
                raise

        return LocalFileShuffle.getServerUri()
Example #25
    def _get_path(self, i, size):
        return LocalFileShuffle.getOutputFile(self.shuffle_id, self.map_id, i,
                                              size)