Example #1
    @classmethod
    def _fetch_missing(cls, key):
        result = {}
        urls = env.trackerClient.call(GetValueMessage('mutable_dict:%s' % key))
        for url in urls:
            f = urllib.request.urlopen(url)
            if f.code is not None and f.code != 200:
                raise IOError('Open %s failed: %s' % (url, f.code))

            data = f.read()
            if len(data) < 4:
                raise IOError('Transfer %s failed: %s received' % (url, len(data)))

            # the first 4 bytes are a little-endian uint32 holding the total
            # blob size (prefix included), checked against len(data) below
            length, = struct.unpack('<I', data[:4])
            if length != len(data):
                raise IOError('Transfer %s failed: %s received, %s expected' % (url,
                                                                                len(data), length))

            data = cPickle.loads(decompress(data[4:]))
            # merge by key: each value is a (value, generation) pair; equal
            # generations are kept as ConflictValues, otherwise the newer wins
            for k, v in data.items():
                if k in result:
                    r = result[k]
                    if v[1] == r[1]:
                        r0 = r[0]
                        v0 = v[0]
                        merged = r0.value if isinstance(r0, ConflictValues) else [r0]
                        merged += v0.value if isinstance(v0, ConflictValues) else [v0]
                        result[k] = (ConflictValues(merged), r[1])
                    else:
                        result[k] = v if v[1] > r[1] else r
                else:
                    result[k] = v

        return result
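
For context, each URL in _fetch_missing serves a blob with a simple framing: a 4-byte little-endian length prefix (counting the prefix itself) followed by a compressed pickle. A minimal sketch of a matching writer, assuming dpark's compress() is zlib-compatible (pack_mutable_dict_blob itself is hypothetical):

import pickle
import struct
import zlib

def pack_mutable_dict_blob(d):
    # compressed pickle, prefixed with the *total* blob size so the reader
    # can verify it received the whole transfer
    payload = zlib.compress(pickle.dumps(d, -1))
    return struct.pack('<I', len(payload) + 4) + payload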
Example #2
def run_task(task_data):
    try:
        gc.disable()
        task, task_try_id = loads(decompress(task_data))
        ttid = TTID(task_try_id)
        Accumulator.clear()
        result = task.run(ttid.ttid)
        env.task_stats.bytes_max_rss = resource.getrusage(
            resource.RUSAGE_SELF).ru_maxrss * 1024
        accUpdate = Accumulator.values()
        MutableDict.flush()

        # flag 0: marshal-serialized result; flag 1: pickled result
        # (2 is added below when the result is spilled to a file)
        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                flag, data = 1, cPickle.dumps(result, -1)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            # real shuffle ids start from 1; 0 is used here for task results
            swd = ShuffleWorkDir(0, task.id, ttid.task_try)
            tmppath = swd.alloc_tmp(len(data))
            with open(tmppath, 'wb') as f:
                f.write(data)
            path = swd.export(tmppath)
            data = '/'.join([env.server_uri] + path.split('/')[-3:])
            flag += 2

        return TaskState.finished, cPickle.dumps(
            ((flag, data), accUpdate, env.task_stats), -1)
    except FetchFailed as e:
        return (TaskState.failed, TaskEndReason.fetch_failed,
                str(e), cPickle.dumps(e))
    except Exception as e:
        import traceback
        msg = traceback.format_exc()
        ename = e.__class__.__name__
        fatal_exceptions = (DparkUserFatalError, ArithmeticError, ValueError,
                            LookupError, SyntaxError, TypeError,
                            AssertionError)
        prefix = "FATAL" if isinstance(e, fatal_exceptions) else "FAILED"
        return (TaskState.failed, '{}_EXCEPTION_{}'.format(prefix, ename),
                msg, cPickle.dumps(e))
    finally:
        gc.collect()
        gc.enable()
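
The flag/data pair above is decoded on the scheduler side (see statusUpdate in Example #7 below). A minimal sketch of that decoding, assuming decompress is zlib-compatible (decode_task_result itself is hypothetical):

import marshal
import pickle
import urllib.request
import zlib

def decode_task_result(flag, data):
    if flag >= 2:
        # result was too large for the status message: data is a URL
        data = urllib.request.urlopen(data).read()
        flag -= 2
    data = zlib.decompress(data)
    # flag selects the serializer chosen in run_task
    return marshal.loads(data) if flag == 0 else pickle.loads(data)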
Example #3
def run_task(task_data):
    try:
        gc.disable()
        task, task_try_id = loads(decompress(task_data))
        ttid = TTID(task_try_id)
        Accumulator.clear()
        result = task.run(ttid.ttid)
        env.task_stats.bytes_max_rss = resource.getrusage(
            resource.RUSAGE_SELF).ru_maxrss * 1024
        accUpdate = Accumulator.values()
        MutableDict.flush()

        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                flag, data = 1, cPickle.dumps(result, -1)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, task.id, ttid.task_try,
                                                  len(data))
            with open(path, 'wb') as f:
                f.write(data)
            data = '/'.join([LocalFileShuffle.getServerUri()] +
                            path.split('/')[-3:])
            flag += 2

        return TaskState.finished, cPickle.dumps(
            ((flag, data), accUpdate, env.task_stats), -1)
    except FetchFailed as e:
        return (TaskState.failed, TaskEndReason.fetch_failed,
                str(e), cPickle.dumps(e))
    except Exception as e:
        import traceback
        msg = traceback.format_exc()
        ename = e.__class__.__name__
        return (TaskState.failed, 'FAILED_EXCEPTION_{}'.format(ename),
                msg, cPickle.dumps(e))
    finally:
        gc.collect()
        gc.enable()
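
The result URI built above keeps only the last three components of the local path under the worker's HTTP root. A small illustration, with hypothetical paths:

# hypothetical local path and server URI, for illustration only
path = '/var/dpark/shuffle/0/123/456'
server_uri = 'http://worker1:5055'
url = '/'.join([server_uri] + path.split('/')[-3:])
assert url == 'http://worker1:5055/0/123/456'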
Example #4
def run_task(task_data):
    try:
        gc.disable()
        task, task_try_id = loads(decompress(task_data))
        ttid = TTID(task_try_id)
        Accumulator.clear()
        result = task.run(ttid.ttid)
        env.task_stats.bytes_max_rss = resource.getrusage(
            resource.RUSAGE_SELF).ru_maxrss * 1024
        accUpdate = Accumulator.values()
        MutableDict.flush()

        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                flag, data = 1, cPickle.dumps(result, -1)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, task.id, ttid.task_try,
                                                  len(data))
            with open(path, 'wb') as f:
                f.write(data)
            data = '/'.join([LocalFileShuffle.getServerUri()] +
                            path.split('/')[-3:])
            flag += 2

        return 'TASK_FINISHED', cPickle.dumps(
            (Success(), (flag, data), accUpdate, env.task_stats), -1)
    except FetchFailed as e:
        return 'TASK_FAILED', cPickle.dumps((e, None, None, None), -1)
    except Exception:
        import traceback
        msg = traceback.format_exc()
        return 'TASK_FAILED', cPickle.dumps(
            (OtherFailure(msg), None, None, None), -1)
    finally:
        gc.collect()
        gc.enable()
Example #5
    def load_stream(self, stream):
        while True:
            head = stream.read(5)
            if not head:
                return
            # the 5-byte header carries the payload length plus serializer flags
            length, is_marshal, is_sorted = unpack_header(head)
            assert is_sorted
            buf = stream.read(length)
            if len(buf) < length:
                raise IOError("length not match: expected %d, but got %d" % (length, len(buf)))

            buf = decompress(buf)
            AutoBatchedSerializer.size_loaded += len(buf)
            if is_marshal:
                vs = marshal.loads(buf)
            else:
                vs = pickle.loads(buf)
            for v in vs:
                yield v
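
unpack_header above reads a 5-byte header into (length, is_marshal, is_sorted). A self-consistent sketch of one plausible layout, assuming a single flag byte followed by a little-endian uint32 length (the real layout lives in dpark's serializer module):

import struct

def pack_header(length, is_marshal, is_sorted):
    flags = (1 if is_marshal else 0) | ((1 if is_sorted else 0) << 1)
    return struct.pack('<BI', flags, length)

def unpack_header(head):
    flags, length = struct.unpack('<BI', head)
    return length, bool(flags & 1), bool(flags & 2)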
Example #6
    def unsorted_batches(self):
        f = None
        # TEST_RETRY = True
        try:
            f, exp_size = self.open()
            total_size = 0

            while True:
                head = f.read(5)
                if len(head) == 0:
                    break
                length, is_marshal, is_sorted = unpack_header(head)
                assert not is_sorted
                total_size += length + 5
                d = f.read(length)
                if length != len(d):
                    raise IOError(
                        "length not match: expected %d, but got %d" %
                        (length, len(d)))
                d = decompress(d)
                if is_marshal:
                    items = marshal.loads(d)
                else:
                    try:
                        items = pickle.loads(d)
                    except Exception:
                        # transient read hiccup: back off briefly and retry once
                        time.sleep(1)
                        items = pickle.loads(d)
                yield items

                # if TEST_RETRY and self.num_retry == 0:
                #    raise Exception("test_retry")

            if total_size != exp_size:
                raise IOError(
                    "fetch size not match: expected %d, but got %d" %
                    (exp_size, total_size))

            env.task_stats.bytes_fetch += exp_size
        finally:
            if f:
                f.close()
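
A sketch of how a consumer might drain this generator, assuming fetcher is an object exposing unsorted_batches() as above:

def iter_items(fetcher):
    # each yielded batch is one decompressed, deserialized block of records
    for batch in fetcher.unsorted_batches():
        for item in batch:
            yield item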
Example #7
    def statusUpdate(self, driver, status):
        def plot_progresses():
            if self.color:
                total = len(self.active_tasksets)
                # ANSI: erase the line, clear everything below, move up one row
                logger.info('\x1b[2K\x1b[J\x1b[1A')
                for i, taskset_id in enumerate(self.active_tasksets):
                    if i == total - 1:
                        # after the last row, jump the cursor back over the block
                        ending = '\x1b[%sA' % total
                    else:
                        ending = ''

                    tasksets = self.active_tasksets[taskset_id]
                    tasksets.progress(ending)

        mesos_task_id = status.task_id.value
        state = status.state
        reason = status.get('message')  # set by mesos
        data = status.get('data')

        logger.debug('status update: %s %s', mesos_task_id, state)

        ttid = TTID(mesos_task_id)

        taskset = self.active_tasksets.get(ttid.taskset_id)

        if taskset is None:
            if state == 'TASK_RUNNING':
                logger.debug('kill task %s as its taskset has gone',
                             mesos_task_id)
                self.driver.killTask(Dict(value=mesos_task_id))
            else:
                logger.debug('ignore task %s as its taskset has gone',
                             mesos_task_id)
            return

        if state == 'TASK_RUNNING':
            taskset.statusUpdate(ttid.task_id, ttid.task_try, state)
            if taskset.tasksFinished == 0:
                plot_progresses()
        else:
            if mesos_task_id not in taskset.ttids:
                logger.debug(
                    'ignore task %s as it has finished or failed, new msg: %s',
                    mesos_task_id, (state, reason))
            else:
                taskset.ttids.remove(mesos_task_id)
                if mesos_task_id in self.ttid_to_agent_id:
                    agent_id = self.ttid_to_agent_id[mesos_task_id]
                    if agent_id in self.agent_id_to_ttids:
                        self.agent_id_to_ttids[agent_id] -= 1
                    del self.ttid_to_agent_id[mesos_task_id]

                if state in ('TASK_FINISHED', 'TASK_FAILED') and data:
                    try:
                        reason, result, accUpdate, task_stats = cPickle.loads(
                            decode_data(data))
                        if result:
                            flag, data = result
                            # flag >= 2: result was spilled to a file; data is its URL
                            if flag >= 2:
                                try:
                                    data = urllib.request.urlopen(data).read()
                                except IOError:
                                    # try again
                                    data = urllib.request.urlopen(data).read()
                                flag -= 2
                            data = decompress(data)
                            if flag == 0:
                                result = marshal.loads(data)
                            else:
                                result = cPickle.loads(data)
                        taskset.statusUpdate(ttid.task_id, ttid.task_try,
                                             state, reason, result, accUpdate,
                                             task_stats)
                        if state == 'TASK_FINISHED':
                            plot_progresses()
                    except Exception as e:
                        logger.warning(
                            'error when cPickle.loads(): %s, data len: %s', e,
                            len(data))
                        state = 'TASK_FAILED'
                        taskset.statusUpdate(ttid.task_id, ttid.task_try,
                                             state, 'load failed: %s' % e)
                else:
                    # killed, lost
                    taskset.statusUpdate(ttid.task_id, ttid.task_try, state,
                                         reason or data)
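
The escape sequences in plot_progresses implement an in-place, multi-line progress display. A minimal standalone sketch of the same technique:

import sys

def redraw(lines):
    # erase the current line and everything below, draw the progress
    # lines, then move the cursor back up so the next call overwrites them
    sys.stdout.write('\x1b[2K\x1b[J')
    for line in lines:
        sys.stdout.write(line + '\n')
    sys.stdout.write('\x1b[%dA' % len(lines))
    sys.stdout.flush()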
Example #8
    def statusUpdate(self, driver, status):
        def plot_progresses():
            if self.color:
                total = len(self.activeJobs)
                logger.info('\x1b[2K\x1b[J\x1b[1A')
                for i, job_id in enumerate(self.activeJobs):
                    if i == total - 1:
                        ending = '\x1b[%sA' % total
                    else:
                        ending = ''

                    jobs = self.activeJobs[job_id]
                    jobs.progress(ending)

        tid = status.task_id.value
        state = status.state
        logger.debug('status update: %s %s', tid, state)

        jid = self.taskIdToJobId.get(tid)
        _, task_id, tried = map(int, tid.split(':'))
        if state == 'TASK_RUNNING':
            if jid in self.activeJobs:
                job = self.activeJobs[jid]
                job.statusUpdate(task_id, tried, state)
                if job.tasksFinished == 0:
                    plot_progresses()
            else:
                logger.debug('kill task %s as its job has gone', tid)
                self.driver.killTask(Dict(value=tid))

            return

        self.taskIdToJobId.pop(tid, None)
        if jid in self.jobTasks:
            self.jobTasks[jid].remove(tid)
        if tid in self.taskIdToAgentId:
            agent_id = self.taskIdToAgentId[tid]
            if agent_id in self.agentTasks:
                self.agentTasks[agent_id] -= 1
            del self.taskIdToAgentId[tid]

        if jid not in self.activeJobs:
            logger.debug('ignore task %s as its job has gone', tid)
            return

        job = self.activeJobs[jid]
        reason = status.get('message')
        data = status.get('data')
        if state in ('TASK_FINISHED', 'TASK_FAILED') and data:
            try:
                reason, result, accUpdate, task_stats = cPickle.loads(
                    decode_data(data))
                if result:
                    flag, data = result
                    if flag >= 2:
                        try:
                            data = urllib.request.urlopen(data).read()
                        except IOError:
                            # try again
                            data = urllib.request.urlopen(data).read()
                        flag -= 2
                    data = decompress(data)
                    if flag == 0:
                        result = marshal.loads(data)
                    else:
                        result = cPickle.loads(data)
            except Exception as e:
                logger.warning('error when cPickle.loads(): %s, data len: %s',
                               e, len(data))
                state = 'TASK_FAILED'
                job.statusUpdate(task_id, tried, state, 'load failed: %s' % e)
                return
            else:
                job.statusUpdate(task_id, tried, state, reason, result,
                                 accUpdate, task_stats)
                if state == 'TASK_FINISHED':
                    plot_progresses()
                return

        # killed, lost, load failed
        job.statusUpdate(task_id, tried, state, reason or data)