Example #1
    def _fetch_missing(self, key):
        result = {}
        urls = env.trackerClient.call(GetValueMessage('mutable_dict:%s' % key))
        for url in urls:
            f = urllib.urlopen(url)
            if f.code is not None and f.code != 200:
                raise IOError('Open %s failed:%s' % (url, f.code))

            data = f.read()
            if len(data) < 4:
                raise IOError('Transfer %s failed: %s received' % (url, len(data)))

            length, = struct.unpack('<I', data[:4])
            if length != len(data):
                raise IOError('Transfer %s failed: %s received, %s expected' % (url,
                    len(data), length))

            data = cPickle.loads(decompress(data[4:]))
            # merge entries fetched from different URLs: each value carries a
            # generation in v[1]; equal generations are collected into
            # ConflictValues, otherwise the newer generation wins
            for k, v in data.items():
                if k in result:
                    r = result[k]
                    if v[1] == r[1]:
                        r = r.value if isinstance(r, ConflictValues) else [r]
                        r += v.value if isinstance(v, ConflictValues) else [v]
                        result[k] = ConflictValues(r)
                    else:
                        result[k] = v if v[1] > r[1] else r
                else:
                    result[k] = v

        return result
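For reference, the blob parsed by _fetch_missing is a 4-byte little-endian length (covering header plus body) followed by a compressed pickle. A minimal writer-side sketch under that assumption (write_blob is a hypothetical name, and zlib stands in for dpark's compress helper):

import struct
import zlib
import pickle

def write_blob(values):
    # hypothetical inverse of the reader above: compressed pickle body,
    # prefixed with a little-endian u32 giving the total blob length
    body = zlib.compress(pickle.dumps(values, -1))
    return struct.pack('<I', 4 + len(body)) + body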
Example #2
def run_task(task_data):
    try:
        gc.disable()
        task, ntry = cPickle.loads(decompress(task_data))
        setproctitle('dpark worker %s: run task %s' % (Script, task))

        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()

        if marshalable(result):
            flag, data = 0, marshal.dumps(result)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            f = open(path, 'w')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] + path.split('/')[-3:])
            flag += 2

        return mesos_pb2.TASK_FINISHED, cPickle.dumps((Success(), (flag, data), accUpdate), -1)
    except FetchFailed, e:
        return mesos_pb2.TASK_FAILED, cPickle.dumps((e, None, None), -1)
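The (flag, data) pair encodes how the result travels: flag 0 is marshal, 1 is pickle, and +2 means data is a URL pointing at a spilled file. A decoder sketch mirroring the statusUpdate handlers later on this page (decode_result is a hypothetical helper; the decompress import path is an assumption):

import marshal
import urllib
import cPickle
from dpark.util import decompress  # assumed import path

def decode_result(flag, data):
    # hypothetical mirror of the scheduler-side decoding shown below
    if flag >= 2:
        data = urllib.urlopen(data).read()  # result was spilled to a file server
        flag -= 2
    data = decompress(data)
    return marshal.loads(data) if flag == 0 else cPickle.loads(data)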
Example #3
    def launchTask(self, driver, task):
        try:
            t, ntry = cPickle.loads(decompress(task.data))
            
            reply_status(driver, task, mesos_pb2.TASK_RUNNING)
            
            logging.debug("launch task %s", t.id)
            
            pool = self.get_idle_worker()
            self.busy_workers[task.task_id.value] = (task, pool)

            def callback((state, data)):
                with self.lock:
                    if task.task_id.value not in self.busy_workers:
                        return
                    _, pool = self.busy_workers.pop(task.task_id.value)
                    pool.done += 1
                    reply_status(driver, task, state, data)
                    if (len(self.idle_workers) + len(self.busy_workers) < self.parallel 
                            and len(self.idle_workers) < MAX_IDLE_WORKERS
                            and pool.done < MAX_TASKS_PER_WORKER 
                            and get_pool_memory(pool) < get_task_memory(task)): # maybe memory leak in executor
                        self.idle_workers.append((time.time(), pool))
                    else:
                        try: pool.terminate() 
                        except: pass
        
            pool.apply_async(run_task, [t, ntry], callback=callback)
    
        except Exception, e:
            import traceback
            msg = traceback.format_exc()
            reply_status(driver, task, mesos_pb2.TASK_LOST, msg)
            return
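reply_status is not shown in these snippets; a plausible sketch, assuming it simply wraps the state (and optional serialized data) in a mesos TaskStatus and reports it back through the driver:

def reply_status(driver, task, state, data=None):
    # plausible sketch of the helper used above; not the confirmed
    # dpark implementation
    status = mesos_pb2.TaskStatus()
    status.task_id.MergeFrom(task.task_id)
    status.state = state
    if data is not None:
        status.data = data
    driver.sendStatusUpdate(status)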
Example #4
def run_task(task_data):
    try:
        gc.disable()
        task, ntry = cPickle.loads(decompress(task_data))
        setproctitle('dpark worker %s: run task %s' % (Script, task))
        
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()

        if marshalable(result):
            flag, data = 0, marshal.dumps(result)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            workdir = env.get('WORKDIR')
            name = 'task_%s_%s.result' % (task.id, ntry)
            path = os.path.join(workdir, name) 
            f = open(path, 'w')
            f.write(data)
            f.close()
            data = LocalFileShuffle.getServerUri() + '/' + name
            flag += 2

        return mesos_pb2.TASK_FINISHED, cPickle.dumps((task.id, Success(), (flag, data), accUpdate), -1)
    except Exception, e:
        import traceback
        msg = traceback.format_exc()
        return mesos_pb2.TASK_FAILED, cPickle.dumps((task.id, OtherFailure(msg), None, None), -1)
Example #5
    def statusUpdate(self, driver, status):
        tid = status.task_id.value
        state = status.state
        logger.debug("status update: %s %s", tid, state)

        jid = self.taskIdToJobId.get(tid)
        _, task_id, tried = map(int, tid.split(':'))
        if state == mesos_pb2.TASK_RUNNING:
            if jid in self.activeJobs:
                job = self.activeJobs[jid]
                job.statusUpdate(task_id, tried, state)
            else:
                logger.debug('kill task %s as its job has gone', tid)
                self.driver.killTask(mesos_pb2.TaskID(value=tid))

            return

        self.taskIdToJobId.pop(tid, None)
        if jid in self.jobTasks:
            self.jobTasks[jid].remove(tid)
        if tid in self.taskIdToSlaveId:
            slave_id = self.taskIdToSlaveId[tid]
            if slave_id in self.slaveTasks:
                self.slaveTasks[slave_id] -= 1
            del self.taskIdToSlaveId[tid]

        if jid not in self.activeJobs:
            logger.debug('ignore task %s as its job has gone', tid)
            return

        job = self.activeJobs[jid]
        if state in (mesos_pb2.TASK_FINISHED,
                     mesos_pb2.TASK_FAILED) and status.data:
            try:
                reason, result, accUpdate = cPickle.loads(status.data)
                if result:
                    flag, data = result
                    if flag >= 2:
                        try:
                            data = urllib.urlopen(data).read()
                        except IOError:
                            # try again
                            data = urllib.urlopen(data).read()
                        flag -= 2
                    data = decompress(data)
                    if flag == 0:
                        result = marshal.loads(data)
                    else:
                        result = cPickle.loads(data)
            except Exception, e:
                logger.warning("error when cPickle.loads(): %s, data:%s", e,
                               len(status.data))
                state = mesos_pb2.TASK_FAILED
                return job.statusUpdate(task_id, tried, mesos_pb2.TASK_FAILED,
                                        'load failed: %s' % e)
            else:
                return job.statusUpdate(task_id, tried, state, reason, result,
                                        accUpdate)
Example #6
    def statusUpdate(self, driver, status):
        tid = status.task_id.value
        state = status.state
        logger.debug("status update: %s %s", tid, state)

        jid = self.taskIdToJobId.get(tid)
        _, task_id, tried = map(int, tid.split(':'))
        if state == mesos_pb2.TASK_RUNNING:
            if jid in self.activeJobs:
                job = self.activeJobs[jid]
                job.statusUpdate(task_id, tried, state)
            else:
                logger.debug('kill task %s as its job has gone', tid)
                self.driver.killTask(mesos_pb2.TaskID(value=tid))
            
            return

        self.taskIdToJobId.pop(tid, None)
        if jid in self.jobTasks:
            self.jobTasks[jid].remove(tid)
        if tid in self.taskIdToSlaveId:
            slave_id = self.taskIdToSlaveId[tid]
            if slave_id in self.slaveTasks:
                self.slaveTasks[slave_id] -= 1
            del self.taskIdToSlaveId[tid]

        if jid not in self.activeJobs:
            logger.debug('ignore task %s as its job has gone', tid)
            return

        job = self.activeJobs[jid]
        if state in (mesos_pb2.TASK_FINISHED, mesos_pb2.TASK_FAILED) and status.data:
            try:
                reason,result,accUpdate = cPickle.loads(status.data)
                if result:
                    flag, data = result
                    if flag >= 2:
                        try:
                            data = urllib.urlopen(data).read()
                        except IOError:
                            # try again
                            data = urllib.urlopen(data).read()
                        flag -= 2
                    data = decompress(data)
                    if flag == 0:
                        result = marshal.loads(data)
                    else:
                        result = cPickle.loads(data)
            except Exception, e:
                logger.warning("error when cPickle.loads(): %s, data:%s", e, len(status.data))
                state = mesos_pb2.TASK_FAILED
                return job.statusUpdate(task_id, tried, mesos_pb2.TASK_FAILED, 'load failed: %s' % e)
            else:
                return job.statusUpdate(task_id, tried, state,
                    reason, result, accUpdate)
Example #7
def run_task(task_data):
    try:
        gc.disable()
        task, ntry = cPickle.loads(decompress(task_data))
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        MutableDict.flush()

        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception, e:
                flag, data = 1, cPickle.dumps(result, -1)

        else:
            # remainder reconstructed from the fuller run_task variant in
            # Example #12; the source snippet was truncated here
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            f = open(path, 'w')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] + path.split('/')[-3:])
            flag += 2

        return 'TASK_FINISHED', cPickle.dumps((Success(), (flag, data), accUpdate), -1)
    except FetchFailed, e:
        return 'TASK_FAILED', cPickle.dumps((e, None, None), -1)
Example #8
    def fetch_one(self, uri, shuffleId, part, reduceId):
        if uri == LocalFileShuffle.getServerUri():
            # urllib can open local file
            url = 'file://' + LocalFileShuffle.getOutputFile(
                shuffleId, part, reduceId)
        else:
            url = "%s/%d/%d/%d" % (uri, shuffleId, part, reduceId)
        logger.debug("fetch %s", url)

        tries = 2
        while True:
            try:
                f = urllib.request.urlopen(url)
                if f.code == 404:
                    f.close()
                    raise IOError("not found")

                d = f.read()
                flag = d[:1]
                length, = struct.unpack("I", d[1:5])
                if length != len(d):
                    raise ValueError(
                        "length not match: expected %d, but got %d" %
                        (length, len(d)))
                d = decompress(d[5:])
                f.close()
                if flag == b'm':
                    d = marshal.loads(d)
                elif flag == b'p':
                    d = six.moves.cPickle.loads(d)
                else:
                    raise ValueError("invalid flag")
                return d
            except Exception as e:
                logger.debug(
                    "Fetch failed for shuffle %d,"
                    " reduce %d, %d, %s, %s, try again", shuffleId, reduceId,
                    part, url, e)
                tries -= 1
                if not tries:
                    logger.warning(
                        "Fetch failed for shuffle %d,"
                        " reduce %d, %d, %s, %s", shuffleId, reduceId, part,
                        url, e)
                    from dpark.schedule import FetchFailed
                    raise FetchFailed(uri, shuffleId, part, reduceId)
                time.sleep(2**(2 - tries) * 0.1)
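The chunk format read here is one flag byte (b'm' for marshal, b'p' for pickle), a 4-byte length covering the entire chunk, then the compressed payload. A writer sketch under those assumptions (pack_chunk is hypothetical; zlib stands in for dpark's compress):

import struct
import marshal
import pickle
import zlib

def pack_chunk(obj, use_marshal=True):
    # hypothetical inverse of fetch_one's parsing above
    if use_marshal:
        flag, body = b'm', marshal.dumps(obj)
    else:
        flag, body = b'p', pickle.dumps(obj, -1)
    body = zlib.compress(body)
    # the length field counts the whole chunk: flag + length + payload
    return flag + struct.pack("I", 5 + len(body)) + body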
Example #9
def run_task(task_data):
    try:
        gc.disable()
        task, ntry = cPickle.loads(decompress(task_data))
        setproctitle('dpark worker %s: run task %s' % (Script, task))

        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()

        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception, e:
                flag, data = 1, cPickle.dumps(result, -1)

        else:
            # remainder reconstructed from the run_task variant in
            # Example #2; the source snippet was truncated here
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            f = open(path, 'w')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] + path.split('/')[-3:])
            flag += 2

        return mesos_pb2.TASK_FINISHED, cPickle.dumps((Success(), (flag, data), accUpdate), -1)
    except FetchFailed, e:
        return mesos_pb2.TASK_FAILED, cPickle.dumps((e, None, None), -1)
Example #10
    def fetch_one(self, uri, shuffleId, part, reduceId):
        if uri == LocalFileShuffle.getServerUri():
            # urllib can open local file
            url = LocalFileShuffle.getOutputFile(shuffleId, part, reduceId)
        else:
            url = "%s/%d/%d/%d" % (uri, shuffleId, part, reduceId)
        logger.debug("fetch %s", url)

        tries = 2
        while True:
            try:
                f = urllib.urlopen(url)
                if f.code == 404:
                    f.close()
                    raise IOError("not found")

                d = f.read()
                flag = d[:1]
                length, = struct.unpack("I", d[1:5])
                if length != len(d):
                    raise ValueError(
                        "length not match: expected %d, but got %d" %
                        (length, len(d)))
                d = decompress(d[5:])
                f.close()
                if flag == 'm':
                    d = marshal.loads(d)
                elif flag == 'p':
                    d = cPickle.loads(d)
                else:
                    raise ValueError("invalid flag")
                return d
            except Exception as e:
                logger.debug("Fetch failed for shuffle %d,"
                             " reduce %d, %d, %s, %s, try again",
                             shuffleId, reduceId, part, url, e)
                tries -= 1
                if not tries:
                    logger.warning("Fetch failed for shuffle %d,"
                                   " reduce %d, %d, %s, %s",
                                   shuffleId, reduceId, part, url, e)
                    from dpark.schedule import FetchFailed
                    raise FetchFailed(uri, shuffleId, part, reduceId)
                time.sleep(2 ** (2 - tries) * 0.1)
Example #11
def run_task(task_data):
    try:
        gc.disable()
        task, (job_id, ntry) = loads(decompress(task_data))
        tid = '%s:%s:%s' % (job_id, task.id, ntry)
        Accumulator.clear()
        result = task.run(tid)
        env.task_stats.bytes_max_rss = resource.getrusage(
            resource.RUSAGE_SELF).ru_maxrss * 1024
        accUpdate = Accumulator.values()
        MutableDict.flush()

        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception as e:
                flag, data = 1, six.moves.cPickle.dumps(result, -1)

        else:
            flag, data = 1, six.moves.cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            f = open(path, 'wb')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] +
                            path.split('/')[-3:])
            flag += 2

        return 'TASK_FINISHED', six.moves.cPickle.dumps(
            (Success(), (flag, data), accUpdate, env.task_stats), -1)
    except FetchFailed as e:
        return 'TASK_FAILED', six.moves.cPickle.dumps((e, None, None, None),
                                                      -1)
    except:
        import traceback
        msg = traceback.format_exc()
        return 'TASK_FAILED', six.moves.cPickle.dumps(
            (OtherFailure(msg), None, None, None), -1)
    finally:
        gc.collect()
        gc.enable()
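marshalable() is only a fast pre-check; the try/except above still guards against containers whose elements marshal cannot handle. A plausible sketch of such a check (the real dpark helper may test more deeply):

MARSHALABLE_TYPES = (type(None), bool, int, float, complex,
                     str, bytes, list, tuple, dict, set)

def marshalable(obj):
    # plausible sketch: shallow type check only; marshal can still fail
    # on nested unsupported values, hence the try/except fallback above
    return isinstance(obj, MARSHALABLE_TYPES)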
Example #12
def run_task(task_data):
    try:
        gc.disable()
        task, ntry = loads(decompress(task_data))
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        MutableDict.flush()

        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception as e:
                flag, data = 1, cPickle.dumps(result, -1)

        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            f = open(path, 'w')
            f.write(data)
            f.close()
            data = '/'.join(
                [LocalFileShuffle.getServerUri()] + path.split('/')[-3:]
            )
            flag += 2

        return 'TASK_FINISHED', cPickle.dumps(
            (Success(), (flag, data), accUpdate), -1)
    except FetchFailed as e:
        return 'TASK_FAILED', cPickle.dumps((e, None, None), -1)
    except:
        import traceback
        msg = traceback.format_exc()
        return 'TASK_FAILED', cPickle.dumps(
            (OtherFailure(msg), None, None), -1)
    finally:
        close_mfs()
        gc.collect()
        gc.enable()
Example #13
    def load_stream(self, stream):
        while True:
            head = stream.read(5)
            if not head:
                return
            length, is_marshal, is_sorted = unpack_header(head)
            assert (is_sorted)
            buf = stream.read(length)
            if len(buf) < length:
                raise IOError("length not match: expected %d, but got %d" %
                              (length, len(buf)))

            buf = decompress(buf)
            AutoBatchedSerializer.size_loaded += len(buf)
            if is_marshal:
                vs = marshal.loads(buf)
            else:
                vs = pickle.loads(buf)
            for v in vs:
                yield v
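unpack_header consumes the 5-byte header read above. One plausible layout, assuming a one-byte flag word followed by a 4-byte length (the real dpark header may pack these differently):

import struct

def unpack_header(head):
    # plausible sketch only: bit 0 marks marshal vs pickle, bit 1 marks
    # sorted; the remaining 4 bytes give the compressed-payload length
    flag, length = struct.unpack("<BI", head)
    return length, bool(flag & 1), bool(flag & 2)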
Example #14
    def unsorted_batches(self):
        f = None
        #TEST_RETRY = True
        try:
            f, exp_size = self.open()
            total_size = 0

            while True:
                head = f.read(5)
                if len(head) == 0:
                    break
                length, is_marshal, is_sorted = unpack_header(head)
                assert (not is_sorted)
                total_size += length + 5
                d = f.read(length)
                if length != len(d):
                    raise IOError("length not match: expected %d, but got %d" %
                                  (length, len(d)))
                d = decompress(d)
                if is_marshal:
                    items = marshal.loads(d)
                else:
                    try:
                        items = pickle.loads(d)
                    except:
                        time.sleep(1)
                        items = pickle.loads(d)
                yield items

                #if TEST_RETRY and self.num_retry == 0:
                #    raise Exception("test_retry")

            if total_size != exp_size:
                raise IOError("fetch size not match: expected %d, but got %d" %
                              (exp_size, total_size))

            env.task_stats.bytes_fetch += exp_size
        finally:
            if f:
                f.close()
Example #15
    def fetch_one(self, uri, shuffleId, part, reduceId):
        if uri == LocalFileShuffle.getServerUri():
            # urllib can open local file
            url = LocalFileShuffle.getOutputFile(shuffleId, part, reduceId)
        else:
            url = "%s/%d/%d/%d" % (uri, shuffleId, part, reduceId)
        logger.debug("fetch %s", url)

        tries = 4
        while True:
            try:
                f = urllib.urlopen(url)
                if f.code == 404:
                    f.close()
                    raise IOError("%s not found", url)
                
                d = f.read()
                flag = d[:1]
                length, = struct.unpack("I", d[1:5])
                if length != len(d):
                    raise ValueError("length not match: expected %d, but got %d" % (length, len(d)))
                d = decompress(d[5:])
                f.close()
                if flag == 'm':
                    d = marshal.loads(d)
                elif flag == 'p':
                    d = cPickle.loads(d)
                else:
                    raise ValueError("invalid flag")
                return d
            except Exception, e:
                logger.debug("Fetch failed for shuffle %d, reduce %d, %d, %s, %s, try again",
                        shuffleId, reduceId, part, url, e)
                tries -= 1
                if not tries:
                    logger.error("Fetch failed for shuffle %d, reduce %d, %d, %s, %s",
                            shuffleId, reduceId, part, url, e)
                    raise
                time.sleep(2**(3-tries))
Example #16
    def _fetch_dct(self):
        f = urllib.request.urlopen(self.url)
        if f.code == 404:
            f.close()
            raise IOError("not found")

        d = f.read()
        flag = d[:1]
        length, = struct.unpack("I", d[1:5])
        if length != len(d):
            raise ValueError("length not match: expected %d, but got %d" %
                             (length, len(d)))
        env.task_stats.bytes_shuffle_read += length
        d = decompress(d[5:])
        f.close()
        if flag == b'm':
            d = marshal.loads(d)
        elif flag == b'p':
            d = pickle.loads(d)
        else:
            raise ValueError("invalid flag")
        return d
Example #17
    def launchTask(self, driver, task):
        try:
            t, ntry = cPickle.loads(decompress(task.data))

            reply_status(driver, task, mesos_pb2.TASK_RUNNING)

            logging.debug("launch task %s", t.id)

            pool = self.get_idle_worker()
            self.busy_workers[task.task_id.value] = (task, pool)

            def callback((state, data)):
                with self.lock:
                    if task.task_id.value not in self.busy_workers:
                        return
                    _, pool = self.busy_workers.pop(task.task_id.value)
                    pool.done += 1
                    reply_status(driver, task, state, data)
                    if (len(self.idle_workers) + len(self.busy_workers) <
                            self.parallel
                            and len(self.idle_workers) < MAX_IDLE_WORKERS
                            and pool.done < MAX_TASKS_PER_WORKER
                            and get_pool_memory(pool) < get_task_memory(task)
                        ):  # maybe memory leak in executor
                        self.idle_workers.append((time.time(), pool))
                    else:
                        try:
                            pool.terminate()
                        except:
                            pass

            pool.apply_async(run_task, [t, ntry], callback=callback)

        except Exception, e:
            import traceback
            msg = traceback.format_exc()
            reply_status(driver, task, mesos_pb2.TASK_LOST, msg)
            return
Example #18
    def statusUpdate(self, driver, status):

        def plot_progresses():
            if self.color:
                total = len(self.activeJobs)
                logger.info('\x1b[2K\x1b[J\x1b[1A')
                for i, jid in enumerate(self.activeJobs):
                    if i == total - 1:
                        ending = '\x1b[%sA' % total
                    else:
                        ending = ''

                    jobs = self.activeJobs[jid]
                    jobs.progress(ending)

        tid = status.task_id.value
        state = status.state
        logger.debug('status update: %s %s', tid, state)

        jid = self.taskIdToJobId.get(tid)
        _, task_id, tried = list(map(int, tid.split(':')))
        if state == 'TASK_RUNNING':
            if jid in self.activeJobs:
                job = self.activeJobs[jid]
                job.statusUpdate(task_id, tried, state)
                if job.tasksFinished == 0:
                    plot_progresses()
            else:
                logger.debug('kill task %s as its job has gone', tid)
                self.driver.killTask(Dict(value=tid))

            return

        self.taskIdToJobId.pop(tid, None)
        if jid in self.jobTasks:
            self.jobTasks[jid].remove(tid)
        if tid in self.taskIdToAgentId:
            agent_id = self.taskIdToAgentId[tid]
            if agent_id in self.agentTasks:
                self.agentTasks[agent_id] -= 1
            del self.taskIdToAgentId[tid]

        if jid not in self.activeJobs:
            logger.debug('ignore task %s as its job has gone', tid)
            return

        job = self.activeJobs[jid]
        reason = status.get('message')
        data = status.get('data')
        if state in ('TASK_FINISHED', 'TASK_FAILED') and data:
            try:
                reason, result, accUpdate, task_stats = six.moves.cPickle.loads(
                    decode_data(data))
                if result:
                    flag, data = result
                    if flag >= 2:
                        try:
                            data = urllib.request.urlopen(data).read()
                        except IOError:
                            # try again
                            data = urllib.request.urlopen(data).read()
                        flag -= 2
                    data = decompress(data)
                    if flag == 0:
                        result = marshal.loads(data)
                    else:
                        result = six.moves.cPickle.loads(data)
            except Exception as e:
                logger.warning(
                    'error when cPickle.loads(): %s, data:%s', e, len(data))
                state = 'TASK_FAILED'
                job.statusUpdate(task_id, tried, state, 'load failed: %s' % e)
                return
            else:
                job.statusUpdate(task_id, tried, state, reason, result, accUpdate, task_stats)
                if state == 'TASK_FINISHED':
                    plot_progresses()
                return

        # killed, lost, load failed
        job.statusUpdate(task_id, tried, state, reason or data)
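decode_data undoes the transport encoding of status.data; with the JSON-based scheduler API the bytes are typically base64-encoded, so a minimal sketch could be (assuming the base64 convention used by pymesos's helpers):

import base64

def decode_data(data):
    # sketch assuming binary payloads ride through the JSON scheduler
    # API base64-encoded; not the confirmed implementation
    return base64.b64decode(data)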
Example #19
    def unBlockifyObject(self, blocks):
        s = ''.join(decompress(b.data) for b in blocks)
        try:
            return marshal.loads(s)
        except Exception:
            return cPickle.loads(s)
Example #20
    def statusUpdate(self, driver, status):
        tid = status.task_id.value
        state = status.state
        logger.debug('status update: %s %s', tid, state)

        jid = self.taskIdToJobId.get(tid)
        _, task_id, tried = map(int, tid.split(':'))
        if state == 'TASK_RUNNING':
            if jid in self.activeJobs:
                job = self.activeJobs[jid]
                job.statusUpdate(task_id, tried, state)
            else:
                logger.debug('kill task %s as its job has gone', tid)
                self.driver.killTask(Dict(value=tid))

            return

        self.taskIdToJobId.pop(tid, None)
        if jid in self.jobTasks:
            self.jobTasks[jid].remove(tid)
        if tid in self.taskIdToAgentId:
            agent_id = self.taskIdToAgentId[tid]
            if agent_id in self.agentTasks:
                self.agentTasks[agent_id] -= 1
            del self.taskIdToAgentId[tid]

        if jid not in self.activeJobs:
            logger.debug('ignore task %s as its job has gone', tid)
            return

        job = self.activeJobs[jid]
        data = status.get('data')
        if state in ('TASK_FINISHED', 'TASK_FAILED') and data:
            try:
                reason, result, accUpdate = cPickle.loads(
                    decode_data(data))
                if result:
                    flag, data = result
                    if flag >= 2:
                        try:
                            data = urllib.urlopen(data).read()
                        except IOError:
                            # try again
                            data = urllib.urlopen(data).read()
                        flag -= 2
                    data = decompress(data)
                    if flag == 0:
                        result = marshal.loads(data)
                    else:
                        result = cPickle.loads(data)
            except Exception as e:
                logger.warning(
                    'error when cPickle.loads(): %s, data:%s', e, len(data))
                state = 'TASK_FAILED'
                return job.statusUpdate(
                    task_id, tried, 'TASK_FAILED', 'load failed: %s' % e)
            else:
                return job.statusUpdate(task_id, tried, state,
                                        reason, result, accUpdate)

        # killed, lost, load failed
        job.statusUpdate(task_id, tried, state, data)
Example #21
    def statusUpdate(self, driver, status):
        tid = status.task_id.value
        state = status.state
        logger.debug('status update: %s %s', tid, state)

        jid = self.taskIdToJobId.get(tid)
        _, task_id, tried = map(int, tid.split(':'))
        if state == 'TASK_RUNNING':
            if jid in self.activeJobs:
                job = self.activeJobs[jid]
                job.statusUpdate(task_id, tried, state)
            else:
                logger.debug('kill task %s as its job has gone', tid)
                self.driver.killTask(Dict(value=tid))

            return

        self.taskIdToJobId.pop(tid, None)
        if jid in self.jobTasks:
            self.jobTasks[jid].remove(tid)
        if tid in self.taskIdToAgentId:
            agent_id = self.taskIdToAgentId[tid]
            if agent_id in self.agentTasks:
                self.agentTasks[agent_id] -= 1
            del self.taskIdToAgentId[tid]

        if jid not in self.activeJobs:
            logger.debug('ignore task %s as its job has gone', tid)
            return

        job = self.activeJobs[jid]
        data = status.get('data')
        if state in ('TASK_FINISHED', 'TASK_FAILED') and data:
            try:
                reason, result, accUpdate = cPickle.loads(decode_data(data))
                if result:
                    flag, data = result
                    if flag >= 2:
                        try:
                            data = urllib.urlopen(data).read()
                        except IOError:
                            # try again
                            data = urllib.urlopen(data).read()
                        flag -= 2
                    data = decompress(data)
                    if flag == 0:
                        result = marshal.loads(data)
                    else:
                        result = cPickle.loads(data)
            except Exception as e:
                logger.warning('error when cPickle.loads(): %s, data:%s', e,
                               len(data))
                state = 'TASK_FAILED'
                return job.statusUpdate(task_id, tried, 'TASK_FAILED',
                                        'load failed: %s' % e)
            else:
                return job.statusUpdate(task_id, tried, state, reason, result,
                                        accUpdate)

        # killed, lost, load failed
        job.statusUpdate(task_id, tried, state, data)
Example #22
    def unBlockifyObject(self, blocks):
        s = ''.join(decompress(b.data) for b in blocks)
        if s[0] == '0':
            return marshal.loads(s[1:])
        else:
            return cPickle.loads(s[1:])
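Here the '0'/'1' prefix distinguishes marshal from pickle. A hypothetical writer-side counterpart (the blockifyObject name, the block_size default, and returning raw compressed chunks are all assumptions; the real Block objects carry the chunk in a .data attribute):

import marshal
import cPickle
from dpark.util import compress  # assumed import path

def blockifyObject(obj, block_size=1 << 20):
    # hypothetical inverse of unBlockifyObject above: tag with '0' for
    # marshal or '1' for pickle, then compress fixed-size slices
    try:
        s = '0' + marshal.dumps(obj)
    except Exception:
        s = '1' + cPickle.dumps(obj, -1)
    return [compress(s[i:i + block_size])
            for i in range(0, len(s), block_size)]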