Example #1
File: cs.py Project: xunzhang/dpark
def read_chunk_from_local(chunkid, version, size, offset=0):
    if offset + size > CHUNKSIZE:
        raise ValueError("size too large %s > %s" % (size, CHUNKSIZE - offset))
    
    from dpark.accumulator import LocalReadBytes
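    # Chunk files are sharded into 256 subdirectories by the low byte of the id.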
    name = '%02X/chunk_%016X_%08X.mfs' % (chunkid & 0xFF, chunkid, version)
    for d in mfsdirs:
        p = os.path.join(d, name)
        if os.path.exists(p):
            if os.path.getsize(p) < CHUNKHDRSIZE + offset + size:
                logger.error('%s is not completed: %d < %d', name,
                        os.path.getsize(p), CHUNKHDRSIZE + offset + size)
                return
                #raise ValueError("size too large")
            f = open(p)
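            # Skip the fixed-size chunk header; the payload starts after it.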
            f.seek(CHUNKHDRSIZE + offset)
            while size > 0:
                to_read = min(size, 640*1024)
                data = f.read(to_read)
                if not data:
                    return
                LocalReadBytes.add(len(data))
                yield data
                size -= len(data)
            f.close()
            return
    else:
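        # for/else: runs only when no local mfs directory had the chunk file.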
        logger.warning("%s was not found", name)
Example #2
File: cs.py Project: zofuthan/dpark
def read_chunk_from_local(chunkid, version, size, offset=0):
    if offset + size > CHUNKSIZE:
        raise ValueError("size too large %s > %s" % (size, CHUNKSIZE - offset))

    from dpark.accumulator import LocalReadBytes
    name = '%02X/chunk_%016X_%08X.mfs' % (chunkid & 0xFF, chunkid, version)
    for d in mfsdirs:
        p = os.path.join(d, name)
        if os.path.exists(p):
            if os.path.getsize(p) < CHUNKHDRSIZE + offset + size:
                logger.error('%s is not completed: %d < %d', name,
                             os.path.getsize(p), CHUNKHDRSIZE + offset + size)
                return
                #raise ValueError("size too large")
            f = open(p)
            f.seek(CHUNKHDRSIZE + offset)
            while size > 0:
                to_read = min(size, 640 * 1024)
                data = f.read(to_read)
                if not data:
                    return
                LocalReadBytes.add(len(data))
                yield data
                size -= len(data)
            f.close()
            return
    else:
        logger.warning("%s was not found", name)
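
Both variants stream the chunk payload as a generator of byte strings. A minimal consumption sketch, assuming cs.py's module-level mfsdirs, CHUNKSIZE, CHUNKHDRSIZE, and logger are already configured; the chunkid, version, and size values below are hypothetical:

# Hypothetical ids for illustration; real values come from the MooseFS master.
chunkid, version, size = 0x12AB, 1, 1 << 20

parts = []
for block in read_chunk_from_local(chunkid, version, size):
    parts.append(block)  # each block is at most 640 KB in these variants
data = ''.join(parts)    # Python 2 str is bytes, matching the dpark era
if len(data) < size:
    # The generator returns early when the chunk file is missing or truncated.
    logger.warning('short read: %d of %d bytes', len(data), size)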
Example #3
def read_chunk_from_local(chunkid, version, size, offset=0):
    if offset + size > CHUNKSIZE:
        raise ValueError("size too large %s > %s" % (size, CHUNKSIZE - offset))

    from dpark.accumulator import ReadBytes, LocalReadBytes
    name = '%02X/chunk_%016X_%08X.mfs' % (chunkid & 0xFF, chunkid, version)
    for d in mfsdirs:
        p = os.path.join(d, name)
        if os.path.exists(p):
            if os.path.getsize(p) < CHUNKHDRSIZE + offset + size:
                print p, 'is not completed', os.path.getsize(p), \
                    '<', CHUNKHDRSIZE + offset + size
                return
                #raise ValueError("size too large")
            f = open(p)
            f.seek(CHUNKHDRSIZE + offset)
            while size > 0:
                to_read = min(size, 1024 * 1024 * 4)
                data = f.read(to_read)
                if not data:
                    return
                LocalReadBytes.add(len(data))
                yield data
                size -= len(data)
            f.close()
            return
Example #4
File: cs.py Project: cute/dpark
def read_chunk_from_local(chunkid, version, size, offset=0):
    if offset + size > CHUNKSIZE:
        raise ValueError("size too large %s > %s" % (size, CHUNKSIZE - offset))
    
    from dpark.accumulator import ReadBytes, LocalReadBytes
    name = '%02X/chunk_%016X_%08X.mfs' % (chunkid & 0xFF, chunkid, version)
    for d in mfsdirs:
        p = os.path.join(d, name)
        if os.path.exists(p):
            if os.path.getsize(p) < CHUNKHDRSIZE + offset + size:
                print p, 'is not completed', os.path.getsize(p), '<', CHUNKHDRSIZE + offset + size
                return
                #raise ValueError("size too large")
            f = open(p)
            f.seek(CHUNKHDRSIZE + offset)
            while size > 0:
                to_read = min(size, 1024*1024*4)
                data = f.read(to_read)
                if not data:
                    return
                LocalReadBytes.add(len(data))
                yield data
                size -= len(data)
            f.close()
            return
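
The four read_chunk_from_local variants differ only in read granularity (640 KB in #1/#2, 4 MB in #3/#4) and in how they report an incomplete chunk. The bounded-read loop they share can be sketched on its own; read_bounded below is a hypothetical helper, not part of dpark:

def read_bounded(path, offset, size, block=4 * 1024 * 1024):
    # Yield at most `size` bytes from `path` starting at `offset`,
    # in pieces of at most `block` bytes.
    f = open(path, 'rb')
    try:
        f.seek(offset)
        while size > 0:
            data = f.read(min(size, block))
            if not data:  # EOF before `size` bytes: stop early
                return
            yield data
            size -= len(data)
    finally:
        f.close()  # runs even if the consumer abandons the generator

Unlike the excerpts above, this sketch opens the file in binary mode and closes it in a finally block, so the descriptor is released even when the generator exits early or is dropped mid-iteration; in the dpark variants, the early return paths leave the handle for the interpreter to reclaim.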
Example #5
File: job.py Project: XuYong/dpark
    def taskFinished(self, tid, tried, result, update):
        i = self.tidToIndex[tid]
        self.finished[i] = True
        self.tasksFinished += 1
        task = self.tasks[i]
        task.used += time.time() - task.start
        self.total_used += task.used
        if sys.stderr.isatty():
            title = "Job %d: task %s finished in %.1fs (%d/%d)     " % (self.id, tid,
                task.used, self.tasksFinished, self.numTasks)
            logger.info("Task %s finished in %.1fs (%d/%d)      \x1b]2;%s\x07\x1b[1A",
                    tid, task.used, self.tasksFinished, self.numTasks, title)

        from dpark.schedule import Success
        self.sched.taskEnded(task, Success(), result, update)

        for t in range(task.tried):
            if t + 1 != tried:
                self.sched.killTask(self.id, task.id, t + 1)

        if self.tasksFinished == self.numTasks:
            ts = [t.used for t in self.tasks]
            tried = [t.tried for t in self.tasks]
            logger.info("Job %d finished in %.1fs: min=%.1fs, avg=%.1fs, max=%.1fs, maxtry=%d",
                self.id, time.time()-self.start,
                min(ts), sum(ts)/len(ts), max(ts), max(tried))
            from dpark.accumulator import LocalReadBytes, RemoteReadBytes
            lb, rb = LocalReadBytes.reset(), RemoteReadBytes.reset()
            if rb > 0:
                logger.info("read %s (%d%% localized)",
                    readable(lb+rb), lb*100/(rb+lb))
            
            self.sched.jobFinished(self)
Example #6
File: job.py Project: zhaochl/dpark
    def taskFinished(self, tid, tried, result, update):
        i = self.tidToIndex[tid]
        self.finished[i] = True
        self.tasksFinished += 1
        task = self.tasks[i]
        task.used += time.time() - task.start
        self.total_used += task.used
        if getattr(sys.stderr, 'isatty', lambda: False)():
            title = 'Job %d: task %s finished in %.1fs (%d/%d)     ' % (
                self.id, tid, task.used, self.tasksFinished, self.numTasks)
            logger.info(
                'Task %s finished in %.1fs (%d/%d)'
                '      \x1b]2;%s\x07\x1b[1A', tid, task.used,
                self.tasksFinished, self.numTasks, title)

        from dpark.schedule import Success
        self.sched.taskEnded(task, Success(), result, update)

        for t in range(task.tried):
            if t + 1 != tried:
                self.sched.killTask(self.id, task.id, t + 1)

        if self.tasksFinished == self.numTasks:
            ts = [t.used for t in self.tasks]
            tried = [t.tried for t in self.tasks]
            logger.info(
                'Job %d finished in %.1fs: min=%.1fs, '
                'avg=%.1fs, max=%.1fs, maxtry=%d', self.id,
                time.time() - self.start, min(ts),
                sum(ts) / len(ts), max(ts), max(tried))
            from dpark.accumulator import LocalReadBytes, RemoteReadBytes
            lb, rb = LocalReadBytes.reset(), RemoteReadBytes.reset()
            if rb > 0:
                logger.info('read %s (%d%% localized)', readable(lb + rb),
                            lb * 100 / (rb + lb))

            self.sched.jobFinished(self)
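
Both taskFinished variants assume the same accumulator contract: add() grows a running total and reset() returns that total while zeroing it, so each job's byte counts start fresh. A minimal sketch of that contract; the class below is an assumption for illustration, not dpark's actual Accumulator:

class CountAccumulator(object):
    # Assumed semantics: add() accumulates, reset() returns and clears.
    def __init__(self):
        self.value = 0

    def add(self, n):
        self.value += n

    def reset(self):
        total, self.value = self.value, 0
        return total

LocalReadBytes = CountAccumulator()
LocalReadBytes.add(4096)
LocalReadBytes.add(1024)
assert LocalReadBytes.reset() == 5120
assert LocalReadBytes.value == 0

With lb and rb obtained this way, lb * 100 / (rb + lb) is the share of bytes served from local chunk files, which is what the '(%d%% localized)' log line reports.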