def read_chunk_from_local(chunkid, version, size, offset=0):
    """Read `size` bytes of a chunk from the local MooseFS data dirs,
    yielding blocks of at most 640 KiB."""
    if offset + size > CHUNKSIZE:
        raise ValueError("size too large %s > %s" % (size, CHUNKSIZE - offset))

    from dpark.accumulator import LocalReadBytes
    name = '%02X/chunk_%016X_%08X.mfs' % (chunkid & 0xFF, chunkid, version)
    for d in mfsdirs:
        p = os.path.join(d, name)
        if not os.path.exists(p):
            logger.warning("%s was not found", name)
            continue
        if os.path.getsize(p) < CHUNKHDRSIZE + offset + size:
            logger.error('%s is not completed: %d < %d', name,
                         os.path.getsize(p), CHUNKHDRSIZE + offset + size)
            return
        # Open in binary mode; `with` guarantees the handle is closed even
        # if the generator exits early on a short read.
        with open(p, 'rb') as f:
            f.seek(CHUNKHDRSIZE + offset)  # skip the chunk header
            while size > 0:
                to_read = min(size, 640 * 1024)
                data = f.read(to_read)
                if not data:
                    return
                LocalReadBytes.add(len(data))
                yield data
                size -= len(data)
        return
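# A minimal usage sketch (the chunk id, version, and size below are
# hypothetical; real values come from the MooseFS master). The reader is a
# generator, so the caller joins the yielded blocks:
def _demo_read_chunk():
    blocks = list(read_chunk_from_local(0x1234, 1, size=1024, offset=0))
    # `blocks` is empty if no local copy was found or it was incomplete.
    return b''.join(blocks)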
# Earlier revision of the same reader: it reports incomplete chunks on
# stdout instead of the logger, reads in 4 MiB blocks, and stays silent
# when no data dir holds the chunk.
def read_chunk_from_local(chunkid, version, size, offset=0):
    if offset + size > CHUNKSIZE:
        raise ValueError("size too large %s > %s" % (size, CHUNKSIZE - offset))

    from dpark.accumulator import LocalReadBytes
    name = '%02X/chunk_%016X_%08X.mfs' % (chunkid & 0xFF, chunkid, version)
    for d in mfsdirs:
        p = os.path.join(d, name)
        if os.path.exists(p):
            if os.path.getsize(p) < CHUNKHDRSIZE + offset + size:
                print('%s is not completed: %d < %d' % (
                    p, os.path.getsize(p), CHUNKHDRSIZE + offset + size))
                return
            with open(p, 'rb') as f:
                f.seek(CHUNKHDRSIZE + offset)
                while size > 0:
                    to_read = min(size, 1024 * 1024 * 4)
                    data = f.read(to_read)
                    if not data:
                        return
                    LocalReadBytes.add(len(data))
                    yield data
                    size -= len(data)
            return
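# Both readers rely on module-level names defined elsewhere in dpark's
# MooseFS client. A minimal sketch of that assumed context (the constant
# values and the data-dir path are placeholders, not authoritative):
import logging
import os

logger = logging.getLogger(__name__)

CHUNKSIZE = 64 * 1024 * 1024  # assumed max payload per chunk (64 MiB)
CHUNKHDRSIZE = 5 * 1024       # assumed header size skipped before payload
mfsdirs = ['/var/lib/mfs']    # example chunkserver data directories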
def taskFinished(self, tid, tried, result, update):
    i = self.tidToIndex[tid]
    self.finished[i] = True
    self.tasksFinished += 1
    task = self.tasks[i]
    task.used += time.time() - task.start
    self.total_used += task.used
    # Only emit the xterm escape sequence (set window title, then move the
    # cursor up one line) when stderr is a terminal; getattr() guards
    # against file-like objects without isatty().
    if getattr(sys.stderr, 'isatty', lambda: False)():
        title = 'Job %d: task %s finished in %.1fs (%d/%d) ' % (
            self.id, tid, task.used, self.tasksFinished, self.numTasks)
        logger.info('Task %s finished in %.1fs (%d/%d)'
                    ' \x1b]2;%s\x07\x1b[1A',
                    tid, task.used, self.tasksFinished, self.numTasks, title)

    from dpark.schedule import Success
    self.sched.taskEnded(task, Success(), result, update)
    # Kill any other (speculative) attempts of this task.
    for t in range(task.tried):
        if t + 1 != tried:
            self.sched.killTask(self.id, task.id, t + 1)

    if self.tasksFinished == self.numTasks:
        ts = [t.used for t in self.tasks]
        tried = [t.tried for t in self.tasks]
        logger.info('Job %d finished in %.1fs: min=%.1fs, '
                    'avg=%.1fs, max=%.1fs, maxtry=%d',
                    self.id, time.time() - self.start, min(ts),
                    sum(ts) / len(ts), max(ts), max(tried))
        from dpark.accumulator import LocalReadBytes, RemoteReadBytes
        lb, rb = LocalReadBytes.reset(), RemoteReadBytes.reset()
        if rb > 0:
            logger.info('read %s (%d%% localized)',
                        readable(lb + rb), lb * 100 / (rb + lb))
        self.sched.jobFinished(self)
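# taskFinished() calls a readable() helper that is not shown in this
# section; it is assumed to format a byte count with a human-readable
# unit suffix. A minimal sketch of that assumption:
def readable(size):
    units = ['B', 'K', 'M', 'G', 'T']
    size = float(size)
    for unit in units:
        if size < 1024 or unit == units[-1]:
            return '%.1f%s' % (size, unit)
        size /= 1024.0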