def _readJobs(self, jobLimit):
    utils.ensureDirExists(self._dbPath, 'job database directory', JobError)

    candidates = []
    for jobFile in fnmatch.filter(os.listdir(self._dbPath), 'job_*.txt'):
        try:  # 2xsplit is faster than regex
            jobNum = int(jobFile.split(".")[0].split("_")[1])
        except Exception:
            continue
        candidates.append((jobNum, jobFile))

    (jobMap, maxJobs) = ({}, len(candidates))
    activity = Activity('Reading job infos')
    idx = 0
    for (jobNum, jobFile) in sorted(candidates):
        idx += 1
        if (jobLimit >= 0) and (jobNum >= jobLimit):
            self._log.info('Stopped reading job infos at job #%d out of %d available job files, '
                'since the limit of %d jobs is reached', jobNum, len(candidates), jobLimit)
            break
        jobObj = self._load_job(os.path.join(self._dbPath, jobFile))
        jobMap[jobNum] = jobObj
        if idx % 100 == 0:
            activity.update('Reading job infos %d [%d%%]' % (idx, (100.0 * idx) / maxJobs))
    activity.finish()
    return jobMap

def _read_jobs(self, job_limit):
    ensure_dir_exists(self._path_db, 'job database directory', JobError)

    candidates = []
    for job_fn in fnmatch.filter(os.listdir(self._path_db), 'job_*.txt'):
        try:  # 2xsplit is faster than regex
            jobnum = int(job_fn.split(".")[0].split("_")[1])
        except Exception:
            clear_current_exception()
            continue
        candidates.append((jobnum, job_fn))

    (job_map, max_job_len) = ({}, len(candidates))
    activity = Activity('Reading job infos')
    idx = 0
    for (jobnum, job_fn) in sorted(candidates):
        idx += 1
        if jobnum >= job_limit >= 0:
            self._log.info('Stopped reading job infos at job #%d out of %d available job files, ' +
                'since the limit of %d jobs is reached', jobnum, len(candidates), job_limit)
            break
        try:
            job_fn_full = os.path.join(self._path_db, job_fn)
            data = self._fmt.parse(SafeFile(job_fn_full).iter_close())
            job_obj = self._create_job_obj(job_fn_full, data)
        except Exception:
            raise JobError('Unable to process job file %r' % job_fn_full)
        job_map[jobnum] = job_obj
        activity.update('Reading job infos %d [%d%%]' % (idx, (100.0 * idx) / max_job_len))
    activity.finish()
    return job_map

def create_tarball(match_info_iter, **kwargs):
    tar = tarfile.open(mode='w:gz', **kwargs)
    activity = Activity('Generating tarball')
    for match_info in match_info_iter:
        if isinstance(match_info, tuple):
            (path_source, path_target) = match_info
        else:
            (path_source, path_target) = (match_info, None)
        if isinstance(path_source, str):
            if not os.path.exists(path_source):
                raise PathError('File %s does not exist!' % path_source)
            tar.add(path_source, path_target or os.path.basename(path_source), recursive=False)
        elif path_source is None:  # Update activity
            activity.update('Generating tarball: %s' % path_target)
        else:  # File handle
            info, handle = path_source.get_tar_info()
            if path_target:
                info.name = path_target
            info.mtime = time.time()
            info.mode = stat.S_IRUSR + stat.S_IWUSR + stat.S_IRGRP + stat.S_IROTH
            if info.name.endswith('.sh') or info.name.endswith('.py'):
                info.mode += stat.S_IXUSR + stat.S_IXGRP + stat.S_IXOTH
            tar.addfile(info, handle)
            handle.close()
    activity.finish()
    tar.close()

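# Usage sketch for create_tarball (hypothetical paths; the **kwargs are forwarded
# to tarfile.open, so e.g. name=... selects the output archive):
#   create_tarball([
#       ('scripts/run.sh', 'run.sh'),   # (source, target) tuple -> stored under the target name
#       'config.conf',                  # plain path -> stored under its basename
#       (None, 'job overview'),         # source None -> only refreshes the activity display
#   ], name='sandbox.tar.gz')
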
def _read_jobs(self, job_limit):
    job_map = {}
    max_job_len = 0
    if os.path.exists(self._db_fn):
        try:
            tar = zipfile.ZipFile(self._db_fn, 'r', zipfile.ZIP_DEFLATED)
            tar.testzip()
        except Exception:  # Try to recover job archive
            clear_current_exception()
            self._log.warning('Job database is corrupted - starting recovery')
            self._recover_jobs()
            self._log.info('Recover completed!')
        activity = Activity('Reading job transactions')
        max_job_len = len(tar.namelist())
        map_jobnum2tarfn = {}
        for idx, tar_info_fn in enumerate(tar.namelist()):
            (jobnum, tid) = tuple(imap(lambda s: int(s[1:]), tar_info_fn.split('_', 1)))
            if tid < map_jobnum2tarfn.get(jobnum, 0):
                continue
            try:
                data = self._fmt.parse(tar.open(tar_info_fn).read())
            except Exception:
                clear_current_exception()
                continue
            job_map[jobnum] = self._create_job_obj(tar_info_fn, data)
            map_jobnum2tarfn[jobnum] = tid
            if idx % 100 == 0:
                activity.update('Reading job transactions %d [%d%%]' % (idx, (100.0 * idx) / max_job_len))
        activity.finish()
        self._serial = max_job_len
    return job_map

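# Note on the archive layout (a sketch; the exact prefix letters are an assumption):
# each zip member name encodes the job number and a transaction id, each with a
# one-character prefix that int(s[1:]) strips, e.g. something like 'J42_T7'.
# A member whose transaction id is lower than one already seen for that job is
# skipped, so the latest transaction per job ends up in job_map.
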
def _saveStateToTar(self, tar, meta, source, sourceLen, message):
    # Write the splitting info grouped into subtarfiles
    activity = Activity(message)
    (jobNum, lastValid, subTar) = (-1, -1, None)
    for jobNum, entry in enumerate(source):
        if not entry.get(DataSplitter.Invalid, False):
            lastValid = jobNum
        if jobNum % self._keySize == 0:
            self._closeSubTar(tar, subTar)
            subTar = self._createSubTar('%03dXX.tgz' % int(jobNum / self._keySize))
            activity.update('%s [%d / %d]' % (message, jobNum, sourceLen))
        # Determine shortest way to store file list
        tmp = entry.pop(DataSplitter.FileList)
        savelist = self._getReducedFileList(entry, tmp)  # can modify entry
        # Write files with infos / filelist
        data = str.join('', self._fmt.format(entry, fkt=self._formatFileEntry) +
            lmap(lambda fn: '=%s\n' % fn, savelist))
        self._addToSubTar(subTar, '%05d' % jobNum, data)
        # Remove common prefix from info
        if DataSplitter.CommonPrefix in entry:
            entry.pop(DataSplitter.CommonPrefix)
        entry[DataSplitter.FileList] = tmp
    self._closeSubTar(tar, subTar)
    activity.finish()
    # Write metadata to allow reconstruction of data splitter
    meta['MaxJobs'] = lastValid + 1
    for (fn, data) in [('Metadata', self._fmt.format(meta)), ('Version', '2')]:
        self._addToTar(tar, fn, data)

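# Resulting archive layout (sketch): partitions are grouped into sub-tarballs of
# self._keySize entries each, named after the partition range they cover, with the
# splitter metadata stored at the top level, e.g. for a key size of 100:
#   000XX.tgz  -> members '00000' ... '00099' (formatted info plus '=<file>' lines)
#   001XX.tgz  -> members '00100' ... '00199'
#   Metadata, Version
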
def wait(timeout):
    activity = Activity('Waiting', parent='root')
    for remaining in irange(timeout, 0, -1):
        if abort():
            return False
        if (remaining == timeout) or (remaining < 5) or (remaining % 5 == 0):
            activity.update('Waiting for %d seconds' % remaining)
        time.sleep(1)
    activity.finish()
    return True

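# Usage sketch: block for up to 60 seconds, returning False early if the
# framework's global abort() flag gets set in the meantime:
#   if not wait(60):
#       return
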
def getEntries(self, path, metadata, events, seList, objStore):
    metadata['GC_SOURCE_DIR'] = self._path
    counter = 0
    activity = Activity('Reading source directory')
    for fn in self._iter_path():
        activity.update('Reading source directory - [%d]' % counter)
        yield (os.path.join(self._path, fn.strip()), metadata, events, seList, objStore)
        counter += 1
    activity.finish()

def _readJobs(self, jobLimit):
    jobMap = {}
    maxJobs = 0
    if os.path.exists(self._dbFile):
        try:
            tar = zipfile.ZipFile(self._dbFile, 'r', zipfile.ZIP_DEFLATED)
        except Exception:  # Try to recover job archive
            self._log.warning('=' * 40 + '\nStarting recovery of broken job database => Answer "y" if asked "Is this a single-disk archive?"!\n' + '=' * 40)
            os.system('zip -FF %s --out %s.tmp 2> /dev/null' % (self._dbFile, self._dbFile))
            os.rename(self._dbFile, self._dbFile + '.broken')
            os.rename(self._dbFile + '.tmp', self._dbFile)
            tar = zipfile.ZipFile(self._dbFile, 'r', zipfile.ZIP_DEFLATED)
            removeFiles([self._dbFile + '.broken'])
            brokenList = []
            for idx, fnTarInfo in enumerate(tar.namelist()):
                (jobNum, tid) = tuple(imap(lambda s: int(s[1:]), fnTarInfo.split('_', 1)))
                try:
                    fp = tar.open(fnTarInfo)
                    try:
                        fp.read()
                    finally:
                        fp.close()
                except Exception:
                    clear_current_exception()
                    brokenList.append(fnTarInfo)  # remember unreadable members for removal below
            for broken in brokenList:
                os.system('zip %s -d %s' % (self._dbFile, broken))
            self._log.info('Recover completed!')
        activity = Activity('Reading job transactions')
        maxJobs = len(tar.namelist())
        tMap = {}
        for idx, fnTarInfo in enumerate(tar.namelist()):
            (jobNum, tid) = tuple(imap(lambda s: int(s[1:]), fnTarInfo.split('_', 1)))
            if tid < tMap.get(jobNum, 0):
                continue
            try:
                data = self._fmt.parse(tar.open(fnTarInfo).read())
            except Exception:
                continue
            jobMap[jobNum] = self._create_job_obj(fnTarInfo, data)
            tMap[jobNum] = tid
            if idx % 100 == 0:
                activity.update('Reading job transactions %d [%d%%]' % (idx, (100.0 * idx) / maxJobs))
        activity.finish()
        self._serial = maxJobs
    return jobMap

def getEntries(self, path, metadata, events, seList, objStore):
    activity = Activity('Reading job logs')
    for jobNum in self._selected:
        activity.update('Reading job logs - [%d / %d]' % (jobNum, self._selected[-1]))
        metadata['GC_JOBNUM'] = jobNum
        objStore.update({'GC_TASK': self._extTask, 'GC_WORKDIR': self._extWorkDir})
        yield (os.path.join(self._extWorkDir, 'output', 'job_%d' % jobNum), metadata, events, seList, objStore)
    activity.finish()

def getEntries(self, path, metadata, events, seList, objStore):
    allDirs = lfilter(lambda fn: fn.startswith('job_'), os.listdir(self._extOutputDir))
    activity = Activity('Reading job logs')
    for idx, dirName in enumerate(allDirs):
        activity.update('Reading job logs - [%d / %d]' % (idx, len(allDirs)))
        try:
            metadata['GC_JOBNUM'] = int(dirName.split('_')[1])
        except Exception:
            continue
        objStore['GC_WORKDIR'] = self._extWorkDir
        if self._selector and not self._selector(metadata['GC_JOBNUM'], None):
            continue
        yield (os.path.join(self._extOutputDir, dirName), metadata, events, seList, objStore)
    activity.finish()

def write(cls, fn, pa):
    fp = ZipFile(fn, 'w')
    try:
        keys = sorted(ifilter(lambda p: not p.untracked, pa.getJobKeys()))
        fp.write('# %s\n' % json.dumps(keys))
        maxN = pa.getMaxJobs()
        if maxN:
            activity = Activity('Writing parameter dump')
            for jobNum in irange(maxN):
                activity.update('Writing parameter dump [%d/%d]' % (jobNum + 1, maxN))
                meta = pa.getJobInfo(jobNum)
                meta_str = str.join('\t', imap(lambda k: json.dumps(meta.get(k, '')), keys))
                if meta.get(ParameterInfo.ACTIVE, True):
                    fp.write('%d\t%s\n' % (jobNum, meta_str))
                else:
                    fp.write('%d!\t%s\n' % (jobNum, meta_str))
            activity.finish()
    finally:
        fp.close()

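# Resulting dump format (keys and values below are hypothetical): a '# ' header
# line with the JSON list of tracked keys, then one line per job with the job
# number and the tab-separated JSON values; a '!' after the job number marks
# inactive parameter points:
#   # ["SEED", "SAMPLE"]
#   0	"1234"	"ttbar"
#   1!	"1235"	"qcd"
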
def genTarball(outFile, fileList):
    tar = tarfile.open(outFile, 'w:gz')
    activity = Activity('Generating tarball')
    for (pathAbs, pathRel, pathStatus) in fileList:
        if pathStatus is True:  # Existing file
            tar.add(pathAbs, pathRel, recursive=False)
        elif pathStatus is False:  # File that still has to be checked for existence
            if not os.path.exists(pathAbs):
                raise UserError('File %s does not exist!' % pathRel)
            tar.add(pathAbs, pathRel, recursive=False)
        elif pathStatus is None:  # Directory
            activity.update('Generating tarball: %s' % pathRel)
        else:  # File handle
            info, handle = pathStatus.getTarInfo()
            info.mtime = time.time()
            info.mode = stat.S_IRUSR + stat.S_IWUSR + stat.S_IRGRP + stat.S_IROTH
            if info.name.endswith('.sh') or info.name.endswith('.py'):
                info.mode += stat.S_IXUSR + stat.S_IXGRP + stat.S_IXOTH
            tar.addfile(info, handle)
            handle.close()
    activity.finish()
    tar.close()

def _saveStateToTar(self, tar, meta, source, sourceLen, message):
    # Write the splitting info grouped into subtarfiles
    activity = Activity(message)
    (jobNum, subTar) = (-1, None)
    for jobNum, entry in enumerate(source):
        if jobNum % 100 == 0:
            self._closeSubTar(tar, subTar)
            subTar = self._createSubTar('%03dXX.tgz' % int(jobNum / 100))
            activity.update('%s [%d / %d]' % (message, jobNum, sourceLen))
        # Determine shortest way to store file list
        tmp = entry.pop(DataSplitter.FileList)
        savelist = self._getReducedFileList(entry, tmp)  # can modify entry
        # Write files with infos / filelist
        for name, data in [('list', str.join('\n', savelist)),
                ('info', self._fmt.format(entry, fkt=self._formatFileEntry))]:
            self._addToSubTar(subTar, os.path.join('%05d' % jobNum, name), data)
        # Remove common prefix from info
        if DataSplitter.CommonPrefix in entry:
            entry.pop(DataSplitter.CommonPrefix)
        entry[DataSplitter.FileList] = tmp
    self._closeSubTar(tar, subTar)
    # Write metadata to allow reconstruction of data splitter
    meta['MaxJobs'] = jobNum + 1
    self._addToTar(tar, 'Metadata', self._fmt.format(meta))
    activity.finish()