def flatten(files):
    """Flatten a list of directories or files to a single list of files.

    Parameters
    ----------
    files : str or list
        A single path, or a collection (list, tuple, set or frozenset) of
        paths.  A leading ``~`` in each path is expanded with
        `os.path.expanduser`.

    Returns
    -------
    files : list
        For every path accepted by `fs.isdir`, the entries returned by
        `fs.ls`; for every path accepted by `fs.isfile`, the path itself.
        Results follow the iteration order of the input.  Paths that are
        neither are skipped silently.
    """
    # Anything that is not a collection of paths is treated as one path.
    # Tuples and sets are included so they are iterated instead of being
    # handed to `os.path.expanduser` as a whole, which would fail.
    if not isinstance(files, (list, tuple, set, frozenset)):
        files = [files]

    res = []
    for entry in files:
        entry = os.path.expanduser(entry)
        if fs.isdir(entry):
            res.extend(fs.ls(entry))
        elif fs.isfile(entry):
            res.append(entry)
    return res
def process_workflow(self, store, stats, wflow):
    """Compare the files listed under a workflow's label with its task records.

    Parameters
    ----------
    store
        Object queried through `failed_tasks`, `merged_tasks`,
        `successful_tasks` and `unfinished_units`.
    stats
        Mapping indexed by workflow label whose values are updated in
        place: slot 0 counts outputs of failed tasks, slot 1 outputs of
        intermediate merged tasks, slot 2 files that should have been
        cleaned up.
    wflow
        The workflow to inspect; `label`, `dependents` and `get_outputs`
        are used.

    Returns
    -------
    tuple
        ``(delete, merged, missing)``: files flagged for deletion, files
        produced by intermediate merged tasks, and tasks for which an
        expected output file was not found.
    """
    label = wflow.label
    present = set(fs.ls(label))
    delete, merged, missing = [], [], []
    cleaned = any(dep.cleanup_input for dep in wflow.dependents)

    def existing_outputs(tasks):
        # Lazily yield every recorded output that is actually present.
        for task, _ in tasks:
            for _, filename in wflow.get_outputs(task):
                if filename in present:
                    yield filename

    for filename in existing_outputs(store.failed_tasks(label)):
        logger.info("found output from failed task: {0}".format(filename))
        stats[label][0] += 1
        delete.append(filename)

    for filename in existing_outputs(store.merged_tasks(label)):
        logger.info("found output from intermediate merged task: {0}".format(filename))
        stats[label][1] += 1
        merged.append(filename)

    if not cleaned:
        # No dependent cleans up this workflow's output, so every output
        # of a successful task is expected to be present.
        for task, _ in store.successful_tasks(label):
            for _, filename in wflow.get_outputs(task):
                if filename not in present:
                    missing.append(task)
                    logger.warning('output file is missing for {0}'.format(task))
        return delete, merged, missing

    for dep in wflow.dependents:
        if not dep.cleanup_input:
            continue

        if store.unfinished_units(dep.label) != 0:
            logger.error(
                "can't validate workflow {}, as its dependents have not completed and cleaned it up".format(label))
            continue

        # NOTE(review): this runs once per finished cleanup dependent, so
        # with several of them the same files are appended and counted
        # repeatedly -- confirm that this is intended.
        for leftover in present:
            logger.warning(
                'found output from tasks that should have been cleaned up: {}'.format(leftover))
        stats[label][2] += len(present)
        delete.extend(present)

    return delete, merged, missing
def validate(self):
    """Check the dataset configuration and prepare the stageout directory.

    All checks run inside the `fs.alternative()` context.

    Raises
    ------
    AttributeError
        If `self.dataset.validate()` returns a false value.
    IOError
        If the directory named by `self.label` exists and is not empty,
        or if `fs.makedirs` fails for it.
    """
    with fs.alternative():
        if not self.dataset.validate():
            raise AttributeError(
                "cannot validate configuration for dataset of workflow '{0}'".format(self.label))

        if fs.exists(self.label) and list(fs.ls(self.label)):
            raise IOError(
                "stageout directory for '{0}' is not empty".format(self.label))

        # Reached when the directory is missing or exists but is empty.
        # Try to create the stageout directory; if this fails, the user
        # does not have access.
        try:
            fs.makedirs(self.label)
        except Exception:
            raise IOError(
                "failed to create stageout directory for '{0}'".format(self.label))
def process_workflow(self, store, stats, wflow):
    """Compare the files listed under a workflow's label with its task records.

    Outputs of failed tasks and of intermediate merged tasks that are
    still present are both flagged for deletion.

    Parameters
    ----------
    store
        Object queried through `failed_tasks`, `merged_tasks`,
        `successful_tasks` and `unfinished_units`.
    stats
        Mapping indexed by workflow label whose values are updated in
        place: slot 0 counts outputs of failed tasks, slot 1 outputs of
        intermediate merged tasks, slot 2 files that should have been
        cleaned up.
    wflow
        The workflow to inspect; `label`, `dependents` and `get_outputs`
        are used.

    Returns
    -------
    tuple
        ``(delete, missing)``: files flagged for deletion, and tasks for
        which an expected output file was not found.
    """
    label = wflow.label
    present = set(fs.ls(label))
    delete = []
    missing = []
    cleaned = any(dep.cleanup_input for dep in wflow.dependents)

    def flag_present_outputs(tasks, slot, template):
        # Flag every recorded output that is present for deletion, logging
        # and counting it under the given stats slot.
        for task, _ in tasks:
            for _, filename in wflow.get_outputs(task):
                if filename in present:
                    logger.info(template.format(filename))
                    stats[label][slot] += 1
                    delete.append(filename)

    flag_present_outputs(
        store.failed_tasks(label), 0,
        "found output from failed task: {0}")
    flag_present_outputs(
        store.merged_tasks(label), 1,
        "found output from intermediate merged task: {0}")

    if cleaned:
        for dep in wflow.dependents:
            if not dep.cleanup_input:
                continue

            if store.unfinished_units(dep.label) != 0:
                logger.error(
                    "can't validate workflow {}, as its dependents have not completed and cleaned it up".format(label))
                continue

            # NOTE(review): this runs once per finished cleanup dependent,
            # so with several of them the same files are appended and
            # counted repeatedly -- confirm that this is intended.
            for leftover in present:
                logger.warning(
                    'found output from tasks that should have been cleaned up: {}'.format(leftover))
            stats[label][2] += len(present)
            delete.extend(present)
    else:
        # No dependent cleans up this workflow's output, so every output
        # of a successful task is expected to be present.
        for task, _ in store.successful_tasks(label):
            for _, filename in wflow.get_outputs(task):
                if filename not in present:
                    missing.append(task)
                    logger.warning('output file is missing for {0}'.format(task))

    return delete, missing
def flatten(files, matches=None):
    """Flatten a list of directories or files to a single list of files.

    Parameters
    ----------
    files : str or list
        A list of paths to expand.  Can also be a string containing a
        path.  Tuples, sets and frozensets of paths are accepted as well.
        A leading ``~`` in each path is expanded with
        `os.path.expanduser`.
    matches : list
        A list of patterns to match files against.  Patterns are compared
        to the base name of each file with `fnmatch.fnmatch`.  Only
        successfully matched files will be returned.  When `matches` is
        `None` or empty, no filtering takes place.

    Returns
    -------
    files : list
        A list of files found in the paths passed in the input parameter
        `files`, optionally restricted to those matching one of the
        patterns in `matches`.
    """
    # Anything that is not a collection of paths is treated as one path.
    # Tuples and sets are included so they are iterated instead of being
    # handed to `os.path.expanduser` as a whole, which would fail.
    if not isinstance(files, (list, tuple, set, frozenset)):
        files = [files]

    res = []
    for entry in files:
        entry = os.path.expanduser(entry)
        if fs.isdir(entry):
            res.extend(fs.ls(entry))
        elif fs.isfile(entry):
            res.append(entry)

    if not matches:
        return res

    def matchfn(fn):
        # Patterns apply to the file name only, not to its directory part.
        base = os.path.basename(fn)
        return any(fnmatch.fnmatch(base, m) for m in matches)

    return [fn for fn in res if matchfn(fn)]