def modify_paths(object, relative=True, basedir=None):
    """Modify filenames in a data structure to either full paths or
    relative paths.

    Dicts, lists and tuples are walked recursively; undefined values are
    skipped.  String values naming existing files are converted according
    to ``relative``.
    """
    if not basedir:
        basedir = os.getcwd()
    if isinstance(object, dict):
        out = {}
        for key, val in sorted(object.items()):
            if isdefined(val):
                out[key] = modify_paths(val, relative=relative,
                                        basedir=basedir)
    elif isinstance(object, (list, tuple)):
        out = [modify_paths(val, relative=relative, basedir=basedir)
               for val in object if isdefined(val)]
        if isinstance(object, tuple):
            out = tuple(out)
    else:
        if isdefined(object):
            points_to_file = isinstance(object, str) and \
                os.path.isfile(object)
            if not points_to_file:
                out = object
            elif not relative:
                # expand to an absolute path and verify it exists
                out = os.path.abspath(os.path.join(basedir, object))
                if not os.path.exists(out):
                    raise FileNotFoundError('File %s not found' % out)
            elif config.getboolean('execution', 'use_relative_paths'):
                out = relpath(object, start=basedir)
            else:
                out = object
    return out
def _execute_in_series(self, updatehash=False, force_execute=None): """Executes a pre-defined pipeline in a serial order. Parameters ---------- updatehash : boolean Allows one to rerun a pipeline and update all the hashes without actually executing any of the underlying interfaces. This is useful when moving the working directory from one location to another. It is also useful when the hashing function itself changes (although we hope that this will not happen often). default [False] force_execute : list of strings This forces execution of a node even if updatehash is True """ # In the absence of a dirty bit on the object, generate the # parameterization each time before running logger.info("Running serially.") old_wd = os.getcwd() notrun = [] donotrun = [] for node in nx.topological_sort(self._execgraph): # Assign outputs from dependent executed nodes to current node. # The dependencies are stored as data on edges connecting # nodes. try: if node in donotrun: continue for edge in self._execgraph.in_edges_iter(node): data = self._execgraph.get_edge_data(*edge) logger.debug('setting input: %s->%s %s', edge[0], edge[1], str(data)) for sourceinfo, destname in data['connect']: self._set_node_input(node, destname, edge[0], sourceinfo) self._set_output_directory_base(node) redo = None if force_execute: if isinstance(force_execute, str): force_execute = [force_execute] redo = any([node.name.lower()==l.lower() \ for l in force_execute]) if updatehash and not redo: node.run(updatehash=updatehash) else: node.run(force_execute=redo) except: os.chdir(old_wd) if config.getboolean('execution', 'stop_on_first_crash'): raise # bare except, but i really don't know where a # node might fail crashfile = node._report_crash(execgraph=self._execgraph) # remove dependencies from queue subnodes = nx.dfs_preorder(self._execgraph, node) notrun.append(dict(node = node, dependents = subnodes, crashfile = crashfile)) donotrun.extend(subnodes) _report_nodes_not_run(notrun)
def clean_working_directory(outputs, cwd, inputs, needed_outputs,
                            files2keep=None, dirs2keep=None):
    """Removes all files not needed for further analysis from the directory

    Parameters
    ----------
    outputs : output spec object
        outputs of the node; traits not listed in ``needed_outputs`` are
        reset to ``Undefined`` after cleaning
    cwd : str
        working directory to clean
    inputs : input spec object
        inputs of the node; input files are preserved when the
        ``execution.keep_inputs`` config option is set
    needed_outputs : list of str or None
        names of outputs whose files must be kept; when falsy all outputs
        are kept
    files2keep : str or list of str, optional
        extra files to preserve
    dirs2keep : str or list of str, optional
        extra directories whose contents are preserved

    Returns
    -------
    outputs : output spec object or None
        the pruned outputs, or None when ``outputs`` is falsy
    """
    if not outputs:
        return
    outputs_to_keep = outputs.get().keys()
    if needed_outputs:
        outputs_to_keep = needed_outputs
    # build a list of needed files
    output_files = []
    outputdict = outputs.get()
    for output in outputs_to_keep:
        output_files.extend(walk_outputs(outputdict[output]))
    # walk_outputs yields (path, type) pairs; 'f' marks files, 'd' dirs
    needed_files = [path for path, ftype in output_files if ftype == 'f']
    if config.getboolean('execution', 'keep_inputs'):
        input_files = []
        inputdict = inputs.get()
        input_files.extend(walk_outputs(inputdict))
        needed_files += [path for path, ftype in input_files
                         if ftype == 'f']
    # bookkeeping files that must always survive cleaning
    for extra in ['_0x*.json', 'provenance.xml', 'pyscript*.m',
                  'command.txt', 'result*.pklz', '_inputs.pklz',
                  '_node.pklz']:
        needed_files.extend(glob(os.path.join(cwd, extra)))
    if files2keep:
        needed_files.extend(filename_to_list(files2keep))
    needed_dirs = [path for path, ftype in output_files if ftype == 'd']
    if dirs2keep:
        needed_dirs.extend(filename_to_list(dirs2keep))
    for extra in ['_nipype', '_report']:
        needed_dirs.extend(glob(os.path.join(cwd, extra)))
    logger.debug('Needed files: %s' % (';'.join(needed_files)))
    logger.debug('Needed dirs: %s' % (';'.join(needed_dirs)))
    # use a set: membership is tested once per walked file
    needed_fileset = set(needed_files)
    files2remove = []
    for f in walk_files(cwd):
        if f not in needed_fileset:
            if not needed_dirs:
                files2remove.append(f)
            elif not any(f.startswith(dirname)
                         for dirname in needed_dirs):
                files2remove.append(f)
    logger.debug('Removing files: %s' % (';'.join(files2remove)))
    for f in files2remove:
        os.remove(f)
    # reset output traits whose files were not preserved
    for key in outputs.copyable_trait_names():
        if key not in outputs_to_keep:
            setattr(outputs, key, Undefined)
    return outputs
def _execute_with_manager(self):
    """Executes a pre-defined pipeline in distributed approaches
    based on IPython's parallel processing interface
    """
    # honor the config switch that disables distributed execution
    if config.getboolean('execution', 'run_in_series'):
        self._execute_in_series()
        return
    # retrieve clients again
    if not self.taskclient:
        try:
            self.taskclient = self.ipyclient.TaskClient()
        except Exception, e:
            # NOTE(review): Python 2 except syntax; broad catch followed
            # by type tests rather than separate except clauses
            if isinstance(e, ConnectionRefusedError):
                warn("No clients found, running serially for now.")
            if isinstance(e, ValueError):
                warn("Ipython kernel not installed")
            # fall back to serial execution when no task client exists
            self._execute_in_series()
            return
def modify_paths(object, relative=True, basedir=None):
    """Convert paths in data structure to either full paths or relative
    paths

    Supports combinations of lists, dicts, tuples, strs

    Parameters
    ----------
    relative : boolean indicating whether paths should be set relative to
        the current directory
    basedir : default os.getcwd()
        what base directory to use as default
    """
    base = basedir or os.getcwd()
    if isinstance(object, dict):
        out = {key: modify_paths(val, relative=relative, basedir=base)
               for key, val in sorted(object.items()) if isdefined(val)}
    elif isinstance(object, (list, tuple)):
        converted = []
        for val in object:
            if isdefined(val):
                converted.append(modify_paths(val, relative=relative,
                                              basedir=base))
        out = tuple(converted) if isinstance(object, tuple) else converted
    else:
        if isdefined(object):
            if isinstance(object, str) and os.path.isfile(object):
                if relative:
                    if config.getboolean('execution',
                                         'use_relative_paths'):
                        out = relpath(object, start=base)
                    else:
                        out = object
                else:
                    out = os.path.abspath(os.path.join(base, object))
                    if not os.path.exists(out):
                        raise FileNotFoundError('File %s not found' % out)
            else:
                out = object
    return out
def __init__(self, matlab_cmd=None, **inputs):
    """initializes interface to matlab
    (default 'matlab -nodesktop -nosplash')
    """
    super(MatlabCommand, self).__init__(**inputs)
    # an explicit argument takes precedence over the class-level default
    if matlab_cmd and isdefined(matlab_cmd):
        self._cmd = matlab_cmd
    elif self._default_matlab_cmd:
        self._cmd = self._default_matlab_cmd
    # copy class-level defaults into any input traits left unset
    for trait_name, default in (('mfile', self._default_mfile),
                                ('paths', self._default_paths)):
        if default and not isdefined(getattr(self.inputs, trait_name)):
            setattr(self.inputs, trait_name, default)
    if not isdefined(self.inputs.single_comp_thread) and \
            config.getboolean('execution', 'single_thread_matlab'):
        self.inputs.single_comp_thread = True