Example #1
def clean_working_directory(outputs, cwd, inputs, needed_outputs, config,
                            files2keep=None, dirs2keep=None):
    """Removes all files not needed for further analysis from the directory
    """
    if not outputs:
        return
    outputs_to_keep = outputs.get().keys()
    if needed_outputs and \
       str2bool(config['execution']['remove_unnecessary_outputs']):
        outputs_to_keep = needed_outputs
    # build a list of needed files
    output_files = []
    outputdict = outputs.get()
    for output in outputs_to_keep:
        output_files.extend(walk_outputs(outputdict[output]))
    needed_files = [path for path, type in output_files if type == 'f']
    if str2bool(config['execution']['keep_inputs']):
        input_files = []
        inputdict = inputs.get()
        input_files.extend(walk_outputs(inputdict))
        needed_files += [path for path, type in input_files if type == 'f']
    for extra in ['_0x*.json', 'provenance.xml', 'pyscript*.m',
                  'command.txt', 'result*.pklz', '_inputs.pklz', '_node.pklz']:
        needed_files.extend(glob(os.path.join(cwd, extra)))
    if files2keep:
        needed_files.extend(filename_to_list(files2keep))
    needed_dirs = [path for path, type in output_files if type == 'd']
    if dirs2keep:
        needed_dirs.extend(filename_to_list(dirs2keep))
    for extra in ['_nipype', '_report']:
        needed_dirs.extend(glob(os.path.join(cwd, extra)))
    logger.debug('Needed files: %s', ';'.join(needed_files))
    logger.debug('Needed dirs: %s', ';'.join(needed_dirs))
    files2remove = []
    if str2bool(config['execution']['remove_unnecessary_outputs']):
        for f in walk_files(cwd):
            if f not in needed_files:
                if not needed_dirs:
                    files2remove.append(f)
                elif not any(f.startswith(dirname) for dirname in needed_dirs):
                    files2remove.append(f)
    else:
        if not str2bool(config['execution']['keep_inputs']):
            input_files = []
            inputdict = inputs.get()
            input_files.extend(walk_outputs(inputdict))
            input_files = [path for path, type in input_files if type == 'f']
            for f in walk_files(cwd):
                if f in input_files and f not in needed_files:
                    files2remove.append(f)
    logger.debug('Removing files: %s', ';'.join(files2remove))
    for f in files2remove:
        os.remove(f)
    for key in outputs.copyable_trait_names():
        if key not in outputs_to_keep:
            setattr(outputs, key, Undefined)
    return outputs
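A note on the two options this helper consults: nipype keeps configuration values as strings, so boolean settings must go through str2bool before branching on them. A minimal sketch, assuming a stock nipype install (the option names are the ones referenced above):

from nipype import config
from nipype.utils.misc import str2bool

# Both options arrive as the strings 'true'/'false' from nipype.cfg:
if str2bool(config.get('execution', 'remove_unnecessary_outputs')):
    print('outputs not needed downstream will be deleted')
if str2bool(config.get('execution', 'keep_inputs')):
    print('input files are kept alongside outputs')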
Example #2
def test_str2bool():
    yield assert_true, str2bool("yes")
    yield assert_true, str2bool("true")
    yield assert_true, str2bool("t")
    yield assert_true, str2bool("1")
    yield assert_false, str2bool("no")
    yield assert_false, str2bool("false")
    yield assert_false, str2bool("n")
    yield assert_false, str2bool("f")
    yield assert_false, str2bool("0")
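The yield-style assertions above pin down str2bool's full truth table. A sketch of an implementation consistent with those assertions (nipype's actual function may differ in details such as additional accepted aliases):

def str2bool(v):
    """Parse a yes/no-style string into a bool; pass booleans through."""
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', '1'):
        return True
    if v.lower() in ('no', 'false', 'n', 'f', '0'):
        return False
    raise ValueError('%r cannot be converted to bool' % v)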
Example #4
def run_instance(interface, options):
    if interface:
        print("setting function inputs")

        for input_name, _ in list(interface.inputs.items()):
            value = getattr(options, input_name)
            if value is not None:
                if not isinstance(value, bool):
                    # traits cannot cast from string to float or int
                    try:
                        value = float(value)
                    except (TypeError, ValueError):
                        pass
                    # try to cast a string input to boolean
                    if isinstance(value, str):
                        try:
                            value = str2bool(value)
                        except ValueError:
                            pass
                try:
                    setattr(interface.inputs, input_name, value)
                except ValueError as e:
                    print("Error when setting the value of %s: '%s'"
                          % (input_name, str(e)))

        print(interface.inputs)
        res = interface.run()
        print(res.outputs)
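A hypothetical driver for run_instance. The Options class is a stand-in for an argparse namespace produced by a CLI that defines one flag per interface input; unset inputs resolve to None, so the loop above skips them:

from nipype.interfaces.utility import Function

class Options:
    """Hypothetical stand-in for a parsed command line."""

    def __init__(self, **kwargs):
        self._values = kwargs

    def __getattr__(self, name):
        # None for every input the user did not set
        return self._values.get(name)

def add(a, b):
    return a + b

iface = Function(input_names=['a', 'b'], output_names=['total'], function=add)
run_instance(iface, Options(a='2', b='3'))  # '2' and '3' are cast to floats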
Example #6
    def _get_hashval(self):
        """Return a hash of the input state"""
        self._get_inputs()
        if self._hashvalue is None and self._hashed_inputs is None:

            inputs = copy.deepcopy(self._interface.inputs)
            for f in self.ignore_cache:
                try:
                    delattr(inputs, f)
                except Exception:  # the trait may be absent; skip it
                    pass

            self._hashed_inputs, self._hashvalue = inputs.get_hashval(
                hash_method=self.config['execution']['hash_method'])

            rm_extra = self.config['execution']['remove_unnecessary_outputs']
            if str2bool(rm_extra) and self.needed_outputs:
                hashobject = md5()
                hashobject.update(self._hashvalue.encode())
                hashobject.update(str(self.needed_outputs).encode())
                self._hashvalue = hashobject.hexdigest()
                self._hashed_inputs.append(
                    ('needed_outputs', self.needed_outputs))

        return self._hashed_inputs, self._hashvalue
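The interesting step above is the cache-key extension: when remove_unnecessary_outputs is active, two nodes with identical inputs but different needed_outputs must not share a hash. A self-contained illustration of that folding pattern (the values are made up):

from hashlib import md5

inputs_hash = 'd41d8cd98f00b204e9800998ecf8427e'  # stands in for self._hashvalue

h = md5()
h.update(inputs_hash.encode())
h.update(str(['out_file']).encode())  # stands in for self.needed_outputs
print(h.hexdigest())  # differs from inputs_hash, so the cache entries stay distinct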
Example #7
File: io.py Project: heffjos/ecp
    def _list_outputs(self):
        """Execute this module.
        """

        # Init variables
        outputs = self.output_spec().get()
        out_files = []
        # Use hardlink
        use_hardlink = str2bool(
            config.get('execution', 'try_hard_link_datasink'))

        outdir = os.path.abspath(self.inputs.base_directory)

        # Iterate through outputs attributes {key : path(s)}
        for key, files in list(self.inputs._outputs.items()):
            if not isdefined(files):
                continue
            files = ensure_list(files)

            # flatten one level of nesting; guard against an empty list,
            # for which files[0] would raise IndexError
            if files and isinstance(files[0], list):
                files = [item for sublist in files for item in sublist]

            # Iterate through passed-in source files
            for src in ensure_list(files):
                # Format src and dst files
                src = os.path.abspath(src)
                if not os.path.isfile(src):
                    src = os.path.join(src, '')
                dst = self._get_dst(src)
                dst = os.path.join(outdir, dst)

                # If src is a file, copy it to dst
                if os.path.isfile(src):
                    copyfile(src,
                             dst,
                             copy=True,
                             hashmethod='content',
                             use_hardlink=use_hardlink)
                    out_files.append(dst)
                # If src is a directory, copy entire contents to dst dir
                elif os.path.isdir(src):
                    if os.path.exists(dst) and self.inputs.remove_dest_dir:
                        shutil.rmtree(dst)
                    copytree(src, dst)
                    out_files.append(dst)

        # Return outputs dictionary
        outputs['out_file'] = out_files

        return outputs
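The try_hard_link_datasink option read at the top decides whether copyfile may hard-link instead of copying. A hedged sketch of toggling it (hard links only pay off when source and sink share a filesystem; 'false' forces real copies):

from nipype import config

# Force real copies, e.g. when sunk files may later be modified in place:
config.set('execution', 'try_hard_link_datasink', 'false')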
Example #8
    def _make_empty_results(self):
        finalresult = InterfaceResult(
            interface=[], runtime=[], provenance=[], inputs=[], outputs=self.outputs
        )
        if self.outputs:
            assert self.config is not None
            rm_extra = self.config["execution"]["remove_unnecessary_outputs"]
            if str2bool(rm_extra) and self.needed_outputs:
                for key, _ in list(self.outputs.items()):
                    if key in self.needed_outputs:
                        setattr(finalresult.outputs, key, list())

        return finalresult
Example #9
    def _get_hashval(self):
        """Compute hash including iterfield lists."""
        self._get_inputs()

        if self._hashvalue is not None and self._hashed_inputs is not None:
            return self._hashed_inputs, self._hashvalue

        self._check_iterfield()
        hashinputs = copy.deepcopy(self._interface.inputs)
        for name in self.iterfield:
            hashinputs.remove_trait(name)
            hashinputs.add_trait(
                name,
                InputMultiPath(
                    self._interface.inputs.traits()[name].trait_type))
            logger.debug('setting hashinput %s-> %s', name,
                         getattr(self._inputs, name))
            if self.nested:
                setattr(hashinputs, name, flatten(getattr(self._inputs, name)))
            else:
                setattr(hashinputs, name, getattr(self._inputs, name))

        for f in self.ignore_cache:
            try:
                delattr(hashinputs, f)
            except Exception:  # the trait may be absent; skip it
                pass

        hashed_inputs, hashvalue = hashinputs.get_hashval(
            hash_method=self.config['execution']['hash_method'])
        rm_extra = self.config['execution']['remove_unnecessary_outputs']
        if str2bool(rm_extra) and self.needed_outputs:
            hashobject = md5()
            hashobject.update(hashvalue.encode())
            sorted_outputs = sorted(self.needed_outputs)
            hashobject.update(str(sorted_outputs).encode())
            hashvalue = hashobject.hexdigest()
            hashed_inputs.append(('needed_outputs', sorted_outputs))
        self._hashed_inputs, self._hashvalue = hashed_inputs, hashvalue
        return self._hashed_inputs, self._hashvalue
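Unlike the node-level variant in Example #6, this MapNode version sorts needed_outputs before hashing; without the sort, the same set of outputs listed in a different order would yield a different cache key. A quick demonstration:

from hashlib import md5

a = md5(str(['t2', 't1']).encode()).hexdigest()
b = md5(str(sorted(['t2', 't1'])).encode()).hexdigest()
print(a == b)  # False: ordering changes the hash, hence the sorted() above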
Example #10
import pytest

@pytest.mark.parametrize("string, expected", [
    ("yes", True), ("true", True), ("t", True), ("1", True),
    ("no", False), ("false", False), ("n", False), ("f", False), ("0", False),
])
def test_str2bool(string, expected):
    assert str2bool(string) == expected
Example #11
    def _list_outputs(self):
        """Execute this module.
        """

        # Init variables
        outputs = self.output_spec().get()
        out_files = []
        # Use hardlink
        use_hardlink = str2bool(
            config.get('execution', 'try_hard_link_datasink'))

        # Set local output directory if specified
        if isdefined(self.inputs.local_copy):
            outdir = self.inputs.local_copy
        else:
            outdir = self.inputs.base_directory
            # If base directory isn't given, assume current directory
            if not isdefined(outdir):
                outdir = '.'

        # Check if base directory reflects S3 bucket upload
        s3_flag, bucket_name = self._check_s3_base_dir()
        if s3_flag:
            s3dir = self.inputs.base_directory
            # If user overrides bucket object, use that
            if self.inputs.bucket:
                bucket = self.inputs.bucket
            # Otherwise fetch bucket object using name
            else:
                try:
                    bucket = self._fetch_bucket(bucket_name)
                # If encountering an exception during bucket access, set output
                # base directory to a local folder
                except Exception as exc:
                    s3dir = '<N/A>'
                    if not isdefined(self.inputs.local_copy):
                        local_out_exception = os.path.join(
                            os.path.expanduser('~'),
                            's3_datasink_' + bucket_name)
                        outdir = local_out_exception
                    # Log local copying directory
                    iflogger.info(
                        'Access to S3 failed! Storing outputs locally at: '
                        '%s\nError: %s', outdir, exc)
        else:
            s3dir = '<N/A>'

        # If container input is given, append that to outdir
        if isdefined(self.inputs.container):
            outdir = os.path.join(outdir, self.inputs.container)
            s3dir = os.path.join(s3dir, self.inputs.container)

        # If sinking to local folder
        if outdir != s3dir:
            outdir = os.path.abspath(outdir)
            # Create the directory if it doesn't exist
            if not os.path.exists(outdir):
                try:
                    os.makedirs(outdir)
                except OSError as inst:
                    # tolerate concurrent creation of the same directory
                    if 'File exists' not in inst.strerror:
                        raise

        # Iterate through outputs attributes {key : path(s)}
        for key, files in list(self.inputs._outputs.items()):
            if not isdefined(files):
                continue
            iflogger.debug("key: %s files: %s", key, str(files))
            files = ensure_list(files if files else [])
            tempoutdir = outdir
            if s3_flag:
                s3tempoutdir = s3dir
            for d in key.split('.'):
                if d[0] == '@':
                    continue
                tempoutdir = os.path.join(tempoutdir, d)
                if s3_flag:
                    s3tempoutdir = os.path.join(s3tempoutdir, d)

            # flattening list
            if files and isinstance(files, list):
                if isinstance(files[0], list):
                    files = [item for sublist in files for item in sublist]

            # Iterate through passed-in source files
            for src in ensure_list(files):
                # Format src and dst files
                src = os.path.abspath(src)
                if not os.path.isfile(src):
                    src = os.path.join(src, '')
                dst = self._get_dst(src)
                if s3_flag:
                    s3dst = os.path.join(s3tempoutdir, dst)
                    s3dst = self._substitute(s3dst)
                dst = os.path.join(tempoutdir, dst)
                dst = self._substitute(dst)
                path, _ = os.path.split(dst)

                # If we're uploading to S3
                if s3_flag:
                    self._upload_to_s3(bucket, src, s3dst)
                    out_files.append(s3dst)
                # Otherwise, copy locally src -> dst
                if not s3_flag or isdefined(self.inputs.local_copy):
                    # Create output directory if it doesn't exist
                    if not os.path.exists(path):
                        try:
                            os.makedirs(path)
                        except OSError as inst:
                            # tolerate concurrent creation of the same directory
                            if 'File exists' not in inst.strerror:
                                raise
                    # If src == dst, it's already home
                    if (not os.path.exists(dst)) or (os.stat(src) !=
                                                     os.stat(dst)):
                        # If src is a file, copy it to dst
                        if os.path.isfile(src):
                            iflogger.debug(f'copyfile: {src} {dst}')
                            copyfile(src,
                                     dst,
                                     copy=True,
                                     hashmethod='content',
                                     use_hardlink=use_hardlink)
                            out_files.append(dst)
                        # If src is a directory, copy
                        # entire contents to dst dir
                        elif os.path.isdir(src):
                            if (os.path.exists(dst)
                                    and self.inputs.remove_dest_dir):
                                iflogger.debug('removing: %s', dst)
                                shutil.rmtree(dst)
                            iflogger.debug('copydir: %s %s', src, dst)
                            copytree(src, dst)
                            out_files.append(dst)

        # Return outputs dictionary
        outputs['out_file'] = out_files

        return outputs
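A hypothetical end-to-end use of the DataSink this method belongs to, exercising the local branch (the paths are made up; an 's3://bucket/prefix' base_directory would route through the S3 branch instead):

from nipype.interfaces.io import DataSink

ds = DataSink()
ds.inputs.base_directory = '/tmp/derivatives'
ds.inputs.container = 'sub-01'
# Attribute names become subfolders: this file lands in /tmp/derivatives/sub-01/anat/
ds.inputs.anat = '/data/sub-01/T1w.nii.gz'
result = ds.run()
print(result.outputs.out_file)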
Example #13
def run_workflow(workflow, parser=None, qsubargs=None):
    """

    A common function used by all pipelines to run a workflow

    :param workflow: workflow to run
    :param parser: the parser from which to take the rest of the arguments:
        n_procs: the (maximum) number of processors to use in MultiProc mode
        username: the username to use as login on the cluster, so that the
        workflow does not stop when the user is not connected
        use_qsub: boolean to force *not* using the cluster mode
        openmp_core: the number of cores to pass in the environment for
        OpenMP, used in niftyreg and niftyseg
        remove_tmp: remove/delete the directory containing all node files
        after execution. Default is false.
    :param qsubargs: arguments to pass to the cluster through qsub

    :return: None
    """
    """
    Force the datasink to copy files rather than creating symlinks when using a renamer
    """
    config.update_config({'execution': {'try_hard_link_datasink': 'false'}})
    """
    Modify the base_dir of the workflow in case the user has requested it
    """
    if parser and parser.working_dir:
        if not os.path.exists(os.path.abspath(parser.working_dir)):
            os.mkdir(os.path.abspath(parser.working_dir))
        workflow.base_dir = os.path.abspath(parser.working_dir)
    """
    If provided by the user, set the number of OpenMP cores to use
    """

    if parser and parser.openmp_core > 0:
        os.environ['OMP_NUM_THREADS'] = str(parser.openmp_core)
    """
    We do not want report creation in the working directory, as we do not know whether we have write access to it
    """

    config.update_config({'execution': {'create_report': 'false'}})
    """
    By default we assume the user wants to use the cluster capabilities if available
    """

    run_qsub = True
    """
    If it has been set by the user, grab the env variable "RUN_QSUB" and parse it
    """

    if os.getenv('RUN_QSUB'):
        run_qsub = str2bool(os.getenv('RUN_QSUB'))
    """
    Only go through the cluster if all requirements are fulfilled
    """

    if run_qsub and parser and parser.use_qsub and spawn.find_executable(
            'qsub'):
        """
        Default qsub arguments to pass
        """

        qargs = '-l h_rt=00:05:00 -l tmem=1.8G -l h_vmem=1.8G -l vf=1.8G ' + \
                '-l s_stack=10240 -j y -b y -S /bin/csh -V'
        """
        If provided in the arguments or in the env variable, grab the qsub arguments
        """

        if qsubargs:
            qargs = qsubargs
        elif os.getenv('QSUB_OPTIONS'):
            qargs = os.getenv('QSUB_OPTIONS')
        """
        Pass the OpenMP core count into the qsub arguments
        """

        if parser and parser.openmp_core > 1:
            qargs = qargs + ' -pe smp ' + str(parser.openmp_core)
        """
        The plugin arguments include the qsubargs
        """

        pargs = {'qsub_args': qargs}
        if parser and parser.username:
            pargs = {'qsub_args': qargs, 'username': parser.username}
        """
        Run the workflow using the SGE plugin
        """

        workflow.run(plugin='SGE', plugin_args=pargs)

    elif parser:
        """
        If there is a 'parser' in the arguments, grab the arguments from the command line and parse them
        to see which plugin to use and how many procs in case of 'MultiProc'
        """

        plugin = 'MultiProc'  # By default, we assume the user wants to use all procs in the machine: 'MultiProc'
        pargs = {}
        if parser.n_procs == 1:  # Use the 'Linear' plugin only when a single proc is requested
            plugin = 'Linear'
        elif parser.n_procs > 1:  # A specific number of procs is passed on to the plugin args
            pargs = {'n_procs': parser.n_procs}
        workflow.run(plugin=plugin, plugin_args=pargs)

    else:
        """
        If no 'parser' was used in the arguments we run the workflow with default behaviour (or the one in .nipype.cfg)
        """

        workflow.run()
    """
    After the successful run of the workflow (if unsuccessful it raises an error and exits),
    we attempt to remove the temporary directories created by the run.
    """
    to_remove = ''
    if workflow.base_dir and os.path.exists(workflow.base_dir):
        to_remove = os.path.abspath(
            os.path.join(workflow.base_dir, workflow.name))
    if parser and parser.remove_tmp and workflow.base_dir and os.path.exists(
            to_remove):
        print('removing the temporary directory: %s' % to_remove)
        shutil.rmtree(to_remove)
        print('done.')

    return
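A hypothetical invocation, assuming a parsed-arguments object exposing the attributes the docstring lists; SimpleNamespace stands in for the project's real argument parser:

from types import SimpleNamespace

import nipype.pipeline.engine as pe
from nipype.interfaces.utility import IdentityInterface

wf = pe.Workflow(name='demo')
node = pe.Node(IdentityInterface(fields=['x']), name='identity')
node.inputs.x = 1
wf.add_nodes([node])

args = SimpleNamespace(working_dir='/tmp/demo_wd', openmp_core=2,
                       use_qsub=False, n_procs=4, username=None,
                       remove_tmp=True)
run_workflow(wf, parser=args)  # falls back to MultiProc since qsub is disabled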