Example #1
    def connect_input(self,
                      spec_name,
                      node,
                      node_input,
                      format=None,
                      **kwargs):
        """
        Connects an analysis fileset_spec as an input to the provided node

        Parameters
        ----------
        spec_name : str
            Name of the analysis data spec (or one of the IDs from the iterator
            nodes, 'subject_id' or 'visit_id') to connect to the node
        node : arcana.Node
            The node to connect the input to
        node_input : str
            Name of the input on the node to connect the fileset spec to
        format : FileFormat | None
            The file format the input is expected in. If it differs
            from the format in the data spec or of the analysis input then an
            implicit conversion is performed. If None, the file format in the
            data spec is assumed
        """
        if spec_name in self.analysis.ITERFIELDS:
            self._iterator_conns[spec_name].append((node, node_input, format))
        else:
            name = self._map_name(spec_name, self._input_map)
            if name not in self.analysis.data_spec_names():
                raise ArcanaDesignError(
                    "Proposed input '{}' to {} is not a valid spec name ('{}')"
                    .format(name, self._error_msg_loc,
                            "', '".join(self.analysis.data_spec_names())))
            self._check_valid_trait_name(node, node_input, 'input')
            self._input_conns[name].append((node, node_input, format, kwargs))
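A minimal usage sketch, mirroring the call made inside the pipeline constructor of Example #13 below; the 'magnitude' spec name, the preproc node and nifti_gz_format are borrowed from that example and stand in for whatever spec, node and format a real pipeline uses:

    # Feed the 'magnitude' fileset spec into the 'in_file' trait of an
    # existing node, converting to gzipped NIfTI if the stored format
    # differs (names borrowed from Example #13).
    pipeline.connect_input('magnitude', preproc, 'in_file',
                           nifti_gz_format)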
Example #2
    def branch(self, name, values=None):  # noqa: E501 @UnusedVariable
        """
        Checks whether the given switch matches the value provided

        Parameters
        ----------
        name : str
            The name of the switch to check
        values : str | list[str] | None
            The value(s) of the switch to match if a non-boolean switch
        """
        if isinstance(values, basestring):
            values = [values]
        spec = self.param_spec(name)
        if not isinstance(spec, SwitchSpec):
            raise ArcanaUsageError(
                "{} is standard parameter not a switch".format(spec))
        switch = self._get_parameter(name)
        if spec.is_boolean:
            if values is not None:
                raise ArcanaDesignError(
                    "Should not provide values ({}) to boolean switch "
                    "'{}' in {}".format(values, name,
                                        self._param_error_location))
            in_branch = switch.value
        else:
            if values is None:
                raise ArcanaDesignError(
                    "Value(s) need(s) to be provided non-boolean switch"
                    " '{}' in {}".format(name, self._param_error_location))
            values = set(values)
            # Register parameter as being used by the pipeline
            unrecognised_values = values - set(spec.choices)
            if unrecognised_values:
                raise ArcanaDesignError(
                    "Provided value(s) ('{}') for switch '{}' in {} "
                    "is not a valid option ('{}')".format(
                        "', '".join(unrecognised_values), name,
                        self._param_error_location, "', '".join(spec.choices)))
            in_branch = switch.value in values
            if not in_branch:
                try:
                    in_branch = spec.fallbacks[switch.value] in values
                except KeyError:
                    pass
        return in_branch
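A sketch of how branch is typically called from a pipeline constructor. 'preproc_denoise' is the boolean switch used in Example #13; 'bet_method' and its values are hypothetical choices of a non-boolean switch:

    # Boolean switch: no values argument is allowed
    if self.branch('preproc_denoise'):
        pass  # add denoising nodes here
    # Non-boolean switch: test against one (or a list of) allowed choices
    if self.branch('bet_method', 'fsl_bet'):
        pass  # FSL-based brain extraction
    elif self.branch('bet_method', ['ants', 'freesurfer']):
        pass  # alternative implementations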
Example #3
 def _list_outputs(self):
     outputs = self._outputs().get()
     if len(self.inputs.subject_ids) != len(self.inputs.inlist):
         raise ArcanaDesignError(
             "Length of subject IDs ({}) doesn't match that of input items "
             "({})".format(len(self.inputs.subject_ids),
                           len(self.inputs.inlist)))
     if len(self.inputs.visit_ids) != len(self.inputs.inlist):
         raise ArcanaDesignError(
             "Length of visit IDs ({}) doesn't match that of input items "
             "({})".format(len(self.inputs.visit_ids),
                           len(self.inputs.inlist)))
     session_ids = list(zip(self.inputs.subject_ids, self.inputs.visit_ids))
     index = session_ids.index(
         (self.inputs.subject_id, self.inputs.visit_id))
     outputs['out'] = self.inputs.inlist[index]
     return outputs
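The selection step here is plain list bookkeeping; the standalone sketch below reproduces it with hypothetical IDs:

    # Pair subject and visit IDs, then pick the input item that matches
    # the current session (all values hypothetical).
    subject_ids = ['S01', 'S01', 'S02']
    visit_ids = ['V1', 'V2', 'V1']
    inlist = ['a.nii.gz', 'b.nii.gz', 'c.nii.gz']
    session_ids = list(zip(subject_ids, visit_ids))
    index = session_ids.index(('S02', 'V1'))
    assert inlist[index] == 'c.nii.gz'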
Example #4
 def _list_outputs(self):
     outputs = self.output_spec().get()
     # Connect iterables (i.e. subject_id and visit_id)
     subject_id = (self.inputs.subject_id
                   if isdefined(self.inputs.subject_id) else None)
     visit_id = (self.inputs.visit_id
                 if isdefined(self.inputs.visit_id) else None)
     missing_inputs = []
     # Collate input checksums into a dictionary
     input_checksums = {
         n: getattr(self.inputs, n + CHECKSUM_SUFFIX)
         for n in self._pipeline_input_filesets
     }
     input_checksums.update({
         n: getattr(self.inputs, n + FIELD_SUFFIX)
         for n in self._pipeline_input_fields
     })
     output_checksums = {}
     with ExitStack() as stack:
         # Connect to set of repositories that the collections come from
         for repository in self.repositories:
             stack.enter_context(repository)
         for fileset_slice in self.fileset_collections:
             fileset = fileset_slice.item(subject_id, visit_id)
             path = getattr(self.inputs, fileset_slice.name + PATH_SUFFIX)
             if not isdefined(path):
                 if fileset.name in self._required:
                     missing_inputs.append(fileset.name)
                 continue  # skip the upload for this fileset
             fileset.path = path  # Push to repository
             output_checksums[fileset.name] = fileset.checksums
         for field_slice in self.field_collections:
             field = field_slice.item(subject_id, visit_id)
             value = getattr(self.inputs, field_slice.name + FIELD_SUFFIX)
             if not isdefined(value):
                 if field.name in self._required:
                     missing_inputs.append(field.name)
                 continue  # skip the upload for this field
             field.value = value  # Push to repository
             output_checksums[field.name] = field.value
         # Add input and output checksums to provenance record and sink to
         # all repositories that have received data (typically only one)
         prov = copy(self._prov)
         prov['inputs'] = input_checksums
         prov['outputs'] = output_checksums
         record = Record(self._pipeline_name, self.frequency, subject_id,
                         visit_id, self._from_analysis, prov)
         for dataset in self.datasets:
             dataset.put_record(record)
     if missing_inputs:
         raise ArcanaDesignError(
             "Required derivatives '{}' to were not created by upstream "
             "nodes connected to sink {}".format(
                 "', '".join(missing_inputs), self))
     # Return cache file paths
     outputs['checksums'] = output_checksums
     return outputs
Example #5
 def _check_valid_trait_name(self, node, trait_name, conn_type):
     if conn_type == 'output':
         trait_spec = node.interface._outputs()
     elif conn_type == 'input':
         trait_spec = node.interface.inputs
     else:
         assert False, "unknown conn_type {}".format(conn_type)
     valid_trait_names = trait_spec.trait_names()
     if trait_name not in valid_trait_names:
         raise ArcanaDesignError(
             ("Node {} '{}' is not a valid trait of {} used for '{}' "
              "node of '{}' pipeline. Valid traits are '{}'").format(
                  conn_type, trait_name, node.interface, node.name,
                  self.name, "', '".join(valid_trait_names)))
Example #6
    def unhandled_branch(self, name):
        """
        Convenience method for raising an exception when a pipeline doesn't
        handle a particular switch value

        Parameters
        ----------
        name : str
            Name of the switch whose value hasn't been handled
        """
        raise ArcanaDesignError(
            "'{}' value of '{}' switch in {} is not handled".format(
                self.parameter(name), name, self._param_error_location))
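A sketch of the intended pattern: close off an if/elif chain over a non-boolean switch so that any unanticipated value raises an ArcanaDesignError (the switch name and values are hypothetical):

    if self.branch('bet_method', 'fsl_bet'):
        pass  # FSL-based brain extraction
    elif self.branch('bet_method', 'ants'):
        pass  # ANTs-based brain extraction
    else:
        self.unhandled_branch('bet_method')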
Example #7
 def __init__(self, name, desc=None, optional=False, default=None):
     if optional and default is not None:
         raise ArcanaUsageError(
             "'optional' doesn't make sense for specs ('{}') with default "
             "values".format(name))
     self._desc = desc
     self._analysis = None
     self._optional = optional
     # Set the name of the default slice-like object so it matches
     # the name of the spec
     if default is not None:
         if default.frequency != self.frequency:
             raise ArcanaDesignError(
                 "Frequency of default slice-like object passed to "
                 "'{}' spec ('{}'), does not match spec ('{}')".format(
                     name, default.frequency, self.frequency))
         default = deepcopy(default)
     self._default = default
Example #8
    def connect_output(self,
                       spec_name,
                       node,
                       node_output,
                       format=None,
                       **kwargs):
        """
        Connects an output to an analysis fileset spec

        Parameters
        ----------
        spec_name : str
            Name of the analysis fileset spec to connect to
        node : arcana.Node
            The node to connect the output from
        node_output : str
            Name of the output on the node to connect to the fileset
        format : FileFormat | None
            The file format the output is returned in. If it differs
            from the format in the data spec then an implicit conversion is
            performed. If None, it is assumed to be returned in the file
            format of the data spec entry
        """
        name = self._map_name(spec_name, self._output_map)
        if name not in self.analysis.data_spec_names():
            raise ArcanaDesignError(
                "Proposed output '{}' to {} is not a valid spec name ('{}')".
                format(name, self._error_msg_loc,
                       "', '".join(self.analysis.data_spec_names())))
        if name in self._output_conns:
            prev_node, prev_node_output, _, _ = self._output_conns[name]
            logger.info(
                "Reassigning '{}' output from {}:{} to {}:{} in {}".format(
                    name, prev_node.name, prev_node_output, node.name,
                    node_output, self._error_msg_loc))

        self._check_valid_trait_name(node, node_output, 'output')
        self._output_conns[name] = (node, node_output, format, kwargs)
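A minimal usage sketch, the mirror image of connect_input in Example #1. The 'preproc' spec, 'out_file' trait and nifti_gz_format appear in Example #13 below; the reorient variable is a hypothetical handle to a previously added node:

    # Route the node's 'out_file' trait to the 'preproc' fileset spec,
    # converting to gzipped NIfTI on the way if required.
    pipeline.connect_output('preproc', reorient, 'out_file',
                            nifti_gz_format)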
Example #9
    def _unwrap_maps(self, name_maps, name, analysis=None, **inner_maps):
        """
        Unwraps potentially nested name-mapping dictionaries to get values
        for name, input_map, output_map and analysis. Used in __init__.

        Parameters
        ----------
        name_maps : dict
            A dictionary containing the name_maps to apply to the values
        name : str
            Name passed from inner pipeline constructor
        analysis : Analysis
            The analysis to bind the pipeline to. Will be overridden by any
            values in the name_maps dict
        inner_maps : dict[str, dict[str,str]]
            input and output maps from inner pipeline constructors

        Returns
        -------
        name : str
            Potentially modified name of the pipeline
        analysis : Analysis
            Potentially modified analysis
        maps : dict[str, dict[str,str]]
            Potentially modified input and output maps
        """
        # Set values of name and analysis
        name = name_maps.get('name', name)
        name = name_maps.get('prefix', '') + name
        analysis = name_maps.get('analysis', analysis)
        # Flatten input and output maps, combining maps from inner nests with
        # those in the "mods" dictionary
        maps = {}
        for mtype in ('input_map', 'output_map'):
            try:
                inner_map = inner_maps[mtype]
            except KeyError:
                try:
                    maps[mtype] = name_maps[mtype]  # Only outer map
                except KeyError:
                    pass  # No maps
            else:
                try:
                    outer_map = name_maps[mtype]
                except KeyError:
                    maps[mtype] = inner_map  # Only inner map
                else:
                    # Work through different combinations of inner and outer
                    # map types (i.e. str & str, str & dict, dict & str, and
                    # dict & dict) and combine into a single map
                    if isinstance(outer_map, basestring):
                        if isinstance(inner_map, basestring):
                            # Concatenate prefixes
                            maps[mtype] = outer_map + inner_map
                        elif isinstance(inner_map, dict):
                            # Add outer_map prefix to all values in inner map
                            # dictionary
                            maps[mtype] = {
                                k: outer_map + v
                                for k, v in inner_map.items()
                            }
                        else:
                            raise ArcanaDesignError(
                                "Unrecognised type for name map in '{}' "
                                "pipeline can be str or dict[str,str]: {}".
                                format(name, inner_map))
                    elif isinstance(outer_map, dict):
                        if isinstance(inner_map, basestring):
                            # Strip inner map prefix from outer dictionary
                            # (which should have prefix included). This should
                            # be an unlikely case I imagine
                            maps[mtype] = {
                                k[len(inner_map):]: v
                                for k, v in outer_map.items()
                            }
                        elif isinstance(inner_map, dict):
                            # Chain outer_map dictionary to inner map
                            # dictionary
                            maps[mtype] = deepcopy(outer_map)
                            maps[mtype].update({
                                k: outer_map.get(v, v)
                                for k, v in inner_map.items()
                            })
                        else:
                            raise ArcanaDesignError(
                                "Unrecognised type for name map in '{}' "
                                "pipeline can be str or dict[str,str]: {}".
                                format(name, inner_map))
                    else:
                        raise ArcanaDesignError(
                            "Unrecognised type for name map in '{}' "
                            "pipeline can be str or dict[str,str]: {}".format(
                                name, outer_map))
        try:
            outer_maps = name_maps['name_maps']
        except KeyError:
            pass
        else:
            name, analysis, maps = self._unwrap_maps(outer_maps,
                                                     name=name,
                                                     analysis=analysis,
                                                     **maps)
        return name, analysis, maps
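The combination rules can be exercised in isolation. This standalone sketch (hypothetical names, no Arcana imports) reproduces the str & dict branch above, where an outer prefix is applied to every value of an inner dictionary map, and the str & str branch, where prefixes simply concatenate:

    # Outer map is a prefix string, inner map is an explicit dictionary
    outer_map = 'coreg_'
    inner_map = {'in_file': 'magnitude', 'ref_file': 'reference'}
    combined = {k: outer_map + v for k, v in inner_map.items()}
    assert combined == {'in_file': 'coreg_magnitude',
                        'ref_file': 'coreg_reference'}
    # Two prefix strings just concatenate
    assert 'outer_' + 'inner_' == 'outer_inner_'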
Example #10
    def add(self,
            name,
            interface,
            inputs=None,
            outputs=None,
            requirements=None,
            wall_time=None,
            annotations=None,
            **kwargs):
        """
        Adds a processing Node to the pipeline

        Parameters
        ----------
        name : str
            Name for the node
        interface : nipype.Interface
            The interface to use for the node
        inputs : dict[str, (str, FileFormat) | (Node, str)]
            Connections from inputs of the pipeline and outputs of other nodes
            to inputs of node. The keys of the dictionary are the field names
            and the values are 2-tuples containing either the name of the data
            spec and the data format it is expected in (for pipeline inputs) or
            the sending Node and the name of an output of the sending Node.
            Note that pipeline inputs can be specified outside this method
            using the 'connect_input' method and connections between nodes
            with the 'connect' method.
        outputs : dict[str, (str, FileFormat)]
            Connections to outputs of the pipeline from fields of the
            interface. The keys of the dictionary are the names of the data
            specs that will be written to and the values are the interface
            field name and the data format it is produced in. Note that output
            connections can also be specified using the 'connect_output'
            method.
        requirements : list(Requirement)
            List of packages required for the node to run (default: [])
        wall_time : float
            Time required to execute the node in minutes (default: 1)
        mem_gb : int
            Required memory for the node in GB
        n_procs : int
            Preferred number of threads to run the node on (default: 1)
        annotations : dict[str, *]
            Additional annotations to add to the node, which may be used by
            the Processor node to optimise execution (e.g. 'gpu': True)
        iterfield : str
            Name of field to be passed an iterable to iterate over.
            If present, a MapNode will be created instead of a regular node
        joinsource : str
            Name of iterator field to join. Typically one of the implicit
            iterators (i.e. Analysis.SUBJECT_ID or Analysis.VISIT_ID)
            to join over the subjects and/or visits
        joinfield : str
            Name of field to pass the joined list when creating a JoinNode

        Returns
        -------
        node : Node
            The Node object that has been added to the pipeline
        """
        if annotations is None:
            annotations = {}
        if requirements is None:
            requirements = []
        if wall_time is None:
            wall_time = self.analysis.processor.default_wall_time
        if 'mem_gb' not in kwargs or kwargs['mem_gb'] is None:
            kwargs['mem_gb'] = self.analysis.processor.default_mem_gb
        if 'iterfield' in kwargs:
            if 'joinfield' in kwargs or 'joinsource' in kwargs:
                raise ArcanaDesignError(
                    "Cannot provide both joinsource and iterfield to when "
                    "attempting to add '{}' node to {}".format(
                        name, self._error_msg_loc))
            node_cls = self.analysis.environment.node_types['map']
        elif 'joinsource' in kwargs or 'joinfield' in kwargs:
            if not ('joinfield' in kwargs and 'joinsource' in kwargs):
                raise ArcanaDesignError(
                    ("Both joinsource and joinfield kwargs are required to " +
                     "create a JoinNode (see {})").format(
                         name, self._error_msg_loc))
            joinsource = kwargs['joinsource']
            if joinsource in self.analysis.ITERFIELDS:
                self._iterator_joins.add(joinsource)
            node_cls = self.analysis.environment.node_types['join']
            # Prepend name of pipeline to joinsource to match the node names
            kwargs['joinsource'] = '{}_{}'.format(self.name, joinsource)
        else:
            node_cls = self.analysis.environment.node_types['base']
        # Create node
        node = node_cls(self.analysis.environment,
                        interface,
                        name="{}_{}".format(self._name, name),
                        requirements=requirements,
                        wall_time=wall_time,
                        annotations=annotations,
                        **kwargs)
        # Ensure node is added to workflow
        self._workflow.add_nodes([node])
        # Connect inputs, outputs and internal connections
        if inputs is not None:
            if not isinstance(inputs, dict):
                raise ArcanaDesignError(
                    "inputs of {} node in {} needs to be a dictionary "
                    "(not {})".format(name, self, inputs))
            for node_input, connect_from in inputs.items():
                if isinstance(connect_from[0], basestring):
                    input_spec, input_format = connect_from
                    self.connect_input(input_spec, node, node_input,
                                       input_format)
                else:
                    conn_node, conn_field = connect_from
                    self.connect(conn_node, conn_field, node, node_input)
        if outputs is not None:
            if not isinstance(outputs, dict):
                raise ArcanaDesignError(
                    "outputs of {} node in {} needs to be a dictionary "
                    "(not {})".format(name, self, outputs))
            for output_spec, (node_output, output_format) in outputs.items():
                self.connect_output(output_spec, node, node_output,
                                    output_format)
        return node
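A compact sketch of a typical call; the interface, spec names, format and requirement objects are borrowed from Example #13 below, which shows full pipeline constructors built from such calls:

    # Add a node fed from the 'magnitude' input spec and writing the
    # 'preproc' output spec; passing iterfield or joinsource/joinfield
    # instead would create a MapNode or JoinNode.
    reorient = pipeline.add(
        'fslreorient2std',
        fsl.utils.Reorient2Std(),
        inputs={'in_file': ('magnitude', nifti_gz_format)},
        outputs={'out_file': ('preproc', nifti_gz_format)},
        requirements=[fsl_req.v('5.0.9')],
        wall_time=5)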
Example #11
    def pipeline(self, getter_name, required_outputs=None, pipeline_args=None):
        """
        Returns a pipeline from a study by getting the method corresponding to
        the given name and checking that the required outputs are generated
        given the parameters of the study

        Parameters
        ----------
        getter_name : str
            Name of the method that constructs the pipeline
        required_outputs : list[str] | None
            The list of outputs that are expected of the pipeline
        pipeline_args : dict[str, *] | None
            Any arguments that should be passed to the method to produce the
            pipeline.
        """
        if pipeline_args is None:
            pipeline_args = ()
        elif isinstance(pipeline_args, dict):
            pipeline_args = tuple(pipeline_args.items())
        try:
            pipeline = self._pipelines_cache[(getter_name, pipeline_args)]
        except KeyError:
            try:
                getter = getattr(self, getter_name)
            except AttributeError:
                raise ArcanaDesignError(
                    "There is no pipeline constructor method named '{}' in "
                    "present in '{}' study".format(getter_name, self))
            self._pipeline_to_generate = getter_name
            try:
                pipeline = getter(**dict(pipeline_args))
            except ArcanaMissingDataException as e:
                e.msg += ("{}, which is required as an input when calling the "
                          "pipeline constructor method '{}' to create a "
                          "pipeline to produce '{}'".format(
                              e, getter_name, "', '".join(required_outputs)))
                raise e
            finally:
                self._pipeline_to_generate = None
            if pipeline is None:
                raise ArcanaDesignError(
                    "'{}' pipeline constructor in {} is missing return "
                    "statement (should return a Pipeline object)".format(
                        getter_name, self))
            elif not isinstance(pipeline, Pipeline):
                raise ArcanaDesignError(
                    "'{}' pipeline constructor in {} doesn't return a Pipeline"
                    " object ({})".format(getter_name, self, pipeline))
            # Check to see if the pipeline is equivalent to previously
            # generated pipelines (if two getter methods return equivalent
            # pipelines) and whether any outputs are to be generated twice
            # by different pipelines within the same workflow
            for prev_pipeline in self._pipelines_cache.values():
                if pipeline == prev_pipeline:
                    pipeline = prev_pipeline
                    break
                elif any(o in prev_pipeline.outputs for o in pipeline.outputs):
                    raise ArcanaDesignError(
                        "'{}' outputs are produced by more than one pipeline "
                        "({} and {})".format(
                            set(pipeline.output_names).intersection(
                                prev_pipeline.output_names), prev_pipeline,
                            pipeline))
            self._pipelines_cache[(getter_name, pipeline_args)] = pipeline
        if required_outputs is not None:
            # Check that the required outputs are created with the given
            # parameters
            missing_outputs = (set(required_outputs) -
                               set(pipeline.output_names))
            if missing_outputs:
                raise ArcanaOutputNotProducedException(
                    "Required output(s) '{}', will not be created by the '{}' "
                    "pipeline constructed by '{}' method in {} given the "
                    "missing study inputs:\n{}\n"
                    "and the provided switches:\n{}".format(
                        "', '".join(missing_outputs), pipeline.name,
                        getter_name, self, "\n".join(self.missing_inputs),
                        '\n'.join('{}={}'.format(s.name, s.value)
                                  for s in self.switches)))
        return pipeline
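A usage sketch, assuming 'study' is an instance of a Study subclass that defines the 'preprocess_pipeline' constructor shown in Example #13 and a 'preproc' data spec:

    # Build (or fetch from the per-study cache) the pipeline returned by
    # the 'preprocess_pipeline' method, checking that it can actually
    # produce 'preproc' given the study's current inputs and switches.
    pipeline = study.pipeline('preprocess_pipeline',
                              required_outputs=['preproc'])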
Example #12
 def __new__(metacls, name, bases, dct):  # @NoSelf @UnusedVariable
     if not any(issubclass(b, Study) for b in bases):
         raise ArcanaUsageError(
             "StudyMetaClass can only be used for classes that "
             "have Study as a base class")
     try:
         add_data_specs = dct['add_data_specs']
     except KeyError:
         add_data_specs = []
     try:
         add_param_specs = dct['add_param_specs']
     except KeyError:
         add_param_specs = []
     combined_attrs = set()
     combined_data_specs = {}
     combined_param_specs = {}
     for base in reversed(bases):
         # Get the combined class dictionary including base dicts
         # excluding auto-added properties for data and parameter specs
         combined_attrs.update(
             a for a in dir(base)
             if (not issubclass(base, Study) or a not in base.spec_names()))
         # TODO: need to check that fields are not overridden by filesets
         #       and vice-versa
         try:
             combined_data_specs.update(
                 (d.name, d) for d in base.data_specs())
         except AttributeError:
             pass  # Not a Study class
         try:
             combined_param_specs.update(
                 (p.name, p) for p in base.param_specs())
         except AttributeError:
             pass  # Not a Study class
     combined_attrs.update(list(dct.keys()))
     combined_data_specs.update((d.name, d) for d in add_data_specs)
     combined_param_specs.update((p.name, p) for p in add_param_specs)
     # Check that the pipeline names in data specs correspond to a
     # pipeline method in the class and that if a pipeline is called with
     # arguments (for parameterizing a range of metrics for example) then
     # the same argument names are consistent across every call.
     pipeline_arg_names = {}
     for spec in add_data_specs:
         if spec.derived:
             if spec.pipeline_getter in ('pipeline', 'new_pipeline'):
                 raise ArcanaDesignError(
                     "Cannot use the names 'pipeline' or  'new_pipeline' "
                     "('{}') for the name of a pipeline constructor in "
                     "class {} as it clashes with base method to create "
                     "pipelines".format(spec.pipeline_getter, name))
             if spec.pipeline_getter not in combined_attrs:
                 raise ArcanaDesignError(
                     "Pipeline to generate '{}', '{}', is not present"
                     " in '{}' class".format(spec.name,
                                             spec.pipeline_getter, name))
             try:
                 if pipeline_arg_names[
                         spec.pipeline_getter] != spec.pipeline_arg_names:
                     raise ArcanaDesignError(
                         "Inconsistent pipeline argument names used for "
                         "'{}' pipeline getter {} and {}".format(
                             spec.pipeline_getter,
                             pipeline_arg_names[spec.pipeline_getter],
                             spec.pipeline_arg_names))
             except KeyError:
                 pipeline_arg_names[
                     spec.pipeline_getter] = spec.pipeline_arg_names
     # Check for name clashes between data and parameter specs
     spec_name_clashes = (set(combined_data_specs)
                          & set(combined_param_specs))
     if spec_name_clashes:
         raise ArcanaDesignError(
             "'{}' name both data and parameter specs in '{}' class".format(
                 "', '".join(spec_name_clashes), name))
     reserved_clashes = [
         n for n in combined_data_specs if n in Study.ITERFIELDS
     ]
     if reserved_clashes:
         raise ArcanaDesignError(
             "'{}' data spec names clash with reserved names in {}".format(
                 "', '".join(reserved_clashes), name))
     dct['_data_specs'] = combined_data_specs
     dct['_param_specs'] = combined_param_specs
     if '__metaclass__' not in dct:
         dct['__metaclass__'] = metacls
     # Append description of Study parameters to class
     try:
         docstring = dct['__doc__']
     except KeyError:
         docstring = '{} Study class'.format(name)
     if 'Parameters' not in docstring:
         docstring += Study.__doc__
     dct['__doc__'] = docstring
     return type(name, bases, dct)
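A sketch of the class-definition pattern this metaclass checks. Only Study, StudyMetaClass and SwitchSpec appear in the snippets above; FilesetSpec, the spec constructors' signatures, the format object and the Python 3 metaclass keyword are assumptions made purely for illustration:

    class ExampleStudy(Study, metaclass=StudyMetaClass):

        add_data_specs = [
            # 'preproc' is derived, so a 'preprocess_pipeline' method must
            # exist on the class (the metaclass above enforces this)
            FilesetSpec('preproc', nifti_gz_format,
                        pipeline_getter='preprocess_pipeline'),
        ]

        add_param_specs = [
            SwitchSpec('preproc_denoise', False),  # boolean switch
        ]

        def preprocess_pipeline(self, **name_maps):
            ...  # construct and return a Pipeline (see Example #13)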
Example #13
    def preprocess_pipeline(self, **name_maps):  # @UnusedVariable @IgnorePep8
        """
        Performs a series of FSL preprocessing steps, including Eddy and Topup

        Parameters
        ----------
        phase_dir : str{AP|LR|IS}
            The phase encode direction
        """

        # Determine whether we can correct for distortion, i.e. if reference
        # scans are provided
        # Include all references
        references = [fsl_cite, eddy_cite, topup_cite,
                      distort_correct_cite]
        if self.branch('preproc_denoise'):
            references.extend(dwidenoise_cites)

        pipeline = self.new_pipeline(
            name='preprocess',
            name_maps=name_maps,
            desc=(
                "Preprocess dMRI studies using distortion correction"),
            references=references)

        # Create nodes to convert gradients to FSL format
        if self.input('magnitude').format == dicom_format:
            extract_grad = pipeline.add(
                "extract_grad",
                ExtractFSLGradients(),
                inputs={
                    'in_file': ('magnitude', dicom_format)},
                outputs={
                    'bvecs_file': ('grad_dirs', fsl_bvecs_format),
                    'bvals_file': ('bvalues', fsl_bvals_format)},
                requirements=[mrtrix_req.v('3.0rc3')])
            grad_fsl_kwargs = {
                'connect': {'in1': (extract_grad, 'bvecs_file'),
                            'in2': (extract_grad, 'bvals_file')}}
        elif self.provided('grad_dirs') and self.provided('bvalues'):
            grad_fsl_kwargs = {
                'inputs': {'in1': ('grad_dirs', fsl_bvecs_format),
                           'in2': ('bvalues', fsl_bvals_format)}}
        else:
            raise ArcanaDesignError(
                "Either input 'magnitude' image needs to be in DICOM format "
                "or gradient directions and b-values need to be explicitly "
                "provided to {}".format(self))

        # Gradient merge node
        grad_fsl = pipeline.add(
            "grad_fsl",
            MergeTuple(2),
            **grad_fsl_kwargs)

        # Denoise the dwi-scan
        if self.branch('preproc_denoise'):
            # Run denoising
            denoise = pipeline.add(
                'denoise',
                DWIDenoise(),
                inputs={
                    'in_file': ('magnitude', nifti_gz_format)},
                requirements=[mrtrix_req.v('3.0rc3')])

            # Calculate residual noise
            subtract_operands = pipeline.add(
                'subtract_operands',
                Merge(2),
                inputs={
                    'in1': ('magnitude', nifti_gz_format)},
                connect={
                    'in2': (denoise, 'noise')})

            pipeline.add(
                'subtract',
                MRCalc(
                    operation='subtract'),
                connect={
                    'operands': (subtract_operands, 'out')},
                outputs={
                    'out_file': ('noise_residual', mrtrix_format)},
                requirements=[mrtrix_req.v('3.0rc3')])

        # Preproc kwargs
        preproc_kwargs = {}

        if (self.provided('dwi_reference') or
                self.provided('reverse_phase')):
            # Extract b=0 volumes
            dwiextract = pipeline.add(
                'dwiextract',
                ExtractDWIorB0(
                    bzero=True,
                    out_ext='.nii.gz'),
                inputs={
                    'in_file': ('magnitude', dicom_format)},
                requirements=[mrtrix_req.v('3.0rc3')])

            # Get first b=0 from dwi b=0 volumes
            mrconvert = pipeline.add(
                "mrconvert",
                MRConvert(
                    coord=(3, 0)),
                connect={
                    'in_file': (dwiextract, 'out_file')},
                requirements=[mrtrix_req.v('3.0rc3')])

            # Concatenate extracted forward rpe with reverse rpe
            mrcat = pipeline.add(
                'mrcat',
                MRCat(),
                inputs={
                    'second_scan': ((
                        'dwi_reference' if self.provided('dwi_reference')
                        else 'reverse_phase'), mrtrix_format)},
                connect={
                    'first_scan': (mrconvert, 'out_file')},
                requirements=[mrtrix_req.v('3.0rc3')])

            # Create node to assign the right PED to the diffusion
            prep_dwi = pipeline.add(
                'prepare_dwi',
                PrepareDWI(),
                inputs={
                    'pe_dir': ('ped', float),
                    'ped_polarity': ('pe_angle', float)})

            preproc_kwargs['rpe_pair'] = True

            distortion_correction = True
            preproc_conns = {'connect': {'se_epi': (mrcat, 'out_file')}}
        else:
            distortion_correction = False
            preproc_kwargs['rpe_none'] = True
            preproc_conns = {}

        if self.parameter('preproc_pe_dir') is not None:
            preproc_kwargs['pe_dir'] = self.parameter('preproc_pe_dir')

        preproc = pipeline.add(
            'dwipreproc',
            DWIPreproc(
                no_clean_up=True,
                out_file_ext='.nii.gz',
                # FIXME: Need to determine this programmatically
                # eddy_parameters = '--data_is_shelled '
                temp_dir='dwipreproc_tempdir',
                **preproc_kwargs),
            connect={
                'grad_fsl': (grad_fsl, 'out')},
            outputs={
                'eddy_parameters': ('eddy_par', eddy_par_format)},
            requirements=[mrtrix_req.v('3.0rc3'), fsl_req.v('5.0.10')],
            wall_time=60,
            **preproc_conns)
        if self.branch('preproc_denoise'):
            pipeline.connect(denoise, 'out_file', preproc, 'in_file')
        else:
            pipeline.connect_input('magnitude', preproc, 'in_file',
                                   nifti_gz_format)
        if distortion_correction:
            pipeline.connect(prep_dwi, 'pe', preproc, 'pe_dir')

        # Create node to reorient preproc out_file
        pipeline.add(
            'fslreorient2std',
            fsl.utils.Reorient2Std(),
            connect={
                'in_file': (preproc, 'out_file')},
            outputs={
                'out_file': ('preproc', nifti_gz_format)},
            requirements=[fsl_req.v('5.0.9')])

        return pipeline
Example #14
    def _connect_pipeline(self,
                          pipeline,
                          required_outputs,
                          workflow,
                          subject_inds,
                          visit_inds,
                          filter_array,
                          force=False):
        """
        Connects a pipeline to an overarching workflow that sets up iterators
        over subjects|visits present in the repository (if required) and
        repository source and sink nodes

        Parameters
        ----------
        pipeline : Pipeline
            The pipeline to connect
        required_outputs : set[str] | None
            The outputs required to be produced by this pipeline. If None all
            are deemed to be required
        workflow : nipype.pipeline.engine.Workflow
            The overarching workflow to connect the pipeline to
        subject_inds : dict[str, int]
            A mapping of subject ID to row index in the filter array
        visit_inds : dict[str, int]
            A mapping of visit ID to column index in the filter array
        filter_array : 2-D numpy.array[bool]
            A two-dimensional boolean array, where rows correspond to
            subjects and columns correspond to visits in the repository. True
            values represent a combination of subject & visit ID to include
            in the current round of processing. Note that if the 'force'
            flag is not set, sessions won't be reprocessed unless the
            saved provenance doesn't match that of the given pipeline.
        force : bool | 'all'
            A flag to force the processing of all sessions in the filter
            array, regardless of whether the parameters|pipeline used
            to generate existing data matches the given pipeline
        """
        if self.reprocess == 'force':
            force = True
        # Close off construction of the pipeline and create its input and
        # output nodes and provenance dictionary
        pipeline.cap()
        # Prepend prerequisite pipelines to complete workflow if they need
        # to be (re)processed
        final_nodes = []
        # The array that represents the subject/visit pairs for which any
        # prerequisite pipeline will be (re)processed, and which therefore
        # needs to be included in the processing of the current pipeline. Row
        # indices correspond to subjects and column indices visits
        prqs_to_process_array = np.zeros((len(subject_inds), len(visit_inds)),
                                         dtype=bool)
        # The array that represents the subject/visit pairs for which any
        # prerequisite pipeline will be skipped due to missing inputs. Row
        # indices correspond to subjects and column indices visits
        prqs_to_skip_array = np.zeros((len(subject_inds), len(visit_inds)),
                                      dtype=bool)
        for getter_name in pipeline.prerequisites:
            prereq = pipeline.study.pipeline(getter_name)
            if prereq.to_process_array.any():
                final_nodes.append(prereq.node('final'))
                prqs_to_process_array |= prereq.to_process_array
            prqs_to_skip_array |= prereq.to_skip_array
        # Get list of sessions that need to be processed (i.e. if
        # they don't contain the outputs of this pipeline)
        to_process_array, to_protect_array, to_skip_array = self._to_process(
            pipeline, required_outputs, prqs_to_process_array,
            prqs_to_skip_array, filter_array, subject_inds, visit_inds, force)
        # Store the arrays signifying which nodes to process, protect or skip
        # so they can be passed to downstream pipelines
        pipeline.to_process_array = to_process_array
        pipeline.to_protect_array = to_protect_array
        pipeline.to_skip_array = to_skip_array
        # Check to see if there are any sessions to process
        if not to_process_array.any():
            raise ArcanaNoRunRequiredException(
                "No sessions to process for '{}' pipeline".format(
                    pipeline.name))
        # Set up workflow to run the pipeline, loading and saving from the
        # repository
        workflow.add_nodes([pipeline._workflow])
        # If prerequisite pipelines need to be processed, connect their
        # "final" nodes to the initial node of this pipeline to ensure that
        # they are all processed before this pipeline is run.
        if final_nodes:
            prereqs = pipeline.add('prereqs', Merge(len(final_nodes)))
            for i, final_node in enumerate(final_nodes, start=1):
                workflow.connect(final_node, 'out', prereqs, 'in{}'.format(i))
        else:
            prereqs = None
        # Construct iterator structure over subjects and sessions to be
        # processed
        iter_nodes = self._iterate(pipeline, to_process_array, subject_inds,
                                   visit_inds)
        sources = {}
        # Loop through each frequency present in the pipeline inputs and
        # create a corresponding source node
        for freq in pipeline.input_frequencies:
            try:
                inputs = list(pipeline.frequency_inputs(freq))
            except ArcanaMissingDataException as e:
                raise ArcanaMissingDataException(
                    str(e) + ", which is required for pipeline '{}'".format(
                        pipeline.name))
            inputnode = pipeline.inputnode(freq)
            sources[freq] = source = pipeline.add(
                '{}_source'.format(freq),
                RepositorySource(i.collection for i in inputs),
                inputs=({
                    'prereqs': (prereqs, 'out')
                } if prereqs is not None else {}))
            # Connect iter_nodes to source and input nodes
            for iterator in pipeline.iterators(freq):
                pipeline.connect(iter_nodes[iterator], iterator, source,
                                 iterator)
                pipeline.connect(source, iterator, inputnode, iterator)
            for input in inputs:
                pipeline.connect(source, input.suffixed_name, inputnode,
                                 input.name)
        deiter_nodes = {}

        def deiter_node_sort_key(it):
            """
            If there are two iter_nodes (i.e. both subject and visit ID) and
            one depends on the other (i.e. if the visit IDs per subject
            vary and vice-versa) we need to ensure that the dependent
            iterator is deiterated (joined) first.
            """
            return iter_nodes[it].itersource is None

        # Connect all outputs to the repository sink, creating a new sink for
        # each frequency level (i.e 'per_session', 'per_subject', 'per_visit',
        # or 'per_study')
        for freq in pipeline.output_frequencies:
            outputs = list(pipeline.frequency_outputs(freq))
            if pipeline.iterators(freq) - pipeline.iterators():
                raise ArcanaDesignError(
                    "Doesn't make sense to output '{}', which are of '{}' "
                    "frequency, when the pipeline only iterates over '{}'".
                    format("', '".join(o.name for o in outputs), freq,
                           "', '".join(pipeline.iterators())))
            outputnode = pipeline.outputnode(freq)
            # Connect filesets/fields to the sink node, skipping outputs
            # that are study inputs
            to_connect = {
                o.suffixed_name: (outputnode, o.name)
                for o in outputs if o.is_spec
            }
            # Connect iterators to sink node
            to_connect.update(
                {i: (iter_nodes[i], i)
                 for i in pipeline.iterators()})
            # Connect checksums/values from sources to sink node in order to
            # save in provenance, joining where necessary
            for input_freq in pipeline.input_frequencies:
                checksums_to_connect = [
                    i.checksum_suffixed_name
                    for i in pipeline.frequency_inputs(input_freq)
                ]
                if not checksums_to_connect:
                    # Rare case of a pipeline with no inputs only iter_nodes
                    # that will only occur in unittests in all likelihood
                    continue
                # Loop over iterators that need to be joined, i.e. that are
                # present in the input frequency but not the output frequency
                # and create join nodes
                source = sources[input_freq]
                for iterator in (pipeline.iterators(input_freq) -
                                 pipeline.iterators(freq)):
                    join = pipeline.add(
                        '{}_to_{}_{}_checksum_join'.format(
                            input_freq, freq, iterator),
                        IdentityInterface(checksums_to_connect),
                        inputs={
                            tc: (source, tc)
                            for tc in checksums_to_connect
                        },
                        joinsource=iterator,
                        joinfield=checksums_to_connect)
                    source = join
                to_connect.update(
                    {tc: (source, tc)
                     for tc in checksums_to_connect})
            # Add sink node
            sink = pipeline.add('{}_sink'.format(freq),
                                RepositorySink((o.collection for o in outputs),
                                               pipeline, required_outputs),
                                inputs=to_connect)
            # "De-iterate" (join) over iterators to get back to single child
            # node by the time we connect to the final node of the pipeline Set
            # the sink and subject_id as the default deiterator if there are no
            # deiterates (i.e. per_study) or to use as the upstream node to
            # connect the first deiterator for every frequency
            deiter_nodes[freq] = sink  # for per_study the "deiterator" == sink
            for iterator in sorted(pipeline.iterators(freq),
                                   key=deiter_node_sort_key):
                # Connect to previous deiterator or sink
                # NB: we only need to keep a reference to the last one in the
                # chain in order to connect with the "final" node, so we can
                # overwrite the entry in the 'deiter_nodes' dict
                deiter_nodes[freq] = pipeline.add(
                    '{}_{}_deiter'.format(freq, iterator),
                    IdentityInterface(['checksums']),
                    inputs={'checksums': (deiter_nodes[freq], 'checksums')},
                    joinsource=iterator,
                    joinfield='checksums')
        # Create a final node, which is used to connect with downstream
        # pipelines
        pipeline.add('final',
                     Merge(len(deiter_nodes)),
                     inputs={
                         'in{}'.format(i): (di, 'checksums')
                         for i, di in enumerate(deiter_nodes.values(), start=1)
                     })
Example #15
        def push_on_stack(pipeline, filt_array, req_outputs, downstream=()):
            """
            Push a pipeline onto the stack of pipelines to be processed,
            detecting common upstream pipelines and resolving them to a single
            pipeline

            Parameters
            ----------
            pipeline : Pipeline
                The pipeline to add to the stack
            filt_array : 2-D numpy.ndarray
                An array of sessions that are to be processed for the given
                pipeline
            req_outputs : list[str]
                The outputs of the pipeline that are actually required (non-
                required outputs can be ignored if they are not generated)
            downstream : tuple[Pipeline]
                The pipelines directly downstream of the pipeline to be added.
                Used to detect circular dependencies
            """
            if req_outputs is None:
                req_outputs = pipeline.output_names
            # Check downstream pipelines for circular dependencies
            downstream_pipelines = [p for p, _ in downstream]
            if pipeline in downstream_pipelines:
                recur_index = downstream_pipelines.index(pipeline)
                raise ArcanaDesignError(
                    "{} cannot be a dependency of itself. Call-stack:\n{}".
                    format(
                        pipeline, '\n'.join(
                            '{} ({})'.format(p, ', '.join(ro))
                            for p, ro in (((pipeline, req_outputs), ) +
                                          downstream[:(recur_index + 1)]))))
            if pipeline.name in stack:
                # Pop pipeline from stack in order to add it to the end of the
                # stack and ensure it is run before all downstream pipelines
                prev_pipeline, prev_req_outputs, prev_filt_array = stack.pop(
                    pipeline.name)
                if pipeline is not prev_pipeline and pipeline != prev_pipeline:
                    raise ArcanaDesignError(
                        "Attempting to run two different pipelines with the "
                        "same name, {} and {}".format(pipeline, prev_pipeline))
                # Combine required outputs and filter array with previous
                # references to the pipeline
                req_outputs = copy(req_outputs)
                req_outputs.update(prev_req_outputs)
                filt_array = filt_array | prev_filt_array
            # If the pipeline to process contains summary outputs (i.e. 'per-
            # subject|visit|study' frequency), then we need to "dialate" the
            # filter array to include IDs across the scope of the study, e.g.
            # all subjects for per-visit, or all visits for per-subject.
            output_freqs = set(pipeline.output_frequencies)
            dialated_filt_array = self._dialate_array(filt_array,
                                                      pipeline.joins)
            added = dialated_filt_array ^ filt_array
            if added.any():
                filt_array = dialated_filt_array
                # Invert the index dictionaries to get index-to-ID maps
                inv_subject_inds = {v: k for k, v in subject_inds.items()}
                inv_visit_inds = {v: k for k, v in visit_inds.items()}
                logger.warning(
                    "Dialated filter array used to process '{}' pipeline to "
                    "include {} subject/visit IDs due to its '{}' summary "
                    "outputs ".format(
                        pipeline.name,
                        ', '.join('({},{})'.format(inv_subject_inds[s],
                                                   inv_visit_inds[v])
                                  for s, v in zip(*np.nonzero(added))),
                        "' and '".join(output_freqs)))

            stack[pipeline.name] = pipeline, req_outputs, filt_array
            # Recursively add all prerequisites to stack
            try:
                for (prq_getter,
                     prq_req_outputs) in pipeline.prerequisites.items():
                    prereq = pipeline.study.pipeline(prq_getter,
                                                     prq_req_outputs)
                    push_on_stack(prereq, filt_array, prq_req_outputs,
                                  ((pipeline, req_outputs), ) + downstream)
            except (ArcanaMissingDataException,
                    ArcanaOutputNotProducedException) as e:
                e.msg += ("\nwhich are required as inputs to the '{}' "
                          "pipeline to produce '{}'".format(
                              pipeline.name, "', '".join(req_outputs)))
                raise e