Example #1
    def get(self, path=None):
        args = self.rp.parse_args()
        # either use value from routing, or from request
        path = path or args.path
        if path is None or '*' in path:
            path = path if path else '*'
            # no path, give list of available files
            return jsonify({
                'files': [f for f in self.ds.repo.get_indexed_files()
                          if fnmatch(f, path)],
            })

        file_abspath = self._validate_file_path(path)
        if not self.read_only:
            # in read-only mode we cannot do this, as it might cause
            # more datasets to be installed, etc.
            self.ds.get(file_abspath)
        # TODO proper error reporting when loading/decoding fails
        if args.json == 'stream':
            content = list(json_py.load_stream(file_abspath))
        elif args.json == 'yes':
            content = json_py.load(file_abspath)
        else:
            with open(file_abspath, 'r') as f:
                content = f.read()

        return jsonify({
            'path': path,
            'content': content,
        })
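The three branches on `args.json` above are three ways of reading the same file. A minimal sketch of the difference, assuming `json_py` is datalad.support.json_py and the file name is hypothetical:

import datalad.support.json_py as json_py

path = 'records.json'  # hypothetical stream file, one JSON record per line
json_py.dump2stream([{'a': 1}, {'b': 2}], path)

# json == 'stream': decode one object per line
records = list(json_py.load_stream(path))
assert records == [{'a': 1}, {'b': 2}]

# json == 'yes' would be json_py.load(path), which expects the whole file
# to be a single JSON document and would fail on this two-record stream.

# default: raw text, no decoding
with open(path, 'r') as f:
    raw = f.read()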
Example #2
def add_name_fixing(path):
    import datalad.support.json_py as json_py

    spec = [r for r in json_py.load_stream(path)]

    # Note: We append the procedure to dicomseries:all, since we do not
    # technically act upon a particular series. This is because the procedure
    # concerns the outcome of the conversion, not the raw data. The file
    # selection has to be done within the procedure and can't be controlled by
    # the spec or hirni-spec2bids ATM.
    for snippet in spec:
        if snippet['type'] == 'dicomseries:all':
            # Note: a single dict cannot carry two 'procedure-name' keys
            # (the second silently overwrites the first), so each procedure
            # gets its own record:
            snippet['procedures'].extend([
                {
                    'procedure-name': {
                        'value': 'change-dwi-run-to-acq_fix_all',
                        'approved': True
                    },
                    'on-anonymize': {
                        'value': False,
                        'approved': True
                    },
                },
                {
                    'procedure-name': {
                        'value': 'fieldmaps-to-phase-or-magnitude_fix_all',
                        'approved': True
                    },
                    'on-anonymize': {
                        'value': False,
                        'approved': True
                    },
                },
            ])

    json_py.dump2stream(spec, path)
Example #3
def test_dump2stream(path):
    stream = [dict(a=5), dict(b=4)]
    dump2stream([dict(a=5), dict(b=4)], path)
    eq_(list(load_stream(path)), stream)

    # the same for compression
    dump2xzstream([dict(a=5), dict(b=4)], path)
    eq_(list(load_xzstream(path)), stream)
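The round-trip works because dump2stream writes one compact JSON object per line, so such files can be concatenated or processed line by line without a full parse (the dicom2spec example later in this collection relies on exactly that). A sketch of what ends up on disk; the exact serialization is an assumption:

dump2stream([dict(a=5), dict(b=4)], path)
with open(path) as f:
    print(f.read())
# roughly:
# {"a": 5}
# {"b": 4}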
Example #4
def get_run_info(dset, message):
    """Extract run information from `message`

    Parameters
    ----------
    message : str
        A commit message.

    Returns
    -------
    A tuple with the command's message and a dict with run information. Both
    these values are None if `message` doesn't have a run command.

    Raises
    ------
    A ValueError if the information in `message` is invalid.
    """
    cmdrun_regex = r'\[DATALAD RUNCMD\] (.*)=== Do not change lines below ' \
                   r'===\n(.*)\n\^\^\^ Do not change lines above \^\^\^'
    runinfo = re.match(cmdrun_regex, message, re.MULTILINE | re.DOTALL)
    if not runinfo:
        return None, None

    rec_msg, runinfo = runinfo.groups()

    try:
        runinfo = json.loads(runinfo)
    except Exception as e:
        raise ValueError(
            'cannot rerun command, command specification is not valid JSON: '
            '%s' % exc_str(e)
        )
    if not isinstance(runinfo, (list, dict)):
        # this is a run record ID -> load the beast
        record_dir = dset.config.get(
            'datalad.run.record-directory',
            default=op.join('.datalad', 'runinfo'))
        record_path = op.join(dset.path, record_dir, runinfo)
        if not op.lexists(record_path):
            # too harsh IMHO, but the same harshness as a few lines further down
            raise ValueError("Run record sidecar file not found: {}".format(record_path))
        # TODO `get` the file
        recs = load_stream(record_path, compressed=True)
        # TODO check if there is a record
        runinfo = next(recs)
    if 'cmd' not in runinfo:
        raise ValueError("Looks like a run commit but does not have a command")
    return rec_msg.rstrip(), runinfo
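For orientation, a sketch of the commit-message layout the regex above matches. The sample message is illustrative only; the real one is written by `datalad run`:

import json
import re

message = (
    '[DATALAD RUNCMD] do something\n\n'
    '=== Do not change lines below ===\n'
    '{"cmd": "echo hi"}\n'
    '^^^ Do not change lines above ^^^'
)
cmdrun_regex = r'\[DATALAD RUNCMD\] (.*)=== Do not change lines below ' \
               r'===\n(.*)\n\^\^\^ Do not change lines above \^\^\^'
rec_msg, runinfo = re.match(cmdrun_regex, message,
                            re.MULTILINE | re.DOTALL).groups()
assert rec_msg.rstrip() == 'do something'
assert json.loads(runinfo) == {"cmd": "echo hi"}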
Example #5
def test_default_rules(path):

    # ## SETUP a raw ds
    ds = install(source=test_raw_ds.get_raw_dataset(), path=path)
    # ## END SETUP

    # create specs for dicomseries w/ default rules:
    # TODO: spec path should prob. relate to `path` via (derived) acquisition!
    ds.hirni_dicom2spec(path=op.join("func_acq", "dicoms"),
                        spec=op.join("func_acq", "studyspec.json"))
    ds.hirni_dicom2spec(path=op.join("struct_acq", "dicoms"),
                        spec=op.join("struct_acq", "studyspec.json"))

    func_spec = [
        s for s in load_stream(op.join(path, "func_acq", "studyspec.json"))
    ]

    for snippet in func_spec:
        # type
        assert_in("type", snippet.keys())
        assert_in(snippet["type"], ["dicomseries", "dicomseries:all"])

        # no comment in default spec
        assert not has_specval(snippet, 'comment') or not get_specval(
            snippet, 'comment')
        # description
        assert has_specval(snippet, 'description')
        assert_equal(get_specval(snippet, 'description'),
                     "func_task-oneback_run-1")
        # subject
        assert has_specval(snippet, 'subject')
        assert_equal(get_specval(snippet, 'subject'), '02')
        # modality
        assert has_specval(snippet, 'bids-modality')
        assert_equal(get_specval(snippet, 'bids-modality'), 'bold')
        # task
        assert has_specval(snippet, "bids-task")
        assert_equal(get_specval(snippet, "bids-task"), "oneback")
        # run
        assert has_specval(snippet, "bids-run")
        assert_equal(get_specval(snippet, "bids-run"), "01")
        # id
        assert has_specval(snippet, "id")
        assert_equal(get_specval(snippet, "id"), 401)

    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(func_spec), 2)
    assert_in("dicomseries", [s['type'] for s in func_spec])
    assert_in("dicomseries:all", [s['type'] for s in func_spec])

    struct_spec = [
        s for s in load_stream(op.join(path, "struct_acq", "studyspec.json"))
    ]

    for snippet in struct_spec:

        # type
        assert "type" in snippet.keys()
        assert snippet["type"] in ["dicomseries", "dicomseries:all"]
        # no comment in default spec
        assert not has_specval(snippet, 'comment') or not get_specval(
            snippet, 'comment')
        # description
        assert has_specval(snippet, 'description')
        assert_equal(get_specval(snippet, 'description'), "anat-T1w")
        # subject
        assert has_specval(snippet, 'subject')
        assert_equal(get_specval(snippet, 'subject'), '02')
        # modality
        assert has_specval(snippet, 'bids-modality')
        assert_equal(get_specval(snippet, 'bids-modality'), 't1w')
        # run
        assert has_specval(snippet, "bids-run")
        assert_equal(get_specval(snippet, "bids-run"), "1")

    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(struct_spec), 2)
    assert_in("dicomseries", [s['type'] for s in struct_spec])
    assert_in("dicomseries:all", [s['type'] for s in struct_spec])
Example #6
def test_custom_rules(path):

    # ## SETUP a raw ds
    ds = install(source=test_raw_ds.get_raw_dataset(), path=path)
    # ## END SETUP

    # 1. simply default rules
    ds.hirni_dicom2spec(path=op.join("struct_acq", "dicoms"),
                        spec=op.join("struct_acq", "studyspec.json"))
    struct_spec = [
        s for s in load_stream(op.join(path, "struct_acq", "studyspec.json"))
    ]

    for spec_snippet in struct_spec:

        # no comment in default spec
        assert not has_specval(spec_snippet, 'comment') or not get_specval(
            spec_snippet, 'comment')
        # subject
        assert has_specval(spec_snippet, 'subject')
        assert_equal(get_specval(spec_snippet, 'subject'), '02')
        # modality
        assert has_specval(spec_snippet, 'bids-modality')
        assert_equal(get_specval(spec_snippet, 'bids-modality'), 't1w')
    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(struct_spec), 2)
    assert_in("dicomseries", [s['type'] for s in struct_spec])
    assert_in("dicomseries:all", [s['type'] for s in struct_spec])

    # set config to use custom rules
    import datalad_hirni
    ds.config.add(
        "datalad.hirni.dicom2spec.rules",
        op.join(op.dirname(datalad_hirni.__file__), 'resources', 'rules',
                'test_rules.py'),
    )

    # 2. do again with configured rules (rules 1)
    import os
    os.unlink(op.join(path, 'struct_acq', 'studyspec.json'))

    ds.hirni_dicom2spec(path=op.join("struct_acq", "dicoms"),
                        spec=op.join("struct_acq", "studyspec.json"))
    struct_spec = [
        s for s in load_stream(op.join(path, "struct_acq", "studyspec.json"))
    ]

    # assertions wrt spec
    for spec_snippet in struct_spec:

        # now there's a comment in spec
        assert has_specval(spec_snippet, 'comment')
        assert_equal(get_specval(spec_snippet, 'comment'),
                     "Rules1: These rules are for unit testing only")

    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(struct_spec), 2)
    assert_in("dicomseries", [s['type'] for s in struct_spec])
    assert_in("dicomseries:all", [s['type'] for s in struct_spec])

    # 3. once again with two configured rule sets (rules 1 and 2)
    ds.config.add(
        "datalad.hirni.dicom2spec.rules",
        op.join(op.dirname(datalad_hirni.__file__), 'resources', 'rules',
                'test_rules2.py'),
    )
    rule_files = ds.config.get("datalad.hirni.dicom2spec.rules")
    # ensure assumption about order (dicom2spec relies on it):

    assert_equal(rule_files, (op.join(op.dirname(datalad_hirni.__file__),
                                      'resources', 'rules', 'test_rules.py'),
                              op.join(op.dirname(datalad_hirni.__file__),
                                      'resources', 'rules', 'test_rules2.py')))

    os.unlink(op.join(path, 'struct_acq', 'studyspec.json'))
    ds.hirni_dicom2spec(path=op.join("struct_acq", "dicoms"),
                        spec=op.join("struct_acq", "studyspec.json"))
    struct_spec = [
        s for s in load_stream(op.join(path, "struct_acq", "studyspec.json"))
    ]

    # assertions wrt spec
    for spec_snippet in struct_spec:

        # Rule2 should have overwritten Rule1's comment:
        assert has_specval(spec_snippet, 'comment')
        assert_equal(get_specval(spec_snippet, 'comment'),
                     "Rules2: These rules are for unit testing only")

    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(struct_spec), 2)
    assert_in("dicomseries", [s['type'] for s in struct_spec])
    assert_in("dicomseries:all", [s['type'] for s in struct_spec])
Example #7
    def __call__(path=None,
                 spec=None,
                 dataset=None,
                 subject=None,
                 anon_subject=None,
                 acquisition=None,
                 properties=None):

        # TODO: acquisition can probably be removed (or made an alternative to
        # derive spec and/or dicom location from)

        # Change, so path needs to point directly to dicom ds?
        # Or just use acq and remove path?

        dataset = require_dataset(dataset,
                                  check_installed=True,
                                  purpose="spec from dicoms")

        from datalad.utils import assure_list
        if path is not None:
            path = assure_list(path)
            path = [resolve_path(p, dataset) for p in path]
        else:
            raise InsufficientArgumentsError(
                "insufficient arguments for dicom2spec: a path is required")

        # TODO: We should be able to deal with several paths at once
        #       ATM we aren't (see also commit + message of actual spec)
        assert len(path) == 1

        if not spec:
            # TODO: That's prob. wrong. We can derive default spec from
            # acquisition
            raise InsufficientArgumentsError(
                "insufficient arguments for dicom2spec: a spec file is required"
            )
        else:
            spec = resolve_path(spec, dataset)

        spec_series_list = \
            [r for r in json_py.load_stream(spec)] if op.exists(spec) else list()

        # get dataset level metadata:
        found_some = False
        for meta in dataset.meta_dump(
                path,
                recursive=False,  # always False?
                reporton='datasets',
                return_type='generator',
                result_renderer='disabled'):
            if meta.get('status', None) not in ['ok', 'notneeded']:
                yield meta
                continue

            if 'dicom' not in meta['metadata']:

                # TODO: Really "notneeded" or simply not a result at all?
                yield dict(status='notneeded',
                           message=("found no DICOM metadata for %s",
                                    meta['path']),
                           path=meta['path'],
                           type='dataset',
                           action='dicom2spec',
                           logger=lgr)
                continue

            if 'Series' not in meta['metadata']['dicom'] or \
                    not meta['metadata']['dicom']['Series']:
                yield dict(
                    status='impossible',
                    message=("no image series detected in DICOM metadata of"
                             " %s", meta['path']),
                    path=meta['path'],
                    type='dataset',
                    action='dicom2spec',
                    logger=lgr)
                continue

            found_some = True

            overrides = dict()
            if properties:
                # load from file or json string
                props = json_py.load(properties) \
                        if op.exists(properties) else json_py.loads(properties)
                # turn into editable, pre-approved records
                props = {
                    k: dict(value=v, approved=True)
                    for k, v in props.items()
                }
                overrides.update(props)

            spec_series_list = add_to_spec(
                meta,
                spec_series_list,
                op.dirname(spec),
                subject=subject,
                anon_subject=anon_subject,
                # session=session,
                # TODO: parameter "session" was what
                # we now call acquisition. This is
                # NOT a good default for bids_session!
                # Particularly wrt to anonymization
                overrides=overrides,
                dataset=dataset)

        if not found_some:
            yield dict(
                status='impossible',
                message="found no DICOM metadata",
                path=path,
                # TODO: arguably 'file' or 'dataset', depending on path
                type='file',
                action='dicom2spec',
                logger=lgr)
            return

        # TODO: RF needed. This rule should go elsewhere:
        # ignore duplicates (prob. reruns of aborted runs)
        # -> convert highest id only
        # Note: This sorting is a q&d hack!
        # TODO: Sorting needs to become more sophisticated + include notion of :all
        spec_series_list = sorted(spec_series_list,
                                  key=lambda x: get_specval(x, 'id')
                                  if 'id' in x.keys() else 0)
        for i in range(len(spec_series_list)):
            # Note: Removed the following line from condition below,
            # since it appears to be pointless. Value for 'converter'
            # used to be 'heudiconv' or 'ignore' for a 'dicomseries', so
            # it's not clear ATM what case this could possibly have caught:
            # heuristic.has_specval(spec_series_list[i], "converter") and \
            if spec_series_list[i]["type"] == "dicomseries" and \
                has_specval(spec_series_list[i], "bids-run") and \
                get_specval(spec_series_list[i], "bids-run") in \
                    [get_specval(s, "bids-run")
                     for s in spec_series_list[i + 1:]
                     if get_specval(
                            s,
                            "description") == get_specval(
                                spec_series_list[i], "description") and \
                     get_specval(s, "id") > get_specval(
                                             spec_series_list[i], "id")
                     ]:
                lgr.debug("Ignore SeriesNumber %s for conversion" % i)
                spec_series_list[i]["tags"].append(
                    'hirni-dicom-converter-ignore')

        lgr.debug("Storing specification (%s)", spec)
        # store as a stream (one record per file) to be able to
        # easily concat files without having to parse them, or
        # process them line by line without having to fully parse them
        from datalad_hirni.support.spec_helpers import sort_spec
        # Note: Sorting paradigm needs to change. See above.
        # spec_series_list = sorted(spec_series_list, key=lambda x: sort_spec(x))
        json_py.dump2stream(spec_series_list, spec)

        # make sure spec is in git:
        dataset.repo.set_gitattributes([(spec, {
            'annex.largefiles': 'nothing'
        })], '.gitattributes')

        for r in Save.__call__(dataset=dataset,
                               path=[spec, '.gitattributes'],
                               to_git=True,
                               message="[HIRNI] Added study specification "
                               "snippet for %s" %
                               op.relpath(path[0], dataset.path),
                               return_type='generator',
                               result_renderer='disabled'):
            if r.get('status', None) not in ['ok', 'notneeded']:
                yield r
            elif r['path'] in [spec, op.join(dataset.path, '.gitattributes')] \
                    and r['type'] == 'file':
                r['action'] = 'dicom2spec'
                r['logger'] = lgr
                yield r
            elif r['type'] == 'dataset':
                # 'ok' or 'notneeded' for a dataset is okay, since we commit
                # the spec. But it's not a result to yield
                continue
            else:
                # anything else shouldn't happen
                yield dict(
                    status='error',
                    message=("unexpected result from save: %s", r),
                    # TODO: This actually isn't clear - get it from `r`
                    path=spec,
                    type='file',
                    action='dicom2spec',
                    logger=lgr)
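The duplicate-handling loop above is dense; here is a toy restatement of its rule, with has_specval/get_specval replaced by direct dict access and made-up snippet data: among 'dicomseries' snippets sharing a description and bids-run, only the highest id gets converted, and earlier ones are tagged to be ignored.

specs = [
    {'type': 'dicomseries', 'tags': [],
     'description': {'value': 'func_task-x_run-1'},
     'bids-run': {'value': '01'}, 'id': {'value': 401}},
    {'type': 'dicomseries', 'tags': [],
     'description': {'value': 'func_task-x_run-1'},
     'bids-run': {'value': '01'}, 'id': {'value': 501}},  # rerun, higher id
]
for i, s in enumerate(specs):
    if s['type'] == 'dicomseries' and any(
            t['description']['value'] == s['description']['value']
            and t['bids-run']['value'] == s['bids-run']['value']
            and t['id']['value'] > s['id']['value']
            for t in specs[i + 1:]):
        s['tags'].append('hirni-dicom-converter-ignore')

assert 'hirni-dicom-converter-ignore' in specs[0]['tags']
assert not specs[1]['tags']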
Example #8
    def __call__(path,
                 dataset=None,
                 spec_file=None,
                 properties=None,
                 replace=False):
        # TODO: message

        dataset = require_dataset(dataset,
                                  check_installed=True,
                                  purpose="hirni spec4anything")
        path = assure_list(path)
        path = [resolve_path(p, dataset) for p in path]

        res_kwargs = dict(action='hirni spec4anything', logger=lgr)
        res_kwargs['refds'] = Interface.get_refds_path(dataset)

        # ### This might become superfluous. See datalad-gh-2653
        ds_path = PathRI(dataset.path)
        # ###

        updated_files = []
        paths = []
        for ap in AnnotatePaths.__call__(
                dataset=dataset,
                path=path,
                action='hirni spec4anything',
                unavailable_path_status='impossible',
                nondataset_path_status='error',
                return_type='generator',
                # TODO: Check this one out:
                on_failure='ignore',
                # Note/TODO: Not sure yet whether and when we need those.
                # Generally we want to be able to create a spec for subdatasets,
                # too:
                # recursive=recursive,
                # recursion_limit=recursion_limit,
                # force_subds_discovery=True,
                # force_parentds_discovery=True,
        ):

            if ap.get('status', None) in ['error', 'impossible']:
                yield ap
                continue

            # ### This might become superfluous. See datalad-gh-2653
            ap_path = PathRI(ap['path'])
            # ###

            # find acquisition and respective specification file:
            rel_path = posixpath.relpath(ap_path.posixpath, ds_path.posixpath)

            path_parts = rel_path.split('/')

            # TODO: Note: Commented out this warning for now. We used to not
            # have a spec file at the toplevel of the study dataset, but now
            # we do. The logic afterwards works, but should be revisited. At
            # least, `acq` should be called differently now.
            # if len(path_parts) < 2:
            #     lgr.warning("Not within an acquisition")
            acq = path_parts[0]

            # TODO: spec file specifiable or fixed path?
            #       if we want the former, what we actually need is an
            #       association of acquisition and its spec path
            #       => prob. not an option but a config

            spec_path = spec_file if spec_file \
                else posixpath.join(ds_path.posixpath, acq,
                                    dataset.config.get("datalad.hirni.studyspec.filename",
                                                       "studyspec.json"))

            spec = [r for r in json_py.load_stream(spec_path)] \
                if posixpath.exists(spec_path) else list()

            lgr.debug("Add specification snippet for %s", ap['path'])
            # XXX 'add' does not seem to be the thing we want to do
            # rather 'set', so we have to check whether a spec for a location
            # is already known and fail or replace it (maybe with --force)

            # go through all existing specs and extract unique values,
            # and also assign them to the new record (subjects, ...), but
            # only editable fields!
            uniques = dict()
            for s in spec:
                for k in s:
                    if isinstance(s[k], dict) and 'value' in s[k]:
                        if k not in uniques:
                            uniques[k] = set()
                        uniques[k].add(s[k]['value'])
            overrides = dict()
            for k in uniques:
                if len(uniques[k]) == 1:
                    overrides[k] = _get_edit_dict(value=uniques[k].pop(),
                                                  approved=False)

            if properties:

                # TODO: This entire reading of properties needs to be RF'd
                # into proper generalized functions.
                # spec got more complex. update() prob. can't simply override
                # (think: 'procedures' and 'tags' prob. need to be appended
                # instead)

                # load from file or json string
                if isinstance(properties, dict):
                    props = properties
                elif op.exists(properties):
                    props = json_py.load(properties)
                else:
                    props = json_py.loads(properties)
                # turn into editable, pre-approved records
                spec_props = {
                    k: dict(value=v, approved=True)
                    for k, v in props.items()
                    if k not in non_editables + ['tags', 'procedures']
                }
                spec_props.update({
                    k: v
                    for k, v in props.items() if k in non_editables + ['tags']
                })

                # TODO: still wrong. It's a list. Append or override? How to decide?
                spec_props.update({
                    o_k: [{
                        i_k: dict(value=i_v, approved=True)
                        for i_k, i_v in o_v.items()
                    }]
                    for o_k, o_v in props.items() if o_k in ['procedures']
                })

                overrides.update(spec_props)

            # TODO: It's probably wrong to use uniques for overwriting! At least
            # they cannot be used to overwrite values explicitly set in
            # _add_to_spec like "location", "type", etc.
            #
            # But then: This should concern non-editable fields only, right?

            spec = _add_to_spec(spec,
                                posixpath.split(spec_path)[0],
                                ap,
                                dataset,
                                overrides=overrides,
                                replace=replace)

            # Note: Not sure whether we really want one commit per snippet.
            #       If not - consider:
            #       - What if we fail amidst? => Don't write to file yet.
            #       - What about input paths from different acquisitions?
            #         => store specs per acquisition in memory
            # MIH: One commit per line seems silly. why not update all files
            # collect paths of updated files, and give them to a single `add`
            # at the very end?
            # MIH: if we fail, we fail and nothing is committed
            from datalad_hirni.support.spec_helpers import sort_spec
            json_py.dump2stream(sorted(spec, key=lambda x: sort_spec(x)),
                                spec_path)
            updated_files.append(spec_path)

            yield get_status_dict(status='ok',
                                  type=ap['type'],
                                  path=ap['path'],
                                  **res_kwargs)
            paths.append(ap)

        from datalad.dochelpers import single_or_plural
        from os import linesep
        message = "[HIRNI] Add specification {n_snippets} for: {paths}".format(
            n_snippets=single_or_plural("snippet", "snippets", len(paths)),
            paths=linesep.join(" - " + op.relpath(p['path'], dataset.path)
                               for p in paths)
            if len(paths) > 1 else op.relpath(paths[0]['path'], dataset.path))
        for r in dataset.save(updated_files,
                              to_git=True,
                              message=message,
                              return_type='generator',
                              result_renderer='disabled'):
            yield r
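A condensed sketch of the "uniques" pass in the middle of this example: a value that is identical across all existing snippets is proposed, unapproved, for the new record. _get_edit_dict is stood in by a plain dict and the data is made up:

spec = [
    {'subject': {'value': '02'}, 'bids-run': {'value': '01'}},
    {'subject': {'value': '02'}, 'bids-run': {'value': '02'}},
]
uniques = {}
for s in spec:
    for k, v in s.items():
        if isinstance(v, dict) and 'value' in v:
            uniques.setdefault(k, set()).add(v['value'])
overrides = {k: {'value': vals.pop(), 'approved': False}
             for k, vals in uniques.items() if len(vals) == 1}
# 'subject' is common to all snippets, 'bids-run' is not:
assert overrides == {'subject': {'value': '02', 'approved': False}}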
Example #9
def test_load_unicode_line_separator(fname):
    # See gh-3523.
    result = list(load_stream(fname))
    eq_(len(result), 2)
    eq_(result[0]["key0"], u"a
b")
    eq_(result[1]["key1"], u"plain")
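The point of gh-3523: U+2028 (LINE SEPARATOR) is legal inside a JSON string, so a record containing it must not be split into two stream records. A quick check with the stdlib (a sketch, not datalad code):

import json

s = json.dumps({'key0': 'a\u2028b'})
assert '\n' not in s                       # dumps escapes it as \u2028
assert json.loads(s)['key0'] == 'a\u2028b'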
Example #10
    def __call__(specfile, dataset=None, anonymize=False, only_type=None):

        dataset = require_dataset(dataset, check_installed=True,
                                  purpose="spec2bids")

        specfile = assure_list(specfile)
        specfile = [resolve_path(p, dataset) for p in specfile]
        specfile = [str(p) for p in specfile]

        for spec_path in specfile:

            # Note/TODO: ran_procedure per spec file still isn't ideal. Could
            # be different spec files for same acquisition. It's actually about
            # the exact same call. How to best get around substitutions?
            # Also: per snippet isn't correct either.
            # substitutions is real issue. Example "copy {location} ."
            #
            # => datalad.interface.run.format_command / normalize_command ?

            # TODO: Also can we skip prepare_inputs within run? At least specify
            # more specifically. Note: Can be globbed!

            ran_procedure = dict()

            if not lexists(spec_path):
                yield get_status_dict(
                    action='spec2bids',
                    path=spec_path,
                    status='impossible',
                    message="{} not found".format(spec_path)
                )
                # without this, processing would continue on a missing path
                continue

            if op.isdir(spec_path):
                if op.realpath(op.join(spec_path, op.pardir)) == \
                        op.realpath(dataset.path):
                    spec_path = op.join(
                            spec_path,
                            dataset.config.get(
                                    "datalad.hirni.studyspec.filename",
                                    "studyspec.json")
                    )
                    # TODO: check existence of that file!
                else:
                    yield get_status_dict(
                        action='spec2bids',
                        path=spec_path,
                        status='impossible',
                        message="{} is neither a specification file nor an "
                                "acquisition directory".format(spec_path)
                    )
                    # not a usable spec location; skip to the next specfile
                    continue

            # relative path to spec to be recorded:
            rel_spec_path = relpath(spec_path, dataset.path) \
                if isabs(spec_path) else spec_path

            # check each dict (snippet) in the specification for what to do
            # wrt conversion:
            for spec_snippet in load_stream(spec_path):

                if only_type and not spec_snippet['type'].startswith(only_type):
                    # ignore snippets not matching `only_type`
                    # Note/TODO: the .startswith part is meant for
                    # matching "dicomseries:all" to given "dicomseries" but not
                    # vice versa. This prob. needs refinement (and doc)
                    continue

                if 'procedures' not in spec_snippet:
                    # no conversion procedures defined at all:
                    yield get_status_dict(
                            action='spec2bids',
                            path=spec_path,
                            snippet=spec_snippet,
                            status='notneeded',
                    )
                    continue

                procedure_list = spec_snippet['procedures']
                if not procedure_list:
                    # no conversion procedures defined at all:
                    yield get_status_dict(
                            action='spec2bids',
                            path=spec_path,
                            snippet=spec_snippet,
                            status='notneeded',
                    )
                    continue

                # accept a single dict as a one item list:
                if isinstance(procedure_list, dict):
                    procedure_list = [procedure_list]

                # build a dict available for placeholders in format strings:
                # Note: This is flattening the structure since we don't need
                # value/approved for the substitutions. In addition 'subject'
                # and 'anon_subject' are not passed on, but a new key
                # 'bids_subject' instead the value of which depends on the
                # --anonymize switch.
                # Additionally 'location' is recomputed to be relative to
                # dataset.path, since this is where the procedures are running
                # from within.
                replacements = dict()
                for k, v in spec_snippet.items():
                    if k == 'subject':
                        if not anonymize:
                            replacements['bids-subject'] = v['value']
                    elif k == 'anon-subject':
                        if anonymize:
                            replacements['bids-subject'] = v['value']
                    elif k == 'location':
                        replacements[k] = op.join(op.dirname(rel_spec_path), v)
                    elif k == 'procedures':
                        # 'procedures' is a list of dicts (not suitable for
                        # substitutions) and it makes little sense to be
                        # referenced by converter format strings anyway:
                        continue
                    else:
                        replacements[k] = v['value'] if isinstance(v, dict) else v

                # build dict to patch os.environ with for passing
                # replacements on to procedures:
                env_subs = dict()
                for k, v in replacements.items():
                    env_subs['DATALAD_RUN_SUBSTITUTIONS_{}'
                             ''.format(k.upper().replace('-', '__'))] = str(v)
                env_subs['DATALAD_RUN_SUBSTITUTIONS_SPECPATH'] = rel_spec_path
                env_subs['DATALAD_RUN_SUBSTITUTIONS_ANONYMIZE'] = str(anonymize)

                # TODO: The above two blocks to build replacements dict and
                # env_subs should be joined eventually.

                for proc in procedure_list:
                    if has_specval(proc, 'procedure-name'):
                        proc_name = get_specval(proc, 'procedure-name')
                    else:
                        # invalid procedure spec
                        lgr.warning("conversion procedure missing key "
                                    "'procedure-name' in %s: %s",
                                    spec_path, proc)
                        # TODO: continue or yield impossible/error so it can be
                        # dealt with via on_failure?
                        continue

                    if has_specval(proc, 'on-anonymize') \
                        and anything2bool(
                            get_specval(proc, 'on-anonymize')
                            ) and not anonymize:
                        # don't run that procedure, if we weren't called with
                        # --anonymize while procedure is specified to be run on
                        # that switch only
                        continue

                    proc_call = get_specval(proc, 'procedure-call') \
                        if has_specval(proc, 'procedure-call') \
                        else None

                    if ran_procedure.get(hash((proc_name, proc_call)), None):
                        # if we ran the exact same call already,
                        # don't call it again
                        # TODO: notneeded?
                        continue

                    # if spec comes with call format string, it takes precedence
                    # over what is generally configured for the procedure
                    # TODO: Not sure yet whether this is how we should deal with it
                    if proc_call:
                        env_subs['DATALAD_PROCEDURES_{}_CALL__FORMAT'
                                 ''.format(proc_name.upper().replace('-', '__'))
                                 ] = proc_call

                    run_results = list()
                    # Note, that we can't use dataset.config.overrides to
                    # pass run-substitution config to procedures, since we
                    # leave the python context and thereby lose the dataset
                    # instance. Use patched os.environ instead. Note also,
                    # that this requires names of substitutions to not
                    # contain underscores, since they would be translated to
                    # '.' by ConfigManager when reading them from within the
                    # procedure's datalad-run calls.
                    from mock import patch

                    # TODO: Reconsider that patching. Shouldn't it be an update?
                    with patch.dict('os.environ', env_subs):
                        # apparently reload is necessary to consider config
                        # overrides via env:
                        dataset.config.reload()
                        for r in dataset.run_procedure(
                                spec=proc_name,
                                return_type='generator'
                        ):

                            # # if there was an issue yield original result,
                            # # otherwise swallow:
                            # if r['status'] not in ['ok', 'notneeded']:
                            yield r
                            run_results.append(r)

                    if not all(r['status'] in ['ok', 'notneeded']
                               for r in run_results):
                        yield {'action': proc_name,
                               'path': spec_path,
                               'snippet': spec_snippet,
                               'status': 'error',
                               'message': "acquisition conversion failed. "
                                          "See previous message(s)."}

                    else:
                        yield {'action': proc_name,
                               'path': spec_path,
                               'snippet': spec_snippet,
                               'status': 'ok',
                               'message': "acquisition converted."}

                    # mark as a procedure we ran on this acquisition:
                    # TODO: rethink. Doesn't work that way. Disabled for now
                    # ran_procedure[hash((proc_name, proc_call))] = True

                    # elif proc_name != 'hirni-dicom-converter':
                    #     # specific converter procedure call
                    #
                    #     from mock import patch
                    #     with patch.dict('os.environ', env_subs):
                    #         # apparently reload is necessary to consider config
                    #         # overrides via env:
                    #         dataset.config.reload()
                    #
                    #         for r in dataset.run_procedure(
                    #                 spec=[proc_name, rel_spec_path, anonymize],
                    #                 return_type='generator'
                    #         ):
                    #
                    #             # if there was an issue with containers-run,
                    #             # yield original result, otherwise swallow:
                    #             if r['status'] not in ['ok', 'notneeded']:
                    #                 yield r
                    #
                    #             run_results.append(r)
                    #
                    #     if not all(r['status'] in ['ok', 'notneeded']
                    #                for r in run_results):
                    #         yield {'action': proc_name,
                    #                'path': spec_path,
                    #                'snippet': spec_snippet,
                    #                'status': 'error',
                    #                'message': "Conversion failed. "
                    #                           "See previous message(s)."}
                    #
                    #     else:
                    #         yield {'action': proc_name,
                    #                'path': spec_path,
                    #                'snippet': spec_snippet,
                    #                'status': 'ok',
                    #                'message': "specification converted."}

                    # elif ran_heudiconv and proc_name == 'hirni-dicom-converter':
                    #     # in this case we acted upon this snippet already and
                    #     # do not have to produce a result
                    #     pass
                    #
                    # else:
                    #     # this shouldn't happen!
                    #     raise RuntimeError

            yield {'action': 'spec2bids',
                   'path': spec_path,
                   'status': 'ok'}
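A small sketch of the environment-variable name mangling used above: substitution keys are upper-cased and '-' becomes '__'; plain underscores are avoided because, per the comment in the code, ConfigManager would translate them to '.':

replacements = {'bids-subject': '02', 'id': 401}
env_subs = {
    'DATALAD_RUN_SUBSTITUTIONS_{}'.format(k.upper().replace('-', '__')): str(v)
    for k, v in replacements.items()
}
assert env_subs['DATALAD_RUN_SUBSTITUTIONS_BIDS__SUBJECT'] == '02'
assert env_subs['DATALAD_RUN_SUBSTITUTIONS_ID'] == '401'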
Example #11
def test_custom_rules(path, toolbox_url):

    # ## SETUP a raw ds
    with patch.dict('os.environ', {'DATALAD_HIRNI_TOOLBOX_URL': toolbox_url}):
        ds = install(source=test_raw_ds.get_raw_dataset(), path=path)
    # ## END SETUP

    # 1. simply default rules
    ds.hirni_dicom2spec(path=op.join("struct_acq", "dicoms"),
                        spec=op.join("struct_acq", "studyspec.json"))
    struct_spec = [
        s for s in load_stream(op.join(path, "struct_acq", "studyspec.json"))
    ]

    for spec_snippet in struct_spec:

        # no comment in default spec
        assert not has_specval(spec_snippet, 'comment') or not get_specval(
            spec_snippet, 'comment')
        # subject
        assert has_specval(spec_snippet, 'subject')
        assert_equal(get_specval(spec_snippet, 'subject'), '02')
        # modality
        assert has_specval(spec_snippet, 'bids-modality')
        assert_equal(get_specval(spec_snippet, 'bids-modality'), 't1w')
    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(struct_spec), 2)
    assert_in("dicomseries", [s['type'] for s in struct_spec])
    assert_in("dicomseries:all", [s['type'] for s in struct_spec])

    # set config to use custom rules
    import datalad_hirni
    ds.config.add(
        "datalad.hirni.dicom2spec.rules",
        op.join(op.dirname(datalad_hirni.__file__), 'resources', 'rules',
                'test_rules.py'),
    )

    # 2. do again with configured rules (rules 1)
    import os
    os.unlink(op.join(path, 'struct_acq', 'studyspec.json'))

    ds.hirni_dicom2spec(path=op.join("struct_acq", "dicoms"),
                        spec=op.join("struct_acq", "studyspec.json"))
    struct_spec = [
        s for s in load_stream(op.join(path, "struct_acq", "studyspec.json"))
    ]

    # assertions wrt spec
    for spec_snippet in struct_spec:

        # now there's a comment in spec
        assert has_specval(spec_snippet, 'comment')
        assert_equal(get_specval(spec_snippet, 'comment'),
                     "Rules1: These rules are for unit testing only")

    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(struct_spec), 2)
    assert_in("dicomseries", [s['type'] for s in struct_spec])
    assert_in("dicomseries:all", [s['type'] for s in struct_spec])

    # 3. once again with two configured rule sets (rules 1 and 2)
    ds.config.add(
        "datalad.hirni.dicom2spec.rules",
        op.join(op.dirname(datalad_hirni.__file__), 'resources', 'rules',
                'test_rules2.py'),
    )
    try:
        # Protect against older datalad versions.
        # ATM this can't be done by checking the version number, since this
        # change currently is in datalad's master branch but not in maint;
        # maint, however, has the same __version__ as master.
        rule_files = ds.config.get("datalad.hirni.dicom2spec.rules",
                                   get_all=True)
    except TypeError as e:
        if "unexpected keyword argument 'get_all'" in str(e):
            # older datalad versions return multiple values out of the box
            rule_files = ds.config.get("datalad.hirni.dicom2spec.rules")
        else:
            raise

    # ensure assumption about order (dicom2spec relies on it):

    assert_equal(
        rule_files[0],
        op.join(op.dirname(datalad_hirni.__file__), 'resources', 'rules',
                'test_rules.py'))
    assert_equal(
        rule_files[1],
        op.join(op.dirname(datalad_hirni.__file__), 'resources', 'rules',
                'test_rules2.py'))

    os.unlink(op.join(path, 'struct_acq', 'studyspec.json'))
    ds.hirni_dicom2spec(path=op.join("struct_acq", "dicoms"),
                        spec=op.join("struct_acq", "studyspec.json"))
    struct_spec = [
        s for s in load_stream(op.join(path, "struct_acq", "studyspec.json"))
    ]

    # assertions wrt spec
    for spec_snippet in struct_spec:

        # Rule2 should have overwritten Rule1's comment:
        assert has_specval(spec_snippet, 'comment')
        assert_equal(get_specval(spec_snippet, 'comment'),
                     "Rules2: These rules are for unit testing only")

    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(struct_spec), 2)
    assert_in("dicomseries", [s['type'] for s in struct_spec])
    assert_in("dicomseries:all", [s['type'] for s in struct_spec])