def get(self, path=None):
    """Serve a dataset file's content, or a listing of indexed files.

    Parameters
    ----------
    path : str, optional
      File path, either from URL routing or from the parsed request
      arguments. If no path is given, or the path contains a ``*``
      wildcard, a listing of matching indexed files is returned instead
      of file content.

    Returns
    -------
    JSON response with either a 'files' list (listing mode) or
    'path' and 'content' keys (content mode).
    """
    args = self.rp.parse_args()
    # either use value from routing, or from request
    path = path or args.path
    if path is None or '*' in path:
        path = path if path else '*'
        # no path, give list of available files
        return jsonify({
            'files': [f for f in self.ds.repo.get_indexed_files()
                      if fnmatch(f, path)],
        })
    file_abspath = self._validate_file_path(path)
    if not self.read_only:
        # in read only mode we cannot do this, as it might cause
        # more datasets to be install etc...
        self.ds.get(file_abspath)
    # TODO proper error reporting when loading/decoding fails
    if args.json == 'stream':
        content = list(json_py.load_stream(file_abspath))
    elif args.json == 'yes':
        content = json_py.load(file_abspath)
    else:
        # FIX: use a context manager; the original 'open(...).read()'
        # left the file handle to be reclaimed by the GC only
        with open(file_abspath, 'r') as f:
            content = f.read()
    return jsonify({
        'path': path,
        'content': content,
    })
def add_name_fixing(path):
    """Append name-fixing conversion procedures to a study specification.

    Loads the spec stream at `path`, appends the fix-up procedures to
    every ``dicomseries:all`` snippet, and writes the stream back.

    Parameters
    ----------
    path : str
      Path to a study specification file (JSON stream, one record per line).
    """
    import datalad.support.json_py as json_py

    spec = [r for r in json_py.load_stream(path)]
    # Note: We append the procedure to dicomseries:all, since we do not
    # technically act upon a particular series. This is because the procedure
    # concerns the outcome of the conversion, not the raw data. The file
    # selection has to be done within the procedure and can't be controlled by
    # the spec or hirni-spec2bids ATM.
    for snippet in spec:
        if snippet['type'] == 'dicomseries:all':
            # FIX: the original appended a single dict that spelled
            # 'procedure-name' twice; duplicate keys in a dict literal keep
            # only the last value, so the 'change-dwi-run-to-acq_fix_all'
            # procedure was silently dropped. Append one record per
            # procedure instead.
            for proc_name in ('change-dwi-run-to-acq_fix_all',
                              'fieldmaps-to-phase-or-magnitude_fix_all'):
                snippet['procedures'].append({
                    'procedure-name': {'value': proc_name,
                                       'approved': True},
                    'on-anonymize': {'value': False,
                                     'approved': True},
                })
    json_py.dump2stream(spec, path)
def test_dump2stream(path):
    """Round-trip a two-record stream, plain and xz-compressed."""
    records = [dict(a=5), dict(b=4)]
    # plain stream round-trip
    dump2stream(records, path)
    eq_(list(load_stream(path)), records)
    # the same for compression
    dump2xzstream(records, path)
    eq_(list(load_xzstream(path)), records)
def get_run_info(dset, message):
    """Extract run information from `message`

    Parameters
    ----------
    message : str
        A commit message.

    Returns
    -------
    A tuple with the command's message and a dict with run information. Both
    these values are None if `message` doesn't have a run command.

    Raises
    ------
    A ValueError if the information in `message` is invalid.
    """
    cmdrun_regex = r'\[DATALAD RUNCMD\] (.*)=== Do not change lines below ' \
                   r'===\n(.*)\n\^\^\^ Do not change lines above \^\^\^'
    match = re.match(cmdrun_regex, message, re.MULTILINE | re.DOTALL)
    if match is None:
        # not a run commit at all
        return None, None
    rec_msg, rec_json = match.groups()

    try:
        runinfo = json.loads(rec_json)
    except Exception as e:
        raise ValueError(
            'cannot rerun command, command specification is not valid JSON: '
            '%s' % exc_str(e)
        )
    if not isinstance(runinfo, (list, dict)):
        # this is a run record ID -> load the beast
        record_dir = dset.config.get(
            'datalad.run.record-directory',
            default=op.join('.datalad', 'runinfo'))
        record_path = op.join(dset.path, record_dir, runinfo)
        if not op.lexists(record_path):
            # too harsh IMHO, but same harshness as few lines further down
            raise ValueError(
                "Run record sidecar file not found: {}".format(record_path))
        # TODO `get` the file
        # TODO check if there is a record
        runinfo = next(load_stream(record_path, compressed=True))
    if 'cmd' not in runinfo:
        raise ValueError("Looks like a run commit but does not have a command")
    return rec_msg.rstrip(), runinfo
def get_run_info(dset, message):
    """Extract run information from `message`

    Parameters
    ----------
    message : str
        A commit message.

    Returns
    -------
    A tuple with the command's message and a dict with run information. Both
    these values are None if `message` doesn't have a run command.

    Raises
    ------
    A ValueError if the information in `message` is invalid.
    """
    # NOTE(review): an identical definition of this function appears earlier
    # in this file -- one of the two is likely redundant; verify which one
    # is actually imported/used.
    cmdrun_regex = r'\[DATALAD RUNCMD\] (.*)=== Do not change lines below ' \
                   r'===\n(.*)\n\^\^\^ Do not change lines above \^\^\^'
    runinfo = re.match(cmdrun_regex, message, re.MULTILINE | re.DOTALL)
    if not runinfo:
        # not a run commit: no marker block in the message
        return None, None

    # group 1: free-form commit message, group 2: JSON payload (or record ID)
    rec_msg, runinfo = runinfo.groups()

    try:
        runinfo = json.loads(runinfo)
    except Exception as e:
        raise ValueError(
            'cannot rerun command, command specification is not valid JSON: '
            '%s' % exc_str(e)
        )
    if not isinstance(runinfo, (list, dict)):
        # this is a run record ID -> load the beast
        record_dir = dset.config.get(
            'datalad.run.record-directory',
            default=op.join('.datalad', 'runinfo'))
        record_path = op.join(dset.path, record_dir, runinfo)
        if not op.lexists(record_path):
            raise ValueError(
                "Run record sidecar file not found: {}".format(record_path))
        # TODO `get` the file
        recs = load_stream(record_path, compressed=True)
        # TODO check if there is a record
        runinfo = next(recs)
    if 'cmd' not in runinfo:
        raise ValueError("Looks like a run commit but does not have a command")
    return rec_msg.rstrip(), runinfo
def test_default_rules(path):
    """Check dicom2spec output produced with the built-in (default) rules.

    Creates specs for a functional and a structural acquisition of the raw
    test dataset and verifies the expected snippet fields and values.
    """
    # ## SETUP a raw ds
    ds = install(source=test_raw_ds.get_raw_dataset(), path=path)
    # ## END SETUP

    # create specs for dicomseries w/ default rules:
    # TODO: spec path should prob. relate to `path` via (derived) acquisition!
    ds.hirni_dicom2spec(path=op.join("func_acq", "dicoms"),
                        spec=op.join("func_acq", "studyspec.json"))
    ds.hirni_dicom2spec(path=op.join("struct_acq", "dicoms"),
                        spec=op.join("struct_acq", "studyspec.json"))

    func_spec = [
        s for s in load_stream(op.join(path, "func_acq", "studyspec.json"))
    ]

    for snippet in func_spec:
        # type
        assert_in("type", snippet.keys())
        assert_in(snippet["type"], ["dicomseries", "dicomseries:all"])
        # no comment in default spec
        assert not has_specval(snippet, 'comment') or not get_specval(
            snippet, 'comment')
        # description
        assert has_specval(snippet, 'description')
        assert_equal(get_specval(snippet, 'description'),
                     "func_task-oneback_run-1")
        # subject
        assert has_specval(snippet, 'subject')
        assert_equal(get_specval(snippet, 'subject'), '02')
        # modality
        assert has_specval(snippet, 'bids-modality')
        assert_equal(get_specval(snippet, 'bids-modality'), 'bold')
        # task
        assert has_specval(snippet, "bids-task")
        assert_equal(get_specval(snippet, "bids-task"), "oneback")
        # run
        assert has_specval(snippet, "bids-run")
        assert_equal(get_specval(snippet, "bids-run"), "01")
        # id
        assert has_specval(snippet, "id")
        assert_equal(get_specval(snippet, "id"), 401)

    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(func_spec), 2)
    assert_in("dicomseries", [s['type'] for s in func_spec])
    assert_in("dicomseries:all", [s['type'] for s in func_spec])

    struct_spec = [
        s for s in load_stream(op.join(path, "struct_acq", "studyspec.json"))
    ]

    for snippet in struct_spec:
        # type
        # NOTE(review): this loop mixes plain asserts with assert_in/
        # assert_equal helpers used above -- consider unifying the style.
        assert "type" in snippet.keys()
        assert snippet["type"] in ["dicomseries", "dicomseries:all"]
        # no comment in default spec
        assert not has_specval(snippet, 'comment') or not get_specval(
            snippet, 'comment')
        # description
        assert has_specval(snippet, 'description')
        assert_equal(get_specval(snippet, 'description'), "anat-T1w")
        # subject
        assert has_specval(snippet, 'subject')
        assert_equal(get_specval(snippet, 'subject'), '02')
        # modality
        assert has_specval(snippet, 'bids-modality')
        assert_equal(get_specval(snippet, 'bids-modality'), 't1w')
        # run
        assert has_specval(snippet, "bids-run")
        assert_equal(get_specval(snippet, "bids-run"), "1")

    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(struct_spec), 2)
    assert_in("dicomseries", [s['type'] for s in struct_spec])
    assert_in("dicomseries:all", [s['type'] for s in struct_spec])
def test_custom_rules(path):
    """Check dicom2spec with custom rule files configured.

    Runs dicom2spec three times on the same structural acquisition:
    (1) default rules, (2) one configured custom rule file,
    (3) two configured rule files (the later one wins).
    """
    # ## SETUP a raw ds
    ds = install(source=test_raw_ds.get_raw_dataset(), path=path)
    # ## END SETUP

    # 1. simply default rules
    ds.hirni_dicom2spec(path=op.join("struct_acq", "dicoms"),
                        spec=op.join("struct_acq", "studyspec.json"))
    struct_spec = [
        s for s in load_stream(op.join(path, "struct_acq", "studyspec.json"))
    ]

    for spec_snippet in struct_spec:
        # no comment in default spec
        assert not has_specval(spec_snippet, 'comment') or not get_specval(
            spec_snippet, 'comment')
        # subject
        assert has_specval(spec_snippet, 'subject')
        assert_equal(get_specval(spec_snippet, 'subject'), '02')
        # modality
        assert has_specval(spec_snippet, 'bids-modality')
        assert_equal(get_specval(spec_snippet, 'bids-modality'), 't1w')

    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(struct_spec), 2)
    assert_in("dicomseries", [s['type'] for s in struct_spec])
    assert_in("dicomseries:all", [s['type'] for s in struct_spec])

    # set config to use custom rules
    import datalad_hirni
    ds.config.add(
        "datalad.hirni.dicom2spec.rules",
        op.join(op.dirname(datalad_hirni.__file__),
                'resources', 'rules', 'test_rules.py'),
    )

    # 2. do again with configured rules (rules 1)
    import os
    os.unlink(op.join(path, 'struct_acq', 'studyspec.json'))
    ds.hirni_dicom2spec(path=op.join("struct_acq", "dicoms"),
                        spec=op.join("struct_acq", "studyspec.json"))
    struct_spec = [
        s for s in load_stream(op.join(path, "struct_acq", "studyspec.json"))
    ]

    # assertions wrt spec
    for spec_snippet in struct_spec:
        # now there's a comment in spec
        assert has_specval(spec_snippet, 'comment')
        assert_equal(get_specval(spec_snippet, 'comment'),
                     "Rules1: These rules are for unit testing only")

    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(struct_spec), 2)
    assert_in("dicomseries", [s['type'] for s in struct_spec])
    assert_in("dicomseries:all", [s['type'] for s in struct_spec])

    # 3. once again with two configured rule sets (rules 1 and 2)
    ds.config.add(
        "datalad.hirni.dicom2spec.rules",
        op.join(op.dirname(datalad_hirni.__file__),
                'resources', 'rules', 'test_rules2.py'),
    )
    # NOTE(review): this expects config.get to return ALL values as a tuple;
    # newer datalad returns only the last value by default (see the variant
    # of this test elsewhere in this file that uses get_all) -- verify the
    # datalad version this is pinned against.
    rule_files = ds.config.get("datalad.hirni.dicom2spec.rules")
    # ensure assumption about order (dicom2spec relies on it):
    assert_equal(rule_files,
                 (op.join(op.dirname(datalad_hirni.__file__),
                          'resources', 'rules', 'test_rules.py'),
                  op.join(op.dirname(datalad_hirni.__file__),
                          'resources', 'rules', 'test_rules2.py')))

    os.unlink(op.join(path, 'struct_acq', 'studyspec.json'))
    ds.hirni_dicom2spec(path=op.join("struct_acq", "dicoms"),
                        spec=op.join("struct_acq", "studyspec.json"))
    struct_spec = [
        s for s in load_stream(op.join(path, "struct_acq", "studyspec.json"))
    ]

    # assertions wrt spec
    for spec_snippet in struct_spec:
        # Rule2 should have overwritten Rule1's comment:
        assert has_specval(spec_snippet, 'comment')
        assert_equal(get_specval(spec_snippet, 'comment'),
                     "Rules2: These rules are for unit testing only")

    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(struct_spec), 2)
    assert_in("dicomseries", [s['type'] for s in struct_spec])
    assert_in("dicomseries:all", [s['type'] for s in struct_spec])
def __call__(path=None, spec=None, dataset=None, subject=None,
             anon_subject=None, acquisition=None, properties=None):
    """Derive a study specification from DICOM metadata.

    Reads dataset-level DICOM metadata for `path`, merges per-series
    records into the spec stream at `spec` (creating it if absent),
    marks duplicate runs to be ignored, writes the spec back as a JSON
    stream, and saves it to git. Yields datalad-style result dicts.
    """
    # TODO: acquisition can probably be removed (or made an alternative to
    # derive spec and/or dicom location from)
    # Change, so path needs to point directly to dicom ds?
    # Or just use acq and remove path?
    dataset = require_dataset(dataset, check_installed=True,
                              purpose="spec from dicoms")

    from datalad.utils import assure_list
    if path is not None:
        path = assure_list(path)
        path = [resolve_path(p, dataset) for p in path]
    else:
        raise InsufficientArgumentsError(
            "insufficient arguments for dicom2spec: a path is required")

    # TODO: We should be able to deal with several paths at once
    #       ATM we aren't (see also commit + message of actual spec)
    assert len(path) == 1

    if not spec:
        raise InsufficientArgumentsError(
            "insufficient arguments for dicom2spec: a spec file is required")
        # TODO: That's prob. wrong. We can derive default spec from acquisition
    else:
        spec = resolve_path(spec, dataset)

    # existing spec records are loaded so new series are merged into them
    spec_series_list = \
        [r for r in json_py.load_stream(spec)] if op.exists(spec) else list()

    # get dataset level metadata:
    found_some = False
    for meta in dataset.meta_dump(
            path,
            recursive=False,  # always False?
            reporton='datasets',
            return_type='generator',
            result_renderer='disabled'):
        if meta.get('status', None) not in ['ok', 'notneeded']:
            yield meta
            continue

        if 'dicom' not in meta['metadata']:
            # TODO: Really "notneeded" or simply not a result at all?
            yield dict(
                status='notneeded',
                message=("found no DICOM metadata for %s", meta['path']),
                path=meta['path'],
                type='dataset',
                action='dicom2spec',
                logger=lgr)
            continue

        if 'Series' not in meta['metadata']['dicom'] or \
                not meta['metadata']['dicom']['Series']:
            yield dict(
                status='impossible',
                message=("no image series detected in DICOM metadata of"
                         " %s", meta['path']),
                path=meta['path'],
                type='dataset',
                action='dicom2spec',
                logger=lgr)
            continue

        found_some = True

        overrides = dict()
        if properties:
            # load from file or json string
            props = json_py.load(properties) \
                if op.exists(properties) else json_py.loads(properties)
            # turn into editable, pre-approved records
            props = {k: dict(value=v, approved=True)
                     for k, v in props.items()}
            overrides.update(props)

        spec_series_list = add_to_spec(
            meta,
            spec_series_list,
            op.dirname(spec),
            subject=subject,
            anon_subject=anon_subject,
            # session=session,
            # TODO: parameter "session" was what
            # we now call acquisition. This is
            # NOT a good default for bids_session!
            # Particularly wrt to anonymization
            overrides=overrides,
            dataset=dataset)

    if not found_some:
        yield dict(
            status='impossible',
            message="found no DICOM metadata",
            path=path,
            type='file',
            # TODO: arguable should be 'file' or 'dataset', depending on path
            action='dicom2spec',
            logger=lgr)
        return

    # TODO: RF needed. This rule should go elsewhere:
    # ignore duplicates (prob. reruns of aborted runs)
    # -> convert highest id only
    # Note: This sorting is a q&d hack!
    # TODO: Sorting needs to become more sophisticated + include notion of :all
    spec_series_list = sorted(spec_series_list,
                              key=lambda x: get_specval(x, 'id')
                              if 'id' in x.keys() else 0)
    for i in range(len(spec_series_list)):
        # Note: Removed the following line from condition below,
        # since it appears to be pointless. Value for 'converter'
        # used to be 'heudiconv' or 'ignore' for a 'dicomseries', so
        # it's not clear ATM what case this could possibly have catched:
        # heuristic.has_specval(spec_series_list[i], "converter") and \
        #
        # Tag a series for exclusion when a later snippet shares its
        # description and bids-run but has a higher id (i.e. a rerun).
        if spec_series_list[i]["type"] == "dicomseries" and \
            has_specval(spec_series_list[i], "bids-run") and \
            get_specval(spec_series_list[i], "bids-run") in \
                [get_specval(s, "bids-run")
                 for s in spec_series_list[i + 1:]
                 if get_specval(
                     s, "description") == get_specval(
                         spec_series_list[i], "description") and \
                 get_specval(s, "id") > get_specval(
                     spec_series_list[i], "id")
                 ]:
            lgr.debug("Ignore SeriesNumber %s for conversion" % i)
            spec_series_list[i]["tags"].append(
                'hirni-dicom-converter-ignore')

    lgr.debug("Storing specification (%s)", spec)
    # store as a stream (one record per file) to be able to
    # easily concat files without having to parse them, or
    # process them line by line without having to fully parse them
    from datalad_hirni.support.spec_helpers import sort_spec
    # Note: Sorting paradigm needs to change. See above.
    # spec_series_list = sorted(spec_series_list, key=lambda x: sort_spec(x))
    json_py.dump2stream(spec_series_list, spec)

    # make sure spec is in git:
    dataset.repo.set_gitattributes([(spec, {
        'annex.largefiles': 'nothing'
    })], '.gitattributes')

    for r in Save.__call__(dataset=dataset,
                           path=[spec, '.gitattributes'],
                           to_git=True,
                           message="[HIRNI] Added study specification "
                                   "snippet for %s" %
                                   op.relpath(path[0], dataset.path),
                           return_type='generator',
                           result_renderer='disabled'):
        if r.get('status', None) not in ['ok', 'notneeded']:
            yield r
        elif r['path'] in [spec, op.join(dataset.path, '.gitattributes')] \
                and r['type'] == 'file':
            r['action'] = 'dicom2spec'
            r['logger'] = lgr
            yield r
        elif r['type'] == 'dataset':
            # 'ok' or 'notneeded' for a dataset is okay, since we commit
            # the spec. But it's not a result to yield
            continue
        else:
            # anything else shouldn't happen
            yield dict(
                status='error',
                message=("unexpected result from save: %s", r),
                path=spec,  # TODO: This actually isn't clear - get it from `r`
                type='file',
                action='dicom2spec',
                logger=lgr)
def __call__(path, dataset=None, spec_file=None, properties=None,
             replace=False):
    """Add a specification snippet for arbitrary paths to a study spec.

    For each annotated input path, locates (or derives) the acquisition's
    spec file, seeds editable fields from values unique across existing
    snippets, applies `properties` overrides, appends/replaces the snippet,
    and finally saves all updated spec files in a single commit.
    Yields datalad-style result dicts.
    """
    # TODO: message

    dataset = require_dataset(dataset, check_installed=True,
                              purpose="hirni spec4anything")
    path = assure_list(path)
    path = [resolve_path(p, dataset) for p in path]

    res_kwargs = dict(action='hirni spec4anything', logger=lgr)
    res_kwargs['refds'] = Interface.get_refds_path(dataset)

    # ### This might become superfluous. See datalad-gh-2653
    ds_path = PathRI(dataset.path)
    # ###

    updated_files = []
    paths = []
    for ap in AnnotatePaths.__call__(
            dataset=dataset,
            path=path,
            action='hirni spec4anything',
            unavailable_path_status='impossible',
            nondataset_path_status='error',
            return_type='generator',
            # TODO: Check this one out:
            on_failure='ignore',
            # Note/TODO: Not sure yet whether and when we need those.
            # Generally we want to be able to create a spec for subdatasets,
            # too:
            # recursive=recursive,
            # recursion_limit=recursion_limit,
            # force_subds_discovery=True,
            # force_parentds_discovery=True,
    ):
        if ap.get('status', None) in ['error', 'impossible']:
            yield ap
            continue

        # ### This might become superfluous. See datalad-gh-2653
        ap_path = PathRI(ap['path'])
        # ###

        # find acquisition and respective specification file:
        rel_path = posixpath.relpath(ap_path.posixpath, ds_path.posixpath)
        path_parts = rel_path.split('/')

        # TODO: Note: Outcommented this warning for now. We used to not have
        # a spec file at the toplevel of the study dataset, but now we do.
        # The logic afterwards works, but should be revisited. At least,
        # `acq` should be called differently now.
        # if len(path_parts) < 2:
        #     lgr.warning("Not within an acquisition")
        acq = path_parts[0]

        # TODO: spec file specifiable or fixed path?
        #       if we want the former, what we actually need is an
        #       association of acquisition and its spec path
        #       => prob. not an option but a config

        spec_path = spec_file if spec_file \
            else posixpath.join(ds_path.posixpath, acq,
                                dataset.config.get(
                                    "datalad.hirni.studyspec.filename",
                                    "studyspec.json"))

        spec = [r for r in json_py.load_stream(spec_path)] \
            if posixpath.exists(spec_path) else list()

        lgr.debug("Add specification snippet for %s", ap['path'])
        # XXX 'add' does not seem to be the thing we want to do
        # rather 'set', so we have to check whether a spec for a location
        # is already known and fail or replace it (maybe with --force)

        # go through all existing specs and extract unique value
        # and also assign them to the new record (subjects, ...), but only
        # editable fields!!
        uniques = dict()
        for s in spec:
            for k in s:
                if isinstance(s[k], dict) and 'value' in s[k]:
                    if k not in uniques:
                        uniques[k] = set()
                    uniques[k].add(s[k]['value'])
        overrides = dict()
        for k in uniques:
            if len(uniques[k]) == 1:
                # value is consistent across all snippets -> pre-fill it,
                # but leave it unapproved for the user to confirm
                overrides[k] = _get_edit_dict(value=uniques[k].pop(),
                                              approved=False)

        if properties:
            # TODO: This entire reading of properties needs to be RF'd
            # into proper generalized functions.
            # spec got more complex. update() prob. can't simply override
            # (think: 'procedures' and 'tags' prob. need to be appended
            # instead)

            # load from file or json string
            if isinstance(properties, dict):
                props = properties
            elif op.exists(properties):
                props = json_py.load(properties)
            else:
                props = json_py.loads(properties)
            # turn into editable, pre-approved records
            spec_props = {
                k: dict(value=v, approved=True)
                for k, v in props.items()
                if k not in non_editables + ['tags', 'procedures']
            }
            spec_props.update({
                k: v
                for k, v in props.items() if k in non_editables + ['tags']
            })
            # TODO: still wrong. It's a list. Append or override? How to decide?
            spec_props.update({
                o_k: [{
                    i_k: dict(value=i_v, approved=True)
                    for i_k, i_v in o_v.items()
                }]
                for o_k, o_v in props.items() if o_k in ['procedures']
            })

            overrides.update(spec_props)

        # TODO: It's probably wrong to use uniques for overwriting! At least
        # they cannot be used to overwrite values explicitly set in
        # _add_to_spec like "location", "type", etc.
        #
        # But then: This should concern non-editable fields only, right?

        spec = _add_to_spec(spec,
                            posixpath.split(spec_path)[0],
                            ap,
                            dataset,
                            overrides=overrides,
                            replace=replace)

        # Note: Not sure whether we really want one commit per snippet.
        #       If not - consider:
        #       - What if we fail amidst? => Don't write to file yet.
        #       - What about input paths from different acquisitions?
        #         => store specs per acquisition in memory
        # MIH: One commit per line seems silly. why not update all files
        # collect paths of updated files, and give them to a single `add`
        # at the very end?
        # MIH: if we fail, we fail and nothing is committed
        from datalad_hirni.support.spec_helpers import sort_spec
        json_py.dump2stream(sorted(spec, key=lambda x: sort_spec(x)),
                            spec_path)
        updated_files.append(spec_path)

        yield get_status_dict(status='ok',
                              type=ap['type'],
                              path=ap['path'],
                              **res_kwargs)
        paths.append(ap)

    from datalad.dochelpers import single_or_plural
    from os import linesep
    # NOTE(review): if no path produced a snippet, `paths` is empty and
    # `paths[0]` below raises IndexError -- verify this can't be reached
    # with zero successful results.
    message = "[HIRNI] Add specification {n_snippets} for: {paths}".format(
        n_snippets=single_or_plural("snippet", "snippets", len(paths)),
        paths=linesep.join(" - " + op.relpath(p['path'], dataset.path)
                           for p in paths)
        if len(paths) > 1 else op.relpath(paths[0]['path'], dataset.path))
    for r in dataset.save(updated_files,
                          to_git=True,
                          message=message,
                          return_type='generator',
                          result_renderer='disabled'):
        yield r
def test_load_unicode_line_separator(fname):
    """Loading a stream with unicode line separators yields intact records."""
    # See gh-3523.
    records = [rec for rec in load_stream(fname)]
    eq_(len(records), 2)
    first, second = records
    eq_(first["key0"], u"a b")
    eq_(second["key1"], u"plain")
def __call__(specfile, dataset=None, anonymize=False, only_type=None):
    """Convert acquisitions to BIDS by running the procedures in a spec.

    For each spec file (or acquisition directory containing one), iterates
    over its snippets, builds run-substitution placeholders from snippet
    fields, and executes each listed conversion procedure via
    ``dataset.run_procedure``. Yields datalad-style result dicts.
    """

    dataset = require_dataset(dataset, check_installed=True,
                              purpose="spec2bids")

    specfile = assure_list(specfile)
    specfile = [resolve_path(p, dataset) for p in specfile]
    specfile = [str(p) for p in specfile]

    for spec_path in specfile:

        # Note/TODO: ran_procedure per spec file still isn't ideal. Could
        # be different spec files for same acquisition. It's actually about
        # the exact same call. How to best get around substitutions?
        # Also: per snippet isn't correct either.
        # substitutions is real issue. Example "copy {location} ."
        #
        # => datalad.interface.run.format_command / normalize_command ?

        # TODO: Also can we skip prepare_inputs within run? At least specify
        # more specifically. Note: Can be globbed!

        ran_procedure = dict()

        if not lexists(spec_path):
            # NOTE(review): only yields 'impossible' but does not `continue`;
            # execution falls through to the isdir check -- confirm intended.
            yield get_status_dict(
                action='spec2bids',
                path=spec_path,
                status='impossible',
                message="{} not found".format(spec_path)
            )

        if op.isdir(spec_path):
            if op.realpath(op.join(spec_path, op.pardir)) == \
                    op.realpath(dataset.path):
                # an acquisition dir directly under the dataset root:
                # use the configured default spec filename within it
                spec_path = op.join(
                    spec_path,
                    dataset.config.get(
                        "datalad.hirni.studyspec.filename",
                        "studyspec.json")
                )
                # TODO: check existence of that file!
            else:
                yield get_status_dict(
                    action='spec2bids',
                    path=spec_path,
                    status='impossible',
                    message="{} is neither a specification file nor an "
                            "acquisition directory".format(spec_path)
                )

        # relative path to spec to be recorded:
        rel_spec_path = relpath(spec_path, dataset.path) \
            if isabs(spec_path) else spec_path

        # check each dict (snippet) in the specification for what to do
        # wrt conversion:
        for spec_snippet in load_stream(spec_path):

            if only_type and not spec_snippet['type'].startswith(only_type):
                # ignore snippets not matching `only_type`
                # Note/TODO: the .startswith part is meant for
                # matching "dicomseries:all" to given "dicomseries" but not
                # vice versa. This prob. needs refinement (and doc)
                continue

            if 'procedures' not in spec_snippet:
                # no conversion procedures defined at all:
                yield get_status_dict(
                    action='spec2bids',
                    path=spec_path,
                    snippet=spec_snippet,
                    status='notneeded',
                )
                continue

            procedure_list = spec_snippet['procedures']
            if not procedure_list:
                # no conversion procedures defined at all:
                yield get_status_dict(
                    action='spec2bids',
                    path=spec_path,
                    snippet=spec_snippet,
                    status='notneeded',
                )
                continue

            # accept a single dict as a one item list:
            if isinstance(procedure_list, dict):
                procedure_list = [procedure_list]

            # build a dict available for placeholders in format strings:
            # Note: This is flattening the structure since we don't need
            # value/approved for the substitutions. In addition 'subject'
            # and 'anon_subject' are not passed on, but a new key
            # 'bids_subject' instead the value of which depends on the
            # --anonymize switch.
            # Additionally 'location' is recomputed to be relative to
            # dataset.path, since this is where the procedures are running
            # from within.
            replacements = dict()
            for k, v in spec_snippet.items():
                if k == 'subject':
                    if not anonymize:
                        replacements['bids-subject'] = v['value']
                elif k == 'anon-subject':
                    if anonymize:
                        replacements['bids-subject'] = v['value']
                elif k == 'location':
                    replacements[k] = op.join(op.dirname(rel_spec_path), v)
                elif k == 'procedures':
                    # 'procedures' is a list of dicts (not suitable for
                    # substitutions) and it makes little sense to be
                    # referenced by converter format strings anyway:
                    continue
                else:
                    replacements[k] = v['value'] if isinstance(v, dict) else v

            # build dict to patch os.environ with for passing
            # replacements on to procedures:
            env_subs = dict()
            for k, v in replacements.items():
                env_subs['DATALAD_RUN_SUBSTITUTIONS_{}'
                         ''.format(k.upper().replace('-', '__'))] = str(v)
            env_subs['DATALAD_RUN_SUBSTITUTIONS_SPECPATH'] = rel_spec_path
            env_subs['DATALAD_RUN_SUBSTITUTIONS_ANONYMIZE'] = str(anonymize)

            # TODO: The above two blocks to build replacements dict and
            # env_subs should be joined eventually.

            for proc in procedure_list:
                if has_specval(proc, 'procedure-name'):
                    proc_name = get_specval(proc, 'procedure-name')
                else:
                    # invalid procedure spec
                    lgr.warning("conversion procedure missing key "
                                "'procedure-name' in %s: %s",
                                spec_path, proc)
                    # TODO: continue or yield impossible/error so it can be
                    # dealt with via on_failure?
                    continue

                if has_specval(proc, 'on-anonymize') \
                    and anything2bool(
                        get_specval(proc, 'on-anonymize')
                        ) and not anonymize:
                    # don't run that procedure, if we weren't called with
                    # --anonymize while procedure is specified to be run on
                    # that switch only
                    continue

                proc_call = get_specval(proc, 'procedure-call') \
                    if has_specval(proc, 'procedure-call') \
                    else None

                if ran_procedure.get(hash((proc_name, proc_call)), None):
                    # if we ran the exact same call already,
                    # don't call it again
                    # TODO: notneeded?
                    continue

                # if spec comes with call format string, it takes precedence
                # over what is generally configured for the procedure
                # TODO: Not sure yet whether this is how we should deal with it
                if proc_call:
                    env_subs['DATALAD_PROCEDURES_{}_CALL__FORMAT'
                             ''.format(proc_name.upper().replace('-', '__'))
                             ] = proc_call

                run_results = list()
                # Note, that we can't use dataset.config.overrides to
                # pass run-substitution config to procedures, since we
                # leave python context and thereby loose the dataset
                # instance. Use patched os.environ instead. Note also,
                # that this requires names of substitutions to not
                # contain underscores, since they would be translated to
                # '.' by ConfigManager when reading them from within the
                # procedure's datalad-run calls.
                from mock import patch  # TODO: Reconsider that patching. Shouldn't it be an update?
                with patch.dict('os.environ', env_subs):
                    # apparently reload is necessary to consider config
                    # overrides via env:
                    dataset.config.reload()
                    for r in dataset.run_procedure(
                            spec=proc_name,
                            return_type='generator'
                    ):
                        # # if there was an issue yield original result,
                        # # otherwise swallow:
                        # if r['status'] not in ['ok', 'notneeded']:
                        yield r
                        run_results.append(r)

                if not all(r['status'] in ['ok', 'notneeded']
                           for r in run_results):
                    yield {'action': proc_name,
                           'path': spec_path,
                           'snippet': spec_snippet,
                           'status': 'error',
                           'message': "acquisition conversion failed. "
                                      "See previous message(s)."}

                else:
                    yield {'action': proc_name,
                           'path': spec_path,
                           'snippet': spec_snippet,
                           'status': 'ok',
                           'message': "acquisition converted."}

                # mark as a procedure we ran on this acquisition:
                # TODO: rethink. Doesn't work that way. Disabled for now
                # ran_procedure[hash((proc_name, proc_call))] = True

                # elif proc_name != 'hirni-dicom-converter':
                #     # specific converter procedure call
                #
                #     from mock import patch
                #     with patch.dict('os.environ', env_subs):
                #         # apparently reload is necessary to consider config
                #         # overrides via env:
                #         dataset.config.reload()
                #
                #         for r in dataset.run_procedure(
                #                 spec=[proc_name, rel_spec_path, anonymize],
                #                 return_type='generator'
                #         ):
                #
                #             # if there was an issue with containers-run,
                #             # yield original result, otherwise swallow:
                #             if r['status'] not in ['ok', 'notneeded']:
                #                 yield r
                #
                #             run_results.append(r)
                #
                #     if not all(r['status'] in ['ok', 'notneeded']
                #                for r in run_results):
                #         yield {'action': proc_name,
                #                'path': spec_path,
                #                'snippet': spec_snippet,
                #                'status': 'error',
                #                'message': "Conversion failed. "
                #                           "See previous message(s)."}
                #
                #     else:
                #         yield {'action': proc_name,
                #                'path': spec_path,
                #                'snippet': spec_snippet,
                #                'status': 'ok',
                #                'message': "specification converted."}

                # elif ran_heudiconv and proc_name == 'hirni-dicom-converter':
                #     # in this case we acted upon this snippet already and
                #     # do not have to produce a result
                #     pass
                #
                # else:
                #     # this shouldn't happen!
                #     raise RuntimeError

        yield {'action': 'spec2bids',
               'path': spec_path,
               'status': 'ok'}
def test_custom_rules(path, toolbox_url):
    """Check dicom2spec with custom rule files configured (toolbox variant).

    Same staging as the plain custom-rules test, but installs the raw
    dataset with a patched toolbox URL and tolerates datalad versions
    with/without ``ConfigManager.get(..., get_all=True)``.
    """
    # ## SETUP a raw ds
    with patch.dict('os.environ',
                    {'DATALAD_HIRNI_TOOLBOX_URL': toolbox_url}):
        ds = install(source=test_raw_ds.get_raw_dataset(), path=path)
    # ## END SETUP

    # 1. simply default rules
    ds.hirni_dicom2spec(path=op.join("struct_acq", "dicoms"),
                        spec=op.join("struct_acq", "studyspec.json"))
    struct_spec = [
        s for s in load_stream(op.join(path, "struct_acq", "studyspec.json"))
    ]

    for spec_snippet in struct_spec:
        # no comment in default spec
        assert not has_specval(spec_snippet, 'comment') or not get_specval(
            spec_snippet, 'comment')
        # subject
        assert has_specval(spec_snippet, 'subject')
        assert_equal(get_specval(spec_snippet, 'subject'), '02')
        # modality
        assert has_specval(spec_snippet, 'bids-modality')
        assert_equal(get_specval(spec_snippet, 'bids-modality'), 't1w')

    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(struct_spec), 2)
    assert_in("dicomseries", [s['type'] for s in struct_spec])
    assert_in("dicomseries:all", [s['type'] for s in struct_spec])

    # set config to use custom rules
    import datalad_hirni
    ds.config.add(
        "datalad.hirni.dicom2spec.rules",
        op.join(op.dirname(datalad_hirni.__file__),
                'resources', 'rules', 'test_rules.py'),
    )

    # 2. do again with configured rules (rules 1)
    import os
    os.unlink(op.join(path, 'struct_acq', 'studyspec.json'))
    ds.hirni_dicom2spec(path=op.join("struct_acq", "dicoms"),
                        spec=op.join("struct_acq", "studyspec.json"))
    struct_spec = [
        s for s in load_stream(op.join(path, "struct_acq", "studyspec.json"))
    ]

    # assertions wrt spec
    for spec_snippet in struct_spec:
        # now there's a comment in spec
        assert has_specval(spec_snippet, 'comment')
        assert_equal(get_specval(spec_snippet, 'comment'),
                     "Rules1: These rules are for unit testing only")

    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(struct_spec), 2)
    assert_in("dicomseries", [s['type'] for s in struct_spec])
    assert_in("dicomseries:all", [s['type'] for s in struct_spec])

    # 3. once again with two configured rule sets (rules 1 and 2)
    ds.config.add(
        "datalad.hirni.dicom2spec.rules",
        op.join(op.dirname(datalad_hirni.__file__),
                'resources', 'rules', 'test_rules2.py'),
    )

    try:
        # Protect against older datalad version.
        # ATM this can't be done by checking version number, since this change
        # currently is in datalad's master branch but not in maint. maint,
        # however, has the same __version__ as master
        rule_files = ds.config.get("datalad.hirni.dicom2spec.rules",
                                   get_all=True)
    except TypeError as e:
        if "unexpected keyword argument 'get_all'" in str(e):
            # older datalad version should return multiple values out of the
            # box
            rule_files = ds.config.get("datalad.hirni.dicom2spec.rules")
        else:
            raise

    # ensure assumption about order (dicom2spec relies on it):
    assert_equal(
        rule_files[0],
        op.join(op.dirname(datalad_hirni.__file__),
                'resources', 'rules', 'test_rules.py'))
    assert_equal(
        rule_files[1],
        op.join(op.dirname(datalad_hirni.__file__),
                'resources', 'rules', 'test_rules2.py'))

    os.unlink(op.join(path, 'struct_acq', 'studyspec.json'))
    ds.hirni_dicom2spec(path=op.join("struct_acq", "dicoms"),
                        spec=op.join("struct_acq", "studyspec.json"))
    struct_spec = [
        s for s in load_stream(op.join(path, "struct_acq", "studyspec.json"))
    ]

    # assertions wrt spec
    for spec_snippet in struct_spec:
        # Rule2 should have overwritten Rule1's comment:
        assert has_specval(spec_snippet, 'comment')
        assert_equal(get_specval(spec_snippet, 'comment'),
                     "Rules2: These rules are for unit testing only")

    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(struct_spec), 2)
    assert_in("dicomseries", [s['type'] for s in struct_spec])
    assert_in("dicomseries:all", [s['type'] for s in struct_spec])