def test_default_rules(path):
    """Check spec snippets produced by the builtin default rules.

    Installs the raw test dataset, derives a spec for a functional and a
    structural acquisition, and verifies the derived values (description,
    subject, modality, task, run, id) per snippet.
    """

    # ## SETUP a raw ds
    ds = install(source=test_raw_ds.get_raw_dataset(), path=path)
    # ## END SETUP

    # create specs for dicomseries w/ default rules:
    # TODO: spec path should prob. relate to `path` via (derived) acquisition!
    ds.hirni_dicom2spec(path=op.join("func_acq", "dicoms"),
                        spec=op.join("func_acq", "studyspec.json"))
    ds.hirni_dicom2spec(path=op.join("struct_acq", "dicoms"),
                        spec=op.join("struct_acq", "studyspec.json"))

    func_spec = [
        s for s in load_stream(op.join(path, "func_acq", "studyspec.json"))
    ]

    for snippet in func_spec:
        # type
        assert_in("type", snippet.keys())
        assert_in(snippet["type"], ["dicomseries", "dicomseries:all"])
        # no comment in default spec
        assert not has_specval(snippet, 'comment') or \
            not get_specval(snippet, 'comment')
        # description
        assert has_specval(snippet, 'description')
        assert_equal(get_specval(snippet, 'description'),
                     "func_task-oneback_run-1")
        # subject
        assert has_specval(snippet, 'subject')
        assert_equal(get_specval(snippet, 'subject'), '02')
        # modality
        assert has_specval(snippet, 'bids-modality')
        assert_equal(get_specval(snippet, 'bids-modality'), 'bold')
        # task
        assert has_specval(snippet, "bids-task")
        assert_equal(get_specval(snippet, "bids-task"), "oneback")
        # run
        assert has_specval(snippet, "bids-run")
        assert_equal(get_specval(snippet, "bids-run"), "01")
        # id
        assert has_specval(snippet, "id")
        assert_equal(get_specval(snippet, "id"), 401)

    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(func_spec), 2)
    assert_in("dicomseries", [s['type'] for s in func_spec])
    assert_in("dicomseries:all", [s['type'] for s in func_spec])

    struct_spec = [
        s for s in load_stream(op.join(path, "struct_acq", "studyspec.json"))
    ]

    for snippet in struct_spec:
        # type
        # Note: use the same assert_* helpers as for func_spec above for
        # consistent failure reporting (was a mix of plain `assert` before).
        assert_in("type", snippet.keys())
        assert_in(snippet["type"], ["dicomseries", "dicomseries:all"])
        # no comment in default spec
        assert not has_specval(snippet, 'comment') or \
            not get_specval(snippet, 'comment')
        # description
        assert has_specval(snippet, 'description')
        assert_equal(get_specval(snippet, 'description'), "anat-T1w")
        # subject
        assert has_specval(snippet, 'subject')
        assert_equal(get_specval(snippet, 'subject'), '02')
        # modality
        assert has_specval(snippet, 'bids-modality')
        assert_equal(get_specval(snippet, 'bids-modality'), 't1w')
        # run
        assert has_specval(snippet, "bids-run")
        assert_equal(get_specval(snippet, "bids-run"), "1")

    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(struct_spec), 2)
    assert_in("dicomseries", [s['type'] for s in struct_spec])
    assert_in("dicomseries:all", [s['type'] for s in struct_spec])
def test_custom_rules(path):
    """Verify that configured custom rule files change the generated spec.

    Runs dicom2spec three times on the structural acquisition: with
    default rules, with one custom rule file, and with two custom rule
    files (the last configured rule set is expected to win).
    """

    # ## SETUP a raw ds
    ds = install(source=test_raw_ds.get_raw_dataset(), path=path)
    # ## END SETUP

    # 1. simply default rules
    ds.hirni_dicom2spec(path=op.join("struct_acq", "dicoms"),
                        spec=op.join("struct_acq", "studyspec.json"))
    struct_spec = [
        s for s in load_stream(op.join(path, "struct_acq", "studyspec.json"))
    ]

    for spec_snippet in struct_spec:
        # no comment in default spec
        assert not has_specval(spec_snippet, 'comment') or \
            not get_specval(spec_snippet, 'comment')
        # subject
        assert has_specval(spec_snippet, 'subject')
        assert_equal(get_specval(spec_snippet, 'subject'), '02')
        # modality
        assert has_specval(spec_snippet, 'bids-modality')
        assert_equal(get_specval(spec_snippet, 'bids-modality'), 't1w')

    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(struct_spec), 2)
    assert_in("dicomseries", [s['type'] for s in struct_spec])
    assert_in("dicomseries:all", [s['type'] for s in struct_spec])

    # set config to use custom rules
    import datalad_hirni
    ds.config.add(
        "datalad.hirni.dicom2spec.rules",
        op.join(op.dirname(datalad_hirni.__file__),
                'resources', 'rules', 'test_rules.py'),
    )

    # 2. do again with configured rules (rules 1)
    import os
    os.unlink(op.join(path, 'struct_acq', 'studyspec.json'))
    ds.hirni_dicom2spec(path=op.join("struct_acq", "dicoms"),
                        spec=op.join("struct_acq", "studyspec.json"))
    struct_spec = [
        s for s in load_stream(op.join(path, "struct_acq", "studyspec.json"))
    ]

    # assertions wrt spec
    for spec_snippet in struct_spec:
        # now there's a comment in spec
        assert has_specval(spec_snippet, 'comment')
        assert_equal(get_specval(spec_snippet, 'comment'),
                     "Rules1: These rules are for unit testing only")

    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(struct_spec), 2)
    assert_in("dicomseries", [s['type'] for s in struct_spec])
    assert_in("dicomseries:all", [s['type'] for s in struct_spec])

    # 3. once again with two configured rule sets (rules 1 and 2)
    ds.config.add(
        "datalad.hirni.dicom2spec.rules",
        op.join(op.dirname(datalad_hirni.__file__),
                'resources', 'rules', 'test_rules2.py'),
    )

    try:
        # Newer datalad returns only the most recent value of a multi-value
        # config item unless `get_all=True` is passed; older versions don't
        # know that keyword (TypeError) but return all values by default.
        rule_files = ds.config.get("datalad.hirni.dicom2spec.rules",
                                   get_all=True)
    except TypeError as e:
        if "unexpected keyword argument 'get_all'" in str(e):
            # older datalad version should return multiple values out of the box
            rule_files = ds.config.get("datalad.hirni.dicom2spec.rules")
        else:
            raise

    # ensure assumption about order (dicom2spec relies on it):
    assert_equal(
        rule_files[0],
        op.join(op.dirname(datalad_hirni.__file__),
                'resources', 'rules', 'test_rules.py'))
    assert_equal(
        rule_files[1],
        op.join(op.dirname(datalad_hirni.__file__),
                'resources', 'rules', 'test_rules2.py'))

    os.unlink(op.join(path, 'struct_acq', 'studyspec.json'))
    ds.hirni_dicom2spec(path=op.join("struct_acq", "dicoms"),
                        spec=op.join("struct_acq", "studyspec.json"))
    struct_spec = [
        s for s in load_stream(op.join(path, "struct_acq", "studyspec.json"))
    ]

    # assertions wrt spec
    for spec_snippet in struct_spec:
        # Rule2 should have overwritten Rule1's comment:
        assert has_specval(spec_snippet, 'comment')
        assert_equal(get_specval(spec_snippet, 'comment'),
                     "Rules2: These rules are for unit testing only")

    # should have 1 snippet of type dicomseries + 1 of type dicomseries:all
    assert_equal(len(struct_spec), 2)
    assert_in("dicomseries", [s['type'] for s in struct_spec])
    assert_in("dicomseries:all", [s['type'] for s in struct_spec])
def __call__(specfile, dataset=None, anonymize=False, only_type=None): dataset = require_dataset(dataset, check_installed=True, purpose="spec2bids") specfile = assure_list(specfile) specfile = [resolve_path(p, dataset) for p in specfile] specfile = [str(p) for p in specfile] for spec_path in specfile: # Note/TODO: ran_procedure per spec file still isn't ideal. Could # be different spec files for same acquisition. It's actually about # the exact same call. How to best get around substitutions? # Also: per snippet isn't correct either. # substitutions is real issue. Example "copy {location} ." # # => datalad.interface.run.format_command / normalize_command ? # TODO: Also can we skip prepare_inputs within run? At least specify # more specifically. Note: Can be globbed! ran_procedure = dict() if not lexists(spec_path): yield get_status_dict( action='spec2bids', path=spec_path, status='impossible', message="{} not found".format(spec_path) ) if op.isdir(spec_path): if op.realpath(op.join(spec_path, op.pardir)) == \ op.realpath(dataset.path): spec_path = op.join( spec_path, dataset.config.get( "datalad.hirni.studyspec.filename", "studyspec.json") ) # TODO: check existence of that file! else: yield get_status_dict( action='spec2bids', path=spec_path, status='impossible', message="{} is neither a specification file nor an " "acquisition directory".format(spec_path) ) # relative path to spec to be recorded: rel_spec_path = relpath(spec_path, dataset.path) \ if isabs(spec_path) else spec_path # check each dict (snippet) in the specification for what to do # wrt conversion: for spec_snippet in load_stream(spec_path): if only_type and not spec_snippet['type'].startswith(only_type): # ignore snippets not matching `only_type` # Note/TODO: the .startswith part is meant for # matching "dicomseries:all" to given "dicomseries" but not # vice versa. This prob. 
needs refinement (and doc) continue if 'procedures' not in spec_snippet: # no conversion procedures defined at all: yield get_status_dict( action='spec2bids', path=spec_path, snippet=spec_snippet, status='notneeded', ) continue procedure_list = spec_snippet['procedures'] if not procedure_list: # no conversion procedures defined at all: yield get_status_dict( action='spec2bids', path=spec_path, snippet=spec_snippet, status='notneeded', ) continue # accept a single dict as a one item list: if isinstance(procedure_list, dict): procedure_list = [procedure_list] # build a dict available for placeholders in format strings: # Note: This is flattening the structure since we don't need # value/approved for the substitutions. In addition 'subject' # and 'anon_subject' are not passed on, but a new key # 'bids_subject' instead the value of which depends on the # --anonymize switch. # Additionally 'location' is recomputed to be relative to # dataset.path, since this is where the procedures are running # from within. 
replacements = dict() for k, v in spec_snippet.items(): if k == 'subject': if not anonymize: replacements['bids-subject'] = v['value'] elif k == 'anon-subject': if anonymize: replacements['bids-subject'] = v['value'] elif k == 'location': replacements[k] = op.join(op.dirname(rel_spec_path), v) elif k == 'procedures': # 'procedures' is a list of dicts (not suitable for # substitutions) and it makes little sense to be # referenced by converter format strings anyway: continue else: replacements[k] = v['value'] if isinstance(v, dict) else v # build dict to patch os.environ with for passing # replacements on to procedures: env_subs = dict() for k, v in replacements.items(): env_subs['DATALAD_RUN_SUBSTITUTIONS_{}' ''.format(k.upper().replace('-', '__'))] = str(v) env_subs['DATALAD_RUN_SUBSTITUTIONS_SPECPATH'] = rel_spec_path env_subs['DATALAD_RUN_SUBSTITUTIONS_ANONYMIZE'] = str(anonymize) # TODO: The above two blocks to build replacements dict and # env_subs should be joined eventually. for proc in procedure_list: if has_specval(proc, 'procedure-name'): proc_name = get_specval(proc, 'procedure-name') else: # invalid procedure spec lgr.warning("conversion procedure missing key " "'procedure-name' in %s: %s", spec_path, proc) # TODO: continue or yield impossible/error so it can be # dealt with via on_failure? continue if has_specval(proc, 'on-anonymize') \ and anything2bool( get_specval(proc, 'on-anonymize') ) and not anonymize: # don't run that procedure, if we weren't called with # --anonymize while procedure is specified to be run on # that switch only continue proc_call = get_specval(proc, 'procedure-call') \ if has_specval(proc, 'procedure-call') \ else None if ran_procedure.get(hash((proc_name, proc_call)), None): # if we ran the exact same call already, # don't call it again # TODO: notneeded? 
continue # if spec comes with call format string, it takes precedence # over what is generally configured for the procedure # TODO: Not sure yet whether this is how we should deal with it if proc_call: env_subs['DATALAD_PROCEDURES_{}_CALL__FORMAT' ''.format(proc_name.upper().replace('-', '__')) ] = proc_call run_results = list() # Note, that we can't use dataset.config.overrides to # pass run-substitution config to procedures, since we # leave python context and thereby loose the dataset # instance. Use patched os.environ instead. Note also, # that this requires names of substitutions to not # contain underscores, since they would be translated to # '.' by ConfigManager when reading them from within the # procedure's datalad-run calls. from mock import patch # TODO: Reconsider that patching. Shouldn't it be an update? with patch.dict('os.environ', env_subs): # apparently reload is necessary to consider config # overrides via env: dataset.config.reload() for r in dataset.run_procedure( spec=proc_name, return_type='generator' ): # # if there was an issue yield original result, # # otherwise swallow: # if r['status'] not in ['ok', 'notneeded']: yield r run_results.append(r) if not all(r['status'] in ['ok', 'notneeded'] for r in run_results): yield {'action': proc_name, 'path': spec_path, 'snippet': spec_snippet, 'status': 'error', 'message': "acquisition conversion failed. " "See previous message(s)."} else: yield {'action': proc_name, 'path': spec_path, 'snippet': spec_snippet, 'status': 'ok', 'message': "acquisition converted."} # mark as a procedure we ran on this acquisition: # TODO: rethink. Doesn't work that way. 
Disabled for now # ran_procedure[hash((proc_name, proc_call))] = True # elif proc_name != 'hirni-dicom-converter': # # specific converter procedure call # # from mock import patch # with patch.dict('os.environ', env_subs): # # apparently reload is necessary to consider config # # overrides via env: # dataset.config.reload() # # for r in dataset.run_procedure( # spec=[proc_name, rel_spec_path, anonymize], # return_type='generator' # ): # # # if there was an issue with containers-run, # # yield original result, otherwise swallow: # if r['status'] not in ['ok', 'notneeded']: # yield r # # run_results.append(r) # # if not all(r['status'] in ['ok', 'notneeded'] # for r in run_results): # yield {'action': proc_name, # 'path': spec_path, # 'snippet': spec_snippet, # 'status': 'error', # 'message': "Conversion failed. " # "See previous message(s)."} # # else: # yield {'action': proc_name, # 'path': spec_path, # 'snippet': spec_snippet, # 'status': 'ok', # 'message': "specification converted."} # elif ran_heudiconv and proc_name == 'hirni-dicom-converter': # # in this case we acted upon this snippet already and # # do not have to produce a result # pass # # else: # # this shouldn't happen! # raise RuntimeError yield {'action': 'spec2bids', 'path': spec_path, 'status': 'ok'}
    def __call__(path=None, spec=None, dataset=None, subject=None,
                 anon_subject=None, acquisition=None, properties=None):
        """Derive/update a study specification file from DICOM metadata.

        Generator yielding datalad-style result dicts. Reads dataset-level
        DICOM metadata via ``meta_dump``, merges snippets into the spec via
        ``add_to_spec``, tags duplicate runs to be ignored, stores the spec
        as a JSON stream tracked in git, and saves it.

        Parameters
        ----------
        path : str or list
          Path to the DICOM (sub)dataset; exactly one path is supported ATM.
        spec : str
          Path of the spec file to create/update (required).
        dataset : Dataset, optional
          Dataset to operate on; resolved via ``require_dataset``.
        subject, anon_subject : str, optional
          Passed through to ``add_to_spec``.
        acquisition : optional
          Currently unused here (see TODO below).
        properties : str, optional
          Path to a JSON file, or a JSON string, of spec overrides.
        """

        # TODO: acquisition can probably be removed (or made an alternative to
        # derive spec and/or dicom location from)
        # Change, so path needs to point directly to dicom ds?
        # Or just use acq and remove path?
        dataset = require_dataset(dataset, check_installed=True,
                                  purpose="spec from dicoms")

        from datalad.utils import assure_list
        if path is not None:
            path = assure_list(path)
            path = [resolve_path(p, dataset) for p in path]
        else:
            raise InsufficientArgumentsError(
                "insufficient arguments for dicom2spec: a path is required")

        # TODO: We should be able to deal with several paths at once
        #       ATM we aren't (see also commit + message of actual spec)
        assert len(path) == 1

        if not spec:
            raise InsufficientArgumentsError(
                "insufficient arguments for dicom2spec: a spec file is required"
            )

            # TODO: That's prob. wrong. We can derive default spec from acquisition
        else:
            spec = resolve_path(spec, dataset)

        # Load existing snippets so repeated runs extend rather than replace
        # the spec file.
        spec_series_list = \
            [r for r in json_py.load_stream(spec)] if op.exists(spec) else list()

        # get dataset level metadata:
        found_some = False
        for meta in dataset.meta_dump(
                path,
                recursive=False,  # always False?
                reporton='datasets',
                return_type='generator',
                result_renderer='disabled'):
            if meta.get('status', None) not in ['ok', 'notneeded']:
                # pass through failures from metadata retrieval untouched
                yield meta
                continue
            if 'dicom' not in meta['metadata']:

                # TODO: Really "notneeded" or simply not a result at all?
                yield dict(
                        status='notneeded',
                        message=("found no DICOM metadata for %s",
                                 meta['path']),
                        path=meta['path'],
                        type='dataset',
                        action='dicom2spec',
                        logger=lgr)
                continue

            if 'Series' not in meta['metadata']['dicom'] or \
                    not meta['metadata']['dicom']['Series']:
                yield dict(
                        status='impossible',
                        message=("no image series detected in DICOM metadata of"
                                 " %s", meta['path']),
                        path=meta['path'],
                        type='dataset',
                        action='dicom2spec',
                        logger=lgr)
                continue

            found_some = True

            overrides = dict()
            if properties:
                # load from file or json string
                props = json_py.load(properties) \
                    if op.exists(properties) else json_py.loads(properties)
                # turn into editable, pre-approved records
                props = {k: dict(value=v, approved=True)
                         for k, v in props.items()}
                overrides.update(props)

            spec_series_list = add_to_spec(meta,
                                           spec_series_list,
                                           op.dirname(spec),
                                           subject=subject,
                                           anon_subject=anon_subject,
                                           # session=session,
                                           # TODO: parameter "session" was what
                                           # we now call acquisition. This is
                                           # NOT a good default for bids_session!
                                           # Particularly wrt to anonymization
                                           overrides=overrides,
                                           dataset=dataset
                                           )

        if not found_some:
            yield dict(status='impossible',
                       message="found no DICOM metadata",
                       path=path,
                       type='file',  # TODO: arguable should be 'file' or 'dataset', depending on path
                       action='dicom2spec',
                       logger=lgr)
            return

        # TODO: RF needed. This rule should go elsewhere:
        # ignore duplicates (prob. reruns of aborted runs)
        # -> convert highest id only
        # Note: This sorting is a q&d hack!
        # TODO: Sorting needs to become more sophisticated + include notion of :all
        spec_series_list = sorted(spec_series_list,
                                  key=lambda x: get_specval(x, 'id')
                                  if 'id' in x.keys() else 0)
        for i in range(len(spec_series_list)):
            # Note: Removed the following line from condition below,
            # since it appears to be pointless. Value for 'converter'
            # used to be 'heudiconv' or 'ignore' for a 'dicomseries', so
            # it's not clear ATM what case this could possibly have catched:
            # heuristic.has_specval(spec_series_list[i], "converter") and \
            #
            # A 'dicomseries' snippet is tagged to be ignored if a later
            # snippet (higher id) shares its description and bids-run,
            # i.e. looks like a rerun of the same series.
            if spec_series_list[i]["type"] == "dicomseries" and \
                has_specval(spec_series_list[i], "bids-run") and \
                get_specval(spec_series_list[i], "bids-run") in \
                [get_specval(s, "bids-run")
                 for s in spec_series_list[i + 1:]
                 if get_specval(
                        s,
                        "description") == get_specval(
                            spec_series_list[i], "description") and \
                 get_specval(s, "id") > get_specval(
                                 spec_series_list[i], "id")
                 ]:
                lgr.debug("Ignore SeriesNumber %s for conversion" % i)
                spec_series_list[i]["tags"].append(
                        'hirni-dicom-converter-ignore')

        lgr.debug("Storing specification (%s)", spec)
        # store as a stream (one record per file) to be able to
        # easily concat files without having to parse them, or
        # process them line by line without having to fully parse them
        from datalad_hirni.support.spec_helpers import sort_spec

        # Note: Sorting paradigm needs to change. See above.
        # spec_series_list = sorted(spec_series_list, key=lambda x: sort_spec(x))

        json_py.dump2stream(spec_series_list, spec)

        # make sure spec is in git:
        dataset.repo.set_gitattributes(
                [(spec, {'annex.largefiles': 'nothing'})], '.gitattributes')

        for r in Save.__call__(dataset=dataset,
                               path=[spec, '.gitattributes'],
                               to_git=True,
                               message="[HIRNI] Added study specification "
                                       "snippet for %s" %
                                       op.relpath(path[0], dataset.path),
                               return_type='generator',
                               result_renderer='disabled'):
            # Re-brand save results concerning the spec itself as
            # 'dicom2spec' results; swallow dataset-level save results.
            if r.get('status', None) not in ['ok', 'notneeded']:
                yield r
            elif r['path'] in [spec, op.join(dataset.path, '.gitattributes')] \
                    and r['type'] == 'file':
                r['action'] = 'dicom2spec'
                r['logger'] = lgr
                yield r
            elif r['type'] == 'dataset':
                # 'ok' or 'notneeded' for a dataset is okay, since we commit
                # the spec. But it's not a result to yield
                continue
            else:
                # anything else shouldn't happen
                yield dict(status='error',
                           message=("unexpected result from save: %s", r),
                           path=spec,  # TODO: This actually isn't clear - get it from `r`
                           type='file',
                           action='dicom2spec',
                           logger=lgr)
def test_custom_rules(path, toolbox_url):
    """Exercise dicom2spec with zero, one and two custom rule files.

    The structural acquisition is converted three times: with the builtin
    default rules, with one custom rule file configured, and with a second
    rule file appended (the last configured rule set takes precedence).
    """

    # ## SETUP a raw ds
    with patch.dict('os.environ',
                    {'DATALAD_HIRNI_TOOLBOX_URL': toolbox_url}):
        ds = install(source=test_raw_ds.get_raw_dataset(), path=path)
    # ## END SETUP

    rel_spec = op.join("struct_acq", "studyspec.json")
    abs_spec = op.join(path, "struct_acq", "studyspec.json")
    dicom_path = op.join("struct_acq", "dicoms")

    # 1. simply default rules
    ds.hirni_dicom2spec(path=dicom_path, spec=rel_spec)
    snippets = list(load_stream(abs_spec))

    for snippet in snippets:
        # default rules leave no comment
        assert not has_specval(snippet, 'comment') or \
            not get_specval(snippet, 'comment')
        # subject
        assert has_specval(snippet, 'subject')
        assert_equal(get_specval(snippet, 'subject'), '02')
        # modality
        assert has_specval(snippet, 'bids-modality')
        assert_equal(get_specval(snippet, 'bids-modality'), 't1w')

    # expect one 'dicomseries' plus one 'dicomseries:all' snippet
    assert_equal(len(snippets), 2)
    snippet_types = [s['type'] for s in snippets]
    assert_in("dicomseries", snippet_types)
    assert_in("dicomseries:all", snippet_types)

    # set config to use custom rules
    import datalad_hirni
    rules_base = op.join(op.dirname(datalad_hirni.__file__),
                         'resources', 'rules')
    ds.config.add("datalad.hirni.dicom2spec.rules",
                  op.join(rules_base, 'test_rules.py'))

    # 2. do again with configured rules (rules 1)
    import os
    os.unlink(abs_spec)
    ds.hirni_dicom2spec(path=dicom_path, spec=rel_spec)
    snippets = list(load_stream(abs_spec))

    for snippet in snippets:
        # the custom rule set annotates every snippet with a comment
        assert has_specval(snippet, 'comment')
        assert_equal(get_specval(snippet, 'comment'),
                     "Rules1: These rules are for unit testing only")

    # expect one 'dicomseries' plus one 'dicomseries:all' snippet
    assert_equal(len(snippets), 2)
    snippet_types = [s['type'] for s in snippets]
    assert_in("dicomseries", snippet_types)
    assert_in("dicomseries:all", snippet_types)

    # 3. once again with two configured rule sets (rules 1 and 2)
    ds.config.add("datalad.hirni.dicom2spec.rules",
                  op.join(rules_base, 'test_rules2.py'))

    try:
        # Protect against older datalad version.
        # ATM this can't be done by checking version number, since this change
        # currently is in datalad's master branch but not in maint. maint,
        # however, has the same __version__ as master
        rule_files = ds.config.get("datalad.hirni.dicom2spec.rules",
                                   get_all=True)
    except TypeError as e:
        if "unexpected keyword argument 'get_all'" not in str(e):
            raise
        # older datalad version should return multiple values out of the box
        rule_files = ds.config.get("datalad.hirni.dicom2spec.rules")

    # ensure assumption about order (dicom2spec relies on it):
    assert_equal(rule_files[0], op.join(rules_base, 'test_rules.py'))
    assert_equal(rule_files[1], op.join(rules_base, 'test_rules2.py'))

    os.unlink(abs_spec)
    ds.hirni_dicom2spec(path=dicom_path, spec=rel_spec)
    snippets = list(load_stream(abs_spec))

    for snippet in snippets:
        # Rule2 should have overwritten Rule1's comment:
        assert has_specval(snippet, 'comment')
        assert_equal(get_specval(snippet, 'comment'),
                     "Rules2: These rules are for unit testing only")

    # expect one 'dicomseries' plus one 'dicomseries:all' snippet
    assert_equal(len(snippets), 2)
    snippet_types = [s['type'] for s in snippets]
    assert_in("dicomseries", snippet_types)
    assert_in("dicomseries:all", snippet_types)