def acquisition_to_heudiconv(client, acq, context):
    """Create a list of SeqInfo objects for all convertable files in the acquisition.

    Parameters
    ----------
    client :
        Flywheel client; used only to resolve the parent subject/session
        labels when a dicom cannot be processed (for the debug message).
    acq :
        Acquisition whose files are scanned for convertable types.
    context : dict
        Mutable conversion context; must contain 'total' (running count of
        dicom members, incremented here) and 'subject' (whose ``code`` is the
        PatientID fallback).

    Returns
    -------
    list
        One ``utils.SeqInfo`` per file in ``acq`` whose type is in
        ``CONVERTABLE_TYPES``.
    """
    to_convert = []
    # Use the first dicom file (if any) to derive the zip member list and
    # the image shape shared by all SeqInfo entries of this acquisition.
    dicoms = [f for f in acq.files if f.type == 'dicom']
    if dicoms:
        dicom = dicoms[0]
        try:
            zip_info = acq.get_file_zip_info(dicom.name)
            context['total'] += len(zip_info.members)
            dcm_info = dicom.info
        except Exception:
            # Best-effort: keep going with empty info, but record which
            # subject/session the unprocessable dicom belongs to.
            except_subj = client.get(acq.parents.subject)
            except_sess = client.get(acq.parents.session)
            # BUG FIX: the original called .format() on a message containing
            # '%s' placeholders, so nothing was ever substituted; use lazy
            # logging arguments instead.
            log.debug(
                'Dicom could not be processed:\n\t%s\n\tSubject Label: %s'
                '\n\tSession Label: %s',
                dicom.name, except_subj.label, except_sess.label)
            zip_info = None
            dcm_info = {}
    else:
        zip_info = None
        dcm_info = {}

    # Make it a nicom wrapper to handle all sorts of different dicom styles
    mw = wrapper_from_data(dcm_info)
    num_dicoms = len(zip_info.members) if zip_info else -1
    image_shape = mw.image_shape
    if image_shape is None:
        # No usable shape information: use the -1 sentinel for every dim.
        image_shape = (-1, -1, -1, -1)
    else:
        # Number of dicom members acts as the 4th (volume/time) dimension.
        image_shape = mw.image_shape + (num_dicoms, )
    # Guarantee exactly-4 dims, padding any missing ones with -1.
    while len(image_shape) < 4:
        image_shape = image_shape + (-1, )

    for fileobj in acq.files:
        log.debug('filename: %s', fileobj.name)
        if fileobj.type not in CONVERTABLE_TYPES:
            continue
        info = fileobj.info
        # NOTE: the original also built an unused nicom wrapper from `info`
        # here; that dead assignment has been removed.
        log.debug('uid: %s', info.get("SeriesInstanceUID"))
        to_convert.append(
            utils.SeqInfo(
                context['total'],
                zip_info.members[0].path if zip_info else None,
                acq.id,
                fileobj.name,
                '-',
                '-',
                image_shape[0],
                image_shape[1],
                image_shape[2],
                image_shape[3],
                # We can use the number of files in the
                # Or a corresponding dicom header field
                info.get("RepetitionTime"),
                info.get("EchoTime"),
                info.get("ProtocolName", ""),
                "MOCO" in info.get("ImageType", []),
                "DERIVED" in info.get("ImageType", []),
                info.get("PatientID", context['subject'].code),
                info.get("StudyDescription"),
                info.get("ReferringPhysicianName", ""),
                info.get("SeriesDescription", ""),
                info.get("SequenceName"),
                tuple(info.get("ImageType", [])),
                info.get("AccessionNumber"),
                info.get("PatientAge"),
                info.get("PatientSex"),
                info.get("AcquisitionDate"),
                info.get("SeriesInstanceUID")))
    # We could possible add a context field which would contain flywheel
    # hierarchy information like the subject code and session label
    # or the information fields within them
    return to_convert
def add_dcm(self, dcm, meta=None):
    '''Add a pydicom dataset to the stack.

    Parameters
    ----------
    dcm : dicom.dataset.Dataset
        The data set being added to the stack
    meta : dict
        The extracted meta data for the DICOM data set `dcm`. If None
        extract.default_extractor will be used.

    Raises
    ------
    IncongruentImageError
        The provided `dcm` does not match the orientation or dimensions of
        those already in the stack.
    ImageCollisionError
        The provided `dcm` has the same slice location and time/vector
        values as an image already in the stack (only checked when an
        explicit time/vector order was specified).
    '''
    if meta is None:
        # Lazy import to avoid a hard dependency unless extraction is needed
        from .extract import default_extractor
        meta = default_extractor(dcm)

    dw = wrapper_from_data(dcm)

    # _chk_congruent raises IncongruentImageError on a mismatch; a truthy
    # return marks this input as a "dummy" (no directly usable pixel data).
    is_dummy = self._chk_congruent(meta)

    # Accumulate per-stack metadata sets used elsewhere for consistency checks
    self._phase_enc_dirs.add(meta.get('InPlanePhaseEncodingDirection'))
    self._repetition_times.add(meta.get('RepetitionTime'))

    # Pull the info used for sorting
    slice_pos = dw.slice_indicator
    self._slice_pos_vals.add(slice_pos)
    time_val = None
    if self._time_order:
        time_val = self._time_order.get_ordinate(meta)
    self._time_vals.add(time_val)
    vector_val = None
    if self._vector_order:
        vector_val = self._vector_order.get_ordinate(meta)
    self._vector_vals.add(vector_val)

    # Create a tuple with the sorting values (vector outermost, slice innermost)
    sorting_tuple = (vector_val, time_val, slice_pos)

    # If an explicit order was specified, raise an exception if this image
    # collides with another already in the stack
    if ((not self._time_order is None or
         not self._vector_order is None) and
        sorting_tuple in self._sorting_tuples
       ):
        raise ImageCollisionError()
    self._sorting_tuples.add(sorting_tuple)

    # Create a NiftiWrapper for this input if possible
    nii_wrp = None
    if not is_dummy:
        nii_wrp = NiftiWrapper.from_dicom_wrapper(dw, meta)
        if self._ref_input is None:
            # We don't have a reference input yet, use this one
            self._ref_input = nii_wrp
            # Convert any dummies that we have stashed previously, now that
            # a reference input exists to model them on
            for dummy_meta, dummy_tuple, iop in self._dummies:
                dummy_wrp = _make_dummy(self._ref_input, dummy_meta, iop)
                self._files_info.append((dummy_wrp, dummy_tuple))
    else:
        if self._ref_input is None:
            # We don't have a reference input, so stash the dummy for now
            self._dummies.append((meta, sorting_tuple, dcm.ImagePositionPatient))
        else:
            # Convert dummy using the reference input
            nii_wrp = _make_dummy(self._ref_input, meta, dcm.ImagePositionPatient)

    # If we made a NiftiWrapper add it to the stack
    if not nii_wrp is None:
        self._files_info.append((nii_wrp, sorting_tuple))

    # Set the dirty flags so cached shape/meta get recomputed on next access
    self._shape_dirty = True
    self._meta_dirty = True
def group_dicoms_into_seqinfos(files, file_filter, dcmfilter, grouping):
    """Process list of dicoms and return seqinfo and file group

    `seqinfo` contains per-sequence extract of fields from DICOMs which
    will be later provided into heuristics to decide on filenames

    Parameters
    ----------
    files : list of str
        List of files to consider
    file_filter : callable, optional
        Applied to each item of filenames. Should return True if file needs to be
        kept, False otherwise.
    dcmfilter : callable, optional
        If called on dcm_data and returns True, it is used to set series_id
    grouping : {'studyUID', 'accession_number', None}, optional
        what to group by: studyUID or accession_number

    Returns
    -------
    seqinfo : list of list
        `seqinfo` is a list of info entries per each sequence (some entry
        there defines a key for `filegrp`)
    filegrp : dict
        `filegrp` is a dictionary with files groupped per each sequence

    Raises
    ------
    ValueError
        If `grouping` is not one of the supported values.
    """
    allowed_groupings = ['studyUID', 'accession_number', None]
    if grouping not in allowed_groupings:
        raise ValueError('I do not know how to group by {0}'.format(grouping))
    per_studyUID = grouping == 'studyUID'
    per_accession_number = grouping == 'accession_number'
    lgr.info("Analyzing %d dicoms", len(files))

    groups = [[], []]
    mwgroup = []

    studyUID = None
    # for sanity check that all DICOMs came from the same
    # "study".  If not -- what is the use-case? (interrupted acquisition?)
    # and how would then we deal with series numbers
    # which would differ already
    if file_filter:
        nfl_before = len(files)
        files = list(filter(file_filter, files))
        nfl_after = len(files)
        lgr.info('Filtering out {0} dicoms based on their filename'.format(
            nfl_before - nfl_after))

    # Hoisted out of the loop (was re-executed per file; loop-invariant).
    # TODO after getting a regression test check if the same behavior
    # with stop_before_pixels=True
    import nibabel.nicom.dicomwrappers as dw

    for fidx, filename in enumerate(files):
        mw = dw.wrapper_from_data(dcm.read_file(filename, force=True))

        # Drop signature fields that vary across otherwise-identical series
        # so is_same_series() groups them together.
        for sig in ('iop', 'ICE_Dims', 'SequenceName'):
            try:
                del mw.series_signature[sig]
            except KeyError:
                # Narrowed from a bare except: only a missing key is expected
                pass

        try:
            file_studyUID = mw.dcm_data.StudyInstanceUID
        except AttributeError:
            lgr.info("File {} is missing any StudyInstanceUID".format(filename))
            file_studyUID = None

        # Workaround for protocol name in private siemens csa header
        try:
            mw.dcm_data.ProtocolName
        except AttributeError:
            if not getattr(mw.dcm_data, 'ProtocolName', '').strip():
                mw.dcm_data.ProtocolName = parse_private_csa_header(
                    mw.dcm_data, 'ProtocolName', 'tProtocolName'
                ) if mw.is_csa else ''

        try:
            series_id = (int(mw.dcm_data.SeriesNumber),
                         mw.dcm_data.ProtocolName)
            file_studyUID = mw.dcm_data.StudyInstanceUID

            if not per_studyUID:
                # verify that we are working with a single study
                if studyUID is None:
                    studyUID = file_studyUID
                elif not per_accession_number:
                    if studyUID != file_studyUID:
                        lgr.warning(
                            "Conflicting study identifiers found [{}, {}].".format(
                                studyUID, file_studyUID))
        except AttributeError as exc:
            lgr.warning('Ignoring %s since not quite a "normal" DICOM: %s',
                        filename, exc)
            series_id = (-1, 'none')
            file_studyUID = None

        if not series_id[0] < 0:
            if dcmfilter is not None and dcmfilter(mw.dcm_data):
                series_id = (-1, mw.dcm_data.ProtocolName)

        # filter out unwanted non-image-data DICOMs by assigning
        # a series number < 0 (see test below)
        if not series_id[0] < 0 and mw.dcm_data[0x0008, 0x0016].repval in (
                'Raw Data Storage',
                'GrayscaleSoftcopyPresentationStateStorage'):
            series_id = (-1, mw.dcm_data.ProtocolName)

        if per_studyUID:
            series_id = series_id + (file_studyUID,)

        ingrp = False
        for idx in range(len(mwgroup)):
            # same = mw.is_same_series(mwgroup[idx])
            if mw.is_same_series(mwgroup[idx]):
                # the same series should have the same study uuid
                assert (mwgroup[idx].dcm_data.get('StudyInstanceUID', None)
                        == file_studyUID)
                ingrp = True
                if series_id[0] >= 0:
                    series_id = (mwgroup[idx].dcm_data.SeriesNumber,
                                 mwgroup[idx].dcm_data.ProtocolName)
                    if per_studyUID:
                        series_id = series_id + (file_studyUID,)
                groups[0].append(series_id)
                groups[1].append(idx)

        if not ingrp:
            mwgroup.append(mw)
            groups[0].append(series_id)
            groups[1].append(len(mwgroup) - 1)

    group_map = dict(zip(groups[0], groups[1]))

    total = 0
    seqinfo = OrderedDict()

    # for the next line to make any sense the series_id needs to
    # be sortable in a way that preserves the series order
    for series_id, mwidx in sorted(group_map.items()):
        if series_id[0] < 0:
            # skip our fake series with unwanted files
            continue
        mw = mwgroup[mwidx]
        if mw.image_shape is None:
            # this whole thing has now image data (maybe just PSg DICOMs)
            # nothing to see here, just move on
            continue
        dcminfo = mw.dcm_data
        series_files = [files[i] for i, s in enumerate(groups[0])
                        if s == series_id]
        # turn the series_id into a human-readable string -- string is needed
        # for JSON storage later on
        if per_studyUID:
            studyUID = series_id[2]
            series_id = series_id[:2]
        accession_number = dcminfo.get('AccessionNumber')

        series_id = '-'.join(map(str, series_id))

        size = list(mw.image_shape) + [len(series_files)]
        total += size[-1]
        if len(size) < 4:
            size.append(1)

        # MG - refactor into util function
        try:
            TR = float(dcminfo.RepetitionTime) / 1000.
        except (AttributeError, ValueError):
            TR = -1
        try:
            TE = float(dcminfo.EchoTime)
        except (AttributeError, ValueError):
            TE = -1
        try:
            refphys = str(dcminfo.ReferringPhysicianName)
        except AttributeError:
            refphys = ''
        try:
            image_type = tuple(dcminfo.ImageType)
        except AttributeError:
            image_type = ''
        try:
            series_desc = dcminfo.SeriesDescription
        except AttributeError:
            series_desc = ''

        motion_corrected = 'MOCO' in image_type

        if dcminfo.get([0x18, 0x24], None):
            # GE and Philips scanners
            sequence_name = dcminfo[0x18, 0x24].value
        elif dcminfo.get([0x19, 0x109c], None):
            # Siemens scanners
            sequence_name = dcminfo[0x19, 0x109c].value
        else:
            sequence_name = 'Not found'

        info = SeqInfo(
            total,
            op.split(series_files[0])[1],
            series_id,
            op.basename(op.dirname(series_files[0])),
            '-', '-',
            size[0], size[1], size[2], size[3],
            TR, TE,
            dcminfo.ProtocolName,
            motion_corrected,
            # derived
            'derived' in [x.lower() for x in dcminfo.get('ImageType', [])],
            dcminfo.get('PatientID'),
            dcminfo.get('StudyDescription'),
            refphys,
            series_desc,  # We try to set this further up.
            sequence_name,
            image_type,
            accession_number,
            # For demographics to populate BIDS participants.tsv
            dcminfo.get('PatientAge'),
            dcminfo.get('PatientSex'),
            dcminfo.get('AcquisitionDate'),
            dcminfo.get('SeriesInstanceUID')
        )
        # candidates
        # dcminfo.AccessionNumber
        #   len(dcminfo.ReferencedImageSequence)
        #   len(dcminfo.SourceImageSequence)
        # FOR demographics
        if per_studyUID:
            key = studyUID.split('.')[-1]
        elif per_accession_number:
            key = accession_number
        else:
            key = ''
        lgr.debug("%30s %30s %27s %27s %5s nref=%-2d nsrc=%-2d %s" % (
            key,
            info.series_id,
            series_desc,
            dcminfo.ProtocolName,
            info.is_derived,
            len(dcminfo.get('ReferencedImageSequence', '')),
            len(dcminfo.get('SourceImageSequence', '')),
            info.image_type
        ))
        if per_studyUID:
            if studyUID not in seqinfo:
                seqinfo[studyUID] = OrderedDict()
            seqinfo[studyUID][info] = series_files
        elif per_accession_number:
            if accession_number not in seqinfo:
                seqinfo[accession_number] = OrderedDict()
            seqinfo[accession_number][info] = series_files
        else:
            seqinfo[info] = series_files

    if per_studyUID:
        lgr.info("Generated sequence info for %d studies with %d entries total",
                 len(seqinfo), sum(map(len, seqinfo.values())))
    elif per_accession_number:
        lgr.info("Generated sequence info for %d accession numbers with %d "
                 "entries total", len(seqinfo), sum(map(len, seqinfo.values())))
    else:
        lgr.info("Generated sequence info with %d entries", len(seqinfo))
    return seqinfo