Exemple #1
0
def acquisition_to_heudiconv(client, acq, context):
    """Create a list of sequence objects for all convertible files in the acquisition.

    Parameters
    ----------
    client : object
        Only used on the failure path, through ``client.get(...)``, to look
        up the subject and session whose labels go into the debug message.
    acq : object
        The acquisition. ``acq.files``, ``acq.get_file_zip_info``,
        ``acq.parents`` and ``acq.id`` are read.
    context : dict
        ``context['total']`` is incremented in place by the number of members
        of the DICOM archive; ``context['subject'].code`` is the fallback
        used when a file's info has no ``PatientID``.

    Returns
    -------
    list
        One ``utils.SeqInfo`` per file whose type is in ``CONVERTABLE_TYPES``.
    """
    to_convert = []
    zip_info = None
    dcm_info = {}

    # Only the first DICOM file of the acquisition is inspected; its archive
    # listing and header info describe the image shape for every file below.
    dicoms = [f for f in acq.files if f.type == 'dicom']
    if dicoms:
        dicom = dicoms[0]
        try:
            zip_info = acq.get_file_zip_info(dicom.name)
            context['total'] += len(zip_info.members)
            dcm_info = dicom.info
        except Exception:
            # Best effort: fall back to "no DICOM information" and carry on.
            except_subj = client.get(acq.parents.subject)
            except_sess = client.get(acq.parents.session)

            # Arguments are passed to the logger so the %s placeholders are
            # actually filled in (str.format() ignores %-style placeholders).
            log.debug(
                'Dicom could not be processed:\n\t%s\n\tSubject Label: %s\n\tSession Label: %s',
                dicom.name, except_subj.label, except_sess.label,
                exc_info=True)
            zip_info = None
            dcm_info = {}

    # Make it a nicom wrapper to handle all sorts of different dicom styles
    mw = wrapper_from_data(dcm_info)
    num_dicoms = len(zip_info.members) if zip_info else -1
    image_shape = mw.image_shape
    if image_shape is None:
        image_shape = (-1, -1, -1, -1)
    else:
        # The archive member count is appended as one more dimension, then
        # the shape is padded with -1 up to four entries.
        image_shape = image_shape + (num_dicoms, )
        while len(image_shape) < 4:
            image_shape = image_shape + (-1, )

    # Guard against an archive that lists no members at all.
    if zip_info and zip_info.members:
        first_member_path = zip_info.members[0].path
    else:
        first_member_path = None

    for fileobj in acq.files:
        log.debug('filename: %s', fileobj.name)
        if fileobj.type not in CONVERTABLE_TYPES:
            continue
        info = fileobj.info

        log.debug('uid: %s', info.get("SeriesInstanceUID"))
        to_convert.append(
            utils.SeqInfo(
                context['total'],
                first_member_path,
                acq.id,
                fileobj.name,
                '-',
                '-',
                image_shape[0],
                image_shape[1],
                image_shape[2],
                image_shape[3],
                # We can use the number of files in the
                # Or a corresponding dicom header field
                info.get("RepetitionTime"),
                info.get("EchoTime"),
                info.get("ProtocolName", ""),
                "MOCO" in info.get("ImageType", []),
                "DERIVED" in info.get("ImageType", []),
                info.get("PatientID", context['subject'].code),
                info.get("StudyDescription"),
                info.get("ReferringPhysicianName", ""),
                info.get("SeriesDescription", ""),
                info.get("SequenceName"),
                tuple(info.get("ImageType", [])),
                info.get("AccessionNumber"),
                info.get("PatientAge"),
                info.get("PatientSex"),
                info.get("AcquisitionDate"),
                info.get("SeriesInstanceUID")))
        # We could possibly add a context field which would contain flywheel
        # hierarchy information like the subject code and session label
        # or the information fields within them
    return to_convert
Exemple #2
0
    def add_dcm(self, dcm, meta=None):
        '''Insert one pydicom dataset into the stack.

        Parameters
        ----------
        dcm : dicom.dataset.Dataset
            The data set to add.

        meta : dict
            Meta data already extracted from `dcm`. When None it is produced
            with extract.default_extractor.

        Raises
        ------
        IncongruentImageError
            The provided `dcm` does not match the orientation or dimensions of
            those already in the stack.

        ImageCollisionError
            The provided `dcm` has the same slice location and time/vector
            values.

        '''
        if meta is None:
            from .extract import default_extractor
            meta = default_extractor(dcm)

        wrapper = wrapper_from_data(dcm)

        # Runs before any of the bookkeeping sets below are modified
        dummy = self._chk_congruent(meta)

        self._phase_enc_dirs.add(meta.get('InPlanePhaseEncodingDirection'))
        self._repetition_times.add(meta.get('RepetitionTime'))

        # Gather the three values the stack is sorted on
        slice_key = wrapper.slice_indicator
        self._slice_pos_vals.add(slice_key)

        time_key = (self._time_order.get_ordinate(meta)
                    if self._time_order else None)
        self._time_vals.add(time_key)

        vector_key = (self._vector_order.get_ordinate(meta)
                      if self._vector_order else None)
        self._vector_vals.add(vector_key)

        sort_key = (vector_key, time_key, slice_key)

        # A repeated sort key is only an error when the caller asked for an
        # explicit time or vector ordering
        explicit_order = (self._time_order is not None or
                          self._vector_order is not None)
        if explicit_order and sort_key in self._sorting_tuples:
            raise ImageCollisionError()
        self._sorting_tuples.add(sort_key)

        # Build the NiftiWrapper for this input when that is possible
        wrapped = None
        if dummy:
            if self._ref_input is None:
                # No reference input yet: keep the dummy aside for later
                self._dummies.append(
                    (meta, sort_key, dcm.ImagePositionPatient)
                )
            else:
                # A reference input exists, convert the dummy right away
                wrapped = _make_dummy(self._ref_input,
                                      meta,
                                      dcm.ImagePositionPatient)
        else:
            wrapped = NiftiWrapper.from_dicom_wrapper(wrapper, meta)
            if self._ref_input is None:
                # First real input becomes the reference
                self._ref_input = wrapped
                # Convert every dummy that was put aside earlier
                for stashed_meta, stashed_key, stashed_iop in self._dummies:
                    converted = _make_dummy(self._ref_input,
                                            stashed_meta,
                                            stashed_iop)
                    self._files_info.append((converted, stashed_key))

        # Only inputs that produced a NiftiWrapper enter the stack
        if wrapped is not None:
            self._files_info.append((wrapped, sort_key))

        # Cached shape and meta data are now stale
        self._shape_dirty = True
        self._meta_dirty = True
Exemple #3
0
def group_dicoms_into_seqinfos(files, file_filter, dcmfilter, grouping):
    """Process list of dicoms and return sequence infos with their files

    Each sequence is described by a `SeqInfo`, a per-sequence extract of
    fields from DICOMs which will be later provided into heuristics to decide
    on filenames.

    Parameters
    ----------
    files : list of str
      List of files to consider
    file_filter : callable or None
      Applied to each item of filenames. Should return True if file needs to be
      kept, False otherwise. Skipped when falsy.
    dcmfilter : callable or None
      If called on dcm_data and returns True, the file is assigned a negative
      series number and is therefore left out of the result
    grouping : {'studyUID', 'accession_number', None}
      what to group by: studyUID or accession_number

    Returns
    -------
    seqinfo : OrderedDict
      Without grouping, maps each `SeqInfo` to the list of files of that
      sequence. With grouping, maps each study UID (or accession number) to
      such an OrderedDict.

    Raises
    ------
    ValueError
      If `grouping` is not one of the allowed values.
    """
    allowed_groupings = ['studyUID', 'accession_number', None]
    if grouping not in allowed_groupings:
        raise ValueError('I do not know how to group by {0}'.format(grouping))
    per_studyUID = grouping == 'studyUID'
    per_accession_number = grouping == 'accession_number'
    lgr.info("Analyzing %d dicoms", len(files))

    # Parallel to `files`: groups[0][i] is the series id assigned to files[i]
    # and groups[1][i] the index of its representative wrapper in `mwgroup`.
    groups = [[], []]
    mwgroup = []

    studyUID = None
    # for sanity check that all DICOMs came from the same
    # "study".  If not -- what is the use-case? (interrupted acquisition?)
    # and how would then we deal with series numbers
    # which would differ already
    if file_filter:
        nfl_before = len(files)
        files = list(filter(file_filter, files))
        nfl_after = len(files)
        lgr.info('Filtering out {0} dicoms based on their filename'.format(
            nfl_before-nfl_after))

    # Kept as a function-scope import, but executed once rather than per file
    import nibabel.nicom.dicomwrappers as dw

    for filename in files:
        # TODO after getting a regression test check if the same behavior
        #      with stop_before_pixels=True
        mw = dw.wrapper_from_data(dcm.read_file(filename, force=True))

        # These entries must not take part in the same-series comparison;
        # a signature that lacks one of them is fine.
        for sig in ('iop', 'ICE_Dims', 'SequenceName'):
            mw.series_signature.pop(sig, None)

        try:
            file_studyUID = mw.dcm_data.StudyInstanceUID
        except AttributeError:
            lgr.info("File {} is missing any StudyInstanceUID".format(filename))
            file_studyUID = None

        # Workaround for protocol name in private siemens csa header
        try:
            mw.dcm_data.ProtocolName
        except AttributeError:
            if not getattr(mw.dcm_data, 'ProtocolName', '').strip():
                mw.dcm_data.ProtocolName = parse_private_csa_header(
                    mw.dcm_data, 'ProtocolName', 'tProtocolName'
                    ) if mw.is_csa else ''

        try:
            series_id = (int(mw.dcm_data.SeriesNumber),
                         mw.dcm_data.ProtocolName)
            file_studyUID = mw.dcm_data.StudyInstanceUID

            if not per_studyUID:
                # verify that we are working with a single study
                if studyUID is None:
                    studyUID = file_studyUID
                elif not per_accession_number:
                    if studyUID != file_studyUID:
                        lgr.warning("Conflicting study identifiers found [{}, {}].".format(studyUID, file_studyUID))
        except AttributeError as exc:
            lgr.warning('Ignoring %s since not quite a "normal" DICOM: %s',
                        filename, exc)
            series_id = (-1, 'none')
            file_studyUID = None

        if not series_id[0] < 0:
            if dcmfilter is not None and dcmfilter(mw.dcm_data):
                series_id = (-1, mw.dcm_data.ProtocolName)

        # filter out unwanted non-image-data DICOMs by assigning
        # a series number < 0 (see test below)
        if not series_id[0] < 0 and mw.dcm_data[0x0008, 0x0016].repval in (
                'Raw Data Storage',
                'GrayscaleSoftcopyPresentationStateStorage'):
            series_id = (-1, mw.dcm_data.ProtocolName)

        if per_studyUID:
            series_id = series_id + (file_studyUID,)

        ingrp = False
        for idx, known_mw in enumerate(mwgroup):
            if mw.is_same_series(known_mw):
                # the same series should have the same study uuid
                assert (known_mw.dcm_data.get('StudyInstanceUID', None)
                        == file_studyUID)
                ingrp = True
                if series_id[0] >= 0:
                    series_id = (known_mw.dcm_data.SeriesNumber,
                                 known_mw.dcm_data.ProtocolName)
                    if per_studyUID:
                        series_id = series_id + (file_studyUID,)
                groups[0].append(series_id)
                groups[1].append(idx)
                # Exactly one entry per file: a second match would shift
                # `groups` out of step with `files`.
                break

        if not ingrp:
            mwgroup.append(mw)
            groups[0].append(series_id)
            groups[1].append(len(mwgroup) - 1)

    group_map = dict(zip(groups[0], groups[1]))

    # Collect the files of every series in one pass (input order preserved)
    files_by_series = {}
    for fname, file_series_id in zip(files, groups[0]):
        files_by_series.setdefault(file_series_id, []).append(fname)

    total = 0
    seqinfo = OrderedDict()

    # for the next line to make any sense the series_id needs to
    # be sortable in a way that preserves the series order
    for series_id, mwidx in sorted(group_map.items()):
        if series_id[0] < 0:
            # skip our fake series with unwanted files
            continue
        mw = mwgroup[mwidx]
        if mw.image_shape is None:
            # this whole thing has no image data (maybe just PSg DICOMs)
            # nothing to see here, just move on
            continue
        dcminfo = mw.dcm_data
        series_files = files_by_series[series_id]
        # turn the series_id into a human-readable string -- string is needed
        # for JSON storage later on
        if per_studyUID:
            studyUID = series_id[2]
            series_id = series_id[:2]
        accession_number = dcminfo.get('AccessionNumber')

        series_id = '-'.join(map(str, series_id))

        size = list(mw.image_shape) + [len(series_files)]
        total += size[-1]
        if len(size) < 4:
            size.append(1)

        # MG - refactor into util function
        try:
            TR = float(dcminfo.RepetitionTime) / 1000.
        except (AttributeError, ValueError):
            TR = -1
        try:
            TE = float(dcminfo.EchoTime)
        except (AttributeError, ValueError):
            TE = -1
        try:
            refphys = str(dcminfo.ReferringPhysicianName)
        except AttributeError:
            refphys = ''
        try:
            image_type = tuple(dcminfo.ImageType)
        except AttributeError:
            image_type = ''
        try:
            series_desc = dcminfo.SeriesDescription
        except AttributeError:
            series_desc = ''

        motion_corrected = 'MOCO' in image_type

        if dcminfo.get([0x18,0x24], None):
            # GE and Philips scanners
            sequence_name = dcminfo[0x18,0x24].value
        elif dcminfo.get([0x19, 0x109c], None):
            # Siemens scanners
            sequence_name = dcminfo[0x19, 0x109c].value
        else:
            sequence_name = 'Not found'

        info = SeqInfo(
            total,
            op.split(series_files[0])[1],
            series_id,
            op.basename(op.dirname(series_files[0])),
            '-', '-',
            size[0], size[1], size[2], size[3],
            TR, TE,
            dcminfo.ProtocolName,
            motion_corrected,
            'derived' in [x.lower() for x in dcminfo.get('ImageType', [])],
            dcminfo.get('PatientID'),
            dcminfo.get('StudyDescription'),
            refphys,
            series_desc,  # We try to set this further up.
            sequence_name,
            image_type,
            accession_number,
            # For demographics to populate BIDS participants.tsv
            dcminfo.get('PatientAge'),
            dcminfo.get('PatientSex'),
            dcminfo.get('AcquisitionDate'),
            dcminfo.get('SeriesInstanceUID')
        )
        # candidates
        # dcminfo.AccessionNumber
        #   len(dcminfo.ReferencedImageSequence)
        #   len(dcminfo.SourceImageSequence)
        # FOR demographics
        if per_studyUID:
            key = studyUID.split('.')[-1]
        elif per_accession_number:
            key = accession_number
        else:
            key = ''
        lgr.debug("%30s %30s %27s %27s %5s nref=%-2d nsrc=%-2d %s" % (
            key,
            info.series_id,
            series_desc,
            dcminfo.ProtocolName,
            info.is_derived,
            len(dcminfo.get('ReferencedImageSequence', '')),
            len(dcminfo.get('SourceImageSequence', '')),
            info.image_type
        ))
        if per_studyUID:
            if studyUID not in seqinfo:
                seqinfo[studyUID] = OrderedDict()
            seqinfo[studyUID][info] = series_files
        elif per_accession_number:
            if accession_number not in seqinfo:
                seqinfo[accession_number] = OrderedDict()
            seqinfo[accession_number][info] = series_files
        else:
            seqinfo[info] = series_files

    if per_studyUID:
        lgr.info("Generated sequence info for %d studies with %d entries total",
                 len(seqinfo), sum(map(len, seqinfo.values())))
    elif per_accession_number:
        lgr.info("Generated sequence info for %d accession numbers with %d "
                 "entries total", len(seqinfo), sum(map(len, seqinfo.values())))
    else:
        lgr.info("Generated sequence info with %d entries", len(seqinfo))
    return seqinfo