Example #1
# imports assumed by this snippet (old `dicom`/`dcmstack` API, as in heudiconv)
from datetime import datetime
from random import sample
import dicom as dcm
import dcmstack as ds
def get_formatted_scans_key_row(item):
    """
    Parameters
    ----------
    item
        heudiconv-style file-group entry whose last element is the list of
        DICOM filenames; only the first file is read

    Returns
    -------
    row: list
        [ISO acquisition time, performing physician name, random string]

    """
    dcm_fn = item[-1][0]
    mw = ds.wrapper_from_data(dcm.read_file(dcm_fn,
                                            stop_before_pixels=True,
                                            force=True))
    # we need to store filenames and acquisition times
    # parse date and time and get it into isoformat
    date = mw.dcm_data.ContentDate
    time = mw.dcm_data.ContentTime.split('.')[0]
    td = time + date
    acq_time = datetime.strptime(td, '%H%M%S%Y%m%d').isoformat()
    # add random string
    randstr = ''.join(map(chr, sample(k=8, population=range(33, 127))))
    try:
        perfphys = mw.dcm_data.PerformingPhysicianName
    except AttributeError:
        perfphys = ''
    row = [acq_time, perfphys, randstr]
    # empty entries should be 'n/a'
    # https://github.com/dartmouth-pbs/heudiconv/issues/32
    row = ['n/a' if not str(e) else e for e in row]
    return row
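
A minimal usage sketch, assuming `item` has the layout the function expects (its last element is a list of DICOM file paths); the entry and output below are hypothetical:

item = ('2-anat-T1w', None, ['/data/sub-01/anat/0001.dcm'])  # hypothetical entry
acq_time, physician, randstr = get_formatted_scans_key_row(item)
print(acq_time)  # e.g. '2017-05-05T10:15:30'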
Example #2
# imports assumed by this snippet
import os
import os.path as op
from glob import glob
import dicom as dcm
import dcmstack as ds
def get_slice_timing(root):
    """Return per-file trigger times (in seconds) for the first directory
    under `root` that contains files."""
    for path, subdirs, files in os.walk(root):
        if files:
            dcm_files = glob(op.join(path, '*.dcm'))
            num_files = len(dcm_files)
            slice_time = [None] * num_files
            for i in range(num_files):
                # glob already returns joined paths; joining `path` again
                # would duplicate the directory component
                dcm_filepath = dcm_files[i]
                mw = ds.wrapper_from_data(dcm.read_file(dcm_filepath,
                                                        stop_before_pixels=True,
                                                        force=True))
                dcminfo = mw.dcm_data
                # TriggerTime is in milliseconds; convert to seconds
                acquisition_time = float(dcminfo.TriggerTime) / 1000
                slice_time[i] = acquisition_time
            return slice_time
        else:
            print('No files in path {0}'.format(path))
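
Pointed at a directory of fMRI DICOMs, it might be used like this (hypothetical path and values):

slice_times = get_slice_timing('/data/sub-01/func_dicoms')  # hypothetical location
print(slice_times[:3])  # e.g. [0.0, 0.0425, 0.085]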
Example #3
# assumed module-level context (as in heudiconv): `lgr` is a logger, `op` is
# os.path, `OrderedDict` comes from collections, and `SeqInfo` is heudiconv's
# namedtuple of per-series fields
def group_dicoms_into_seqinfos(files, file_filter, dcmfilter, grouping):
    """Process list of dicoms and return seqinfo and file group
    `seqinfo` contains per-sequence extract of fields from DICOMs which
    will be later provided into heuristics to decide on filenames
    Parameters
    ----------
    files : list of str
      List of files to consider
    file_filter : callable, optional
      Applied to each item of filenames. Should return True if file needs to be
      kept, False otherwise.
    dcmfilter : callable, optional
      Called on dcm_data; if it returns True, the series is flagged as
      unwanted by assigning it a negative series_id
    grouping : {'studyUID', 'accession_number', None}, optional
      What to group by: studyUID or accession_number
    Returns
    -------
    seqinfo : OrderedDict
      Mapping from each `SeqInfo` entry to the list of files in that series.
      When `grouping` is 'studyUID' or 'accession_number', the mapping is
      nested inside an outer OrderedDict keyed by that identifier.
    """
    allowed_groupings = ['studyUID', 'accession_number', None]
    if grouping not in allowed_groupings:
        raise ValueError('I do not know how to group by {0}'.format(grouping))
    per_studyUID = grouping == 'studyUID'
    per_accession_number = grouping == 'accession_number'
    lgr.info("Analyzing %d dicoms", len(files))
    import dcmstack as ds
    import dicom as dcm

    groups = [[], []]
    mwgroup = []

    studyUID = None
    # for sanity check that all DICOMs came from the same
    # "study".  If not -- what is the use-case? (interrupted acquisition?)
    # and how would then we deal with series numbers
    # which would differ already
    if file_filter:
        nfl_before = len(files)
        files = list(filter(file_filter, files))
        nfl_after = len(files)
        lgr.info('Filtering out {0} dicoms based on their filename'.format(
            nfl_before - nfl_after))
    for fidx, filename in enumerate(files):
        # TODO after getting a regression test check if the same behavior
        #      with stop_before_pixels=True
        mw = ds.wrapper_from_data(dcm.read_file(filename, force=True))

        for sig in ('iop', 'ICE_Dims', 'SequenceName'):
            try:
                del mw.series_signature[sig]
            except KeyError:
                pass

        try:
            file_studyUID = mw.dcm_data.StudyInstanceUID
        except AttributeError:
            lgr.info(
                "File {} has no StudyInstanceUID".format(filename))
            file_studyUID = None

        try:
            series_id = (int(mw.dcm_data.SeriesNumber),
                         mw.dcm_data.ProtocolName)
            file_studyUID = mw.dcm_data.StudyInstanceUID

            if not per_studyUID:
                # verify that we are working with a single study
                if studyUID is None:
                    studyUID = file_studyUID
                elif not per_accession_number:
                    assert studyUID == file_studyUID, (
                        "Conflicting study identifiers found [{}, {}].".format(
                            studyUID, file_studyUID))
        except AttributeError as exc:
            lgr.warning('Ignoring %s since not quite a "normal" DICOM: %s',
                        filename, exc)
            series_id = (-1, 'none')
            file_studyUID = None

        if not series_id[0] < 0:
            if dcmfilter is not None and dcmfilter(mw.dcm_data):
                series_id = (-1, mw.dcm_data.ProtocolName)

        # filter out unwanted non-image-data DICOMs by assigning
        # a series number < 0 (see test below)
        if not series_id[0] < 0 and mw.dcm_data[0x0008, 0x0016].repval in (
                'Raw Data Storage',
                'GrayscaleSoftcopyPresentationStateStorage'):
            series_id = (-1, mw.dcm_data.ProtocolName)

        if per_studyUID:
            series_id = series_id + (file_studyUID, )

        ingrp = False
        for idx in range(len(mwgroup)):
            # same = mw.is_same_series(mwgroup[idx])
            if mw.is_same_series(mwgroup[idx]):
                # the same series should have the same study uuid
                assert (mwgroup[idx].dcm_data.get('StudyInstanceUID',
                                                  None) == file_studyUID)
                ingrp = True
                if series_id[0] >= 0:
                    series_id = (mwgroup[idx].dcm_data.SeriesNumber,
                                 mwgroup[idx].dcm_data.ProtocolName)
                    if per_studyUID:
                        series_id = series_id + (file_studyUID, )
                groups[0].append(series_id)
                groups[1].append(idx)

        if not ingrp:
            mwgroup.append(mw)
            groups[0].append(series_id)
            groups[1].append(len(mwgroup) - 1)

    group_map = dict(zip(groups[0], groups[1]))

    total = 0
    seqinfo = OrderedDict()

    # for the next line to make any sense the series_id needs to
    # be sortable in a way that preserves the series order
    for series_id, mwidx in sorted(group_map.items()):
        if series_id[0] < 0:
            # skip our fake series with unwanted files
            continue
        mw = mwgroup[mwidx]
        if mw.image_shape is None:
            # this whole thing has no image data (maybe just PSg DICOMs)
            # nothing to see here, just move on
            continue
        dcminfo = mw.dcm_data
        series_files = [
            files[i] for i, s in enumerate(groups[0]) if s == series_id
        ]
        # turn the series_id into a human-readable string -- string is needed
        # for JSON storage later on
        if per_studyUID:
            studyUID = series_id[2]
            series_id = series_id[:2]
        accession_number = dcminfo.get('AccessionNumber')

        series_id = '-'.join(map(str, series_id))

        size = list(mw.image_shape) + [len(series_files)]
        total += size[-1]
        if len(size) < 4:
            size.append(1)

        # MG - refactor into util function
        try:
            TR = float(dcminfo.RepetitionTime) / 1000.
        except (AttributeError, ValueError):
            TR = -1
        try:
            TE = float(dcminfo.EchoTime)
        except (AttributeError, ValueError):
            TE = -1
        try:
            refphys = str(dcminfo.ReferringPhysicianName)
        except AttributeError:
            refphys = ''
        try:
            image_type = tuple(dcminfo.ImageType)
        except AttributeError:
            image_type = ''
        try:
            series_desc = dcminfo.SeriesDescription
        except AttributeError:
            series_desc = ''

        motion_corrected = 'MOCO' in image_type

        if dcminfo.get([0x18, 0x24], None):
            # GE and Philips scanners
            sequence_name = dcminfo[0x18, 0x24].value
        elif dcminfo.get([0x19, 0x109c], None):
            # Siemens scanners
            sequence_name = dcminfo[0x19, 0x109c].value
        else:
            sequence_name = 'Not found'

        info = SeqInfo(
            total,
            op.split(series_files[0])[1],
            series_id,
            op.basename(op.dirname(series_files[0])),
            '-',
            '-',
            size[0],
            size[1],
            size[2],
            size[3],
            TR,
            TE,
            dcminfo.ProtocolName,
            motion_corrected,
            'derived' in [x.lower() for x in dcminfo.get('ImageType', [])],
            dcminfo.get('PatientID'),
            dcminfo.get('StudyDescription'),
            refphys,
            dcminfo.get('SeriesDescription'),
            sequence_name,
            image_type,
            accession_number,
            # For demographics to populate BIDS participants.tsv
            dcminfo.get('PatientAge'),
            dcminfo.get('PatientSex'),
            dcminfo.get('AcquisitionDate'),
        )
        # candidates
        # dcminfo.AccessionNumber
        #   len(dcminfo.ReferencedImageSequence)
        #   len(dcminfo.SourceImageSequence)
        # FOR demographics
        if per_studyUID:
            key = studyUID.split('.')[-1]
        elif per_accession_number:
            key = accession_number
        else:
            key = ''
        lgr.debug(
            "%30s %30s %27s %27s %5s nref=%-2d nsrc=%-2d %s" %
            (key, info.series_id, dcminfo.SeriesDescription,
             dcminfo.ProtocolName, info.is_derived,
             len(dcminfo.get('ReferencedImageSequence', '')),
             len(dcminfo.get('SourceImageSequence', '')), info.image_type))
        if per_studyUID:
            if studyUID not in seqinfo:
                seqinfo[studyUID] = OrderedDict()
            seqinfo[studyUID][info] = series_files
        elif per_accession_number:
            if accession_number not in seqinfo:
                seqinfo[accession_number] = OrderedDict()
            seqinfo[accession_number][info] = series_files
        else:
            seqinfo[info] = series_files

    if per_studyUID:
        lgr.info(
            "Generated sequence info for %d studies with %d entries total",
            len(seqinfo), sum(map(len, seqinfo.values())))
    elif per_accession_number:
        lgr.info(
            "Generated sequence info for %d accession numbers with %d "
            "entries total", len(seqinfo), sum(map(len, seqinfo.values())))
    else:
        lgr.info("Generated sequence info with %d entries", len(seqinfo))
    return seqinfo
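
A minimal sketch of calling it (hypothetical paths; with grouping='studyUID' the result is keyed by study UID):

from glob import glob
files = sorted(glob('/data/sub-01/dicoms/*.dcm'))  # hypothetical location
seqinfo = group_dicoms_into_seqinfos(files, file_filter=None,
                                     dcmfilter=None, grouping='studyUID')
for study_uid, entries in seqinfo.items():
    print(study_uid, len(entries), 'series')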
Example #4
# imports assumed by this snippet
import os
import numpy as np
import dicom as dcm
import dcmstack as ds
def process_dicoms(fl):
    groups = [[], []]
    mwgroup = []
    hays = open("/scratch/PSB6351_2017/week4/hays/debug1.txt", 'w')
    hays.write("Here\n")
    hays.flush()
    for fidx, filename in enumerate(fl):
        mw = ds.wrapper_from_data(dcm.read_file(filename, force=True))
        # drop volatile fields from the series signature so files from the
        # same series compare equal
        for sig in ('iop', 'ICE_Dims', 'SequenceName'):
            try:
                del mw.series_signature[sig]
            except KeyError:
                pass
        # `groups` is initialized to [[], []], which is truthy, so the original
        # `if not groups:` guard never fired; test mwgroup for emptiness instead
        if not mwgroup:
            if hasattr(mw.dcm_data, 'SeriesNumber'):
                mwgroup.append(mw)
                groups[0].append(int(mw.dcm_data.SeriesNumber))
                groups[1].append(len(mwgroup) - 1)
                continue
        N = len(mwgroup)
        #print fidx, N, filename
        ingrp = False
        for idx in range(N):
            same = mw.is_same_series(mwgroup[idx])
            #print idx, same, groups[idx][0] 
            if same:
                if hasattr(mw.dcm_data, 'SeriesNumber'):
                    groups[0].append(int(mwgroup[idx].dcm_data.SeriesNumber))
                    groups[1].append(idx)
                    ingrp = True
        if not ingrp:
            if hasattr(mw.dcm_data, 'SeriesNumber'):
                mwgroup.append(mw)
                groups[0].append(int(mw.dcm_data.SeriesNumber))
                groups[1].append(len(mwgroup) - 1)

    group_map = dict(zip(groups[0], groups[1]))
    
    # drop known-unwanted series numbers (pop is a no-op if a key is absent)
    for series_num in (20002, 20004, 300, 301, 302):
        group_map.pop(series_num, None)

    total = 0
    filegroup = {}
    seqinfo = []

    for series, mwidx in sorted(group_map.items()):
        mw = mwgroup[mwidx]
        dcminfo = mw.dcm_data
        files = np.array(fl)[np.array(groups[0]) == series].tolist()
        filegroup[series] = files
        size = list(mw.image_shape) + [dcminfo.ImagesinAcquisition]
        total += size[-1]
        if hasattr(dcminfo, 'NumberofTemporalPositions'):
            size.append(dcminfo.NumberofTemporalPositions)
        else:
            size.append(1)
        try:
            TR = float(dcminfo.RepetitionTime)/1000.
        except AttributeError:
            TR = -1
        try:
            TE = float(dcminfo.EchoTime)
        except AttributeError:
            TE = -1

        hays.write("Total: {0}\n".format(total))
        hays.write("series: {0}\n".format(series))
        hays.write("Split Path: {0}\n".format(os.path.split(files[0])[1]))
        hays.write("dcminfo.SeriesDescription: {0}\n".format(dcminfo.SeriesDescription))
        hays.write("size: {0}\n".format(size))
        hays.write("TR: {0}\n".format(TR))

        info = [total, os.path.split(files[0])[1], series, '-', '-', '-'] + \
               size + [TR, TE, dcminfo.SeriesDescription]
        seqinfo.append(info)
    hays.write(str(seqinfo))
    hays.flush()
    hays.close()
    return seqinfo, filegroup
Example #5
def process_dicoms(fl):
    groups = [[], []]
    mwgroup = []
    for fidx, filename in enumerate(fl):
        mw = ds.wrapper_from_data(dcm.read_file(filename, force=True))
        # drop volatile fields from the series signature (same trick as above)
        for sig in ('iop', 'ICE_Dims', 'SequenceName'):
            try:
                del mw.series_signature[sig]
            except KeyError:
                pass
        # `groups` starts as [[], []] (truthy), so test mwgroup instead
        if not mwgroup:
            mwgroup.append(mw)
            groups[0].append(int(mw.dcm_data.SeriesNumber))
            groups[1].append(len(mwgroup) - 1)
            continue
        N = len(mwgroup)
        #print fidx, N, filename
        ingrp = False
        for idx in range(N):
            same = mw.is_same_series(mwgroup[idx])
            #print idx, same, groups[idx][0]
            if same:
                groups[0].append(int(mwgroup[idx].dcm_data.SeriesNumber))
                groups[1].append(idx)
                ingrp = True
        if not ingrp:
            mwgroup.append(mw)
            groups[0].append(int(mw.dcm_data.SeriesNumber))
            groups[1].append(len(mwgroup) - 1)

    group_map = dict(zip(groups[0], groups[1]))

    # drop the known-unwanted series number (pop is a no-op if absent)
    group_map.pop(5001, None)

    total = 0
    filegroup = {}
    seqinfo = []
    for series, mwidx in sorted(group_map.items()):
        mw = mwgroup[mwidx]
        dcminfo = mw.dcm_data
        files = np.array(fl)[np.array(groups[0]) == series].tolist()
        filegroup[series] = files
        size = list(mw.image_shape) + [len(files)]
        total += size[-1]
        if len(size) < 4:
            size.append(1)
        try:
            TR = float(dcminfo.RepetitionTime) / 1000.
        except AttributeError:
            TR = -1
        try:
            TE = float(dcminfo.EchoTime)
        except AttributeError:
            TE = -1
        info = [total, os.path.split(files[0])[1], series, '-', '-', '-'] + \
               size + [TR, TE, dcminfo.ProtocolName, 'MoCo' in dcminfo.SeriesDescription]
        seqinfo.append(info)
    return seqinfo, filegroup
Example #6
# assumed context for this snippet: os, re, op (os.path), glob, datetime,
# shutil's copy, plus project-level helpers and globals (mkdir, write_json,
# dict_subID, BIDSVersion, License, Authors, Acknowledgements,
# HowToAcknowledge, Funding)
def get_scan_info(root, basedir):
    """
    Parameters:
    item
    ---------------------------------------------------------------------------
    Returns:
    row: list
        [ISO acquisition time, performing physician name, random string]
    """
    for path, subdirs, files in os.walk(root):
        # compute the path tokens once; note that inside [...] the '|' is a
        # literal character, so the original r'[_|@|/]' also split on '|'
        tokens = re.split(r'[_@/]', path)
        if path.split('_')[-1] == 'dicoms' and "Save" not in tokens \
                and "calibration" not in tokens \
                and "Loc" not in tokens \
                and "Processed" not in tokens:
            if files:
                niiDir = path.replace(path.split('/')[-1], "").replace('untar', 'raw')
                niifiles = glob(os.path.join(niiDir, '*.nii.gz'))
                dcm_files = glob(op.join(path,'*.dcm'))
                # glob already returns joined paths; no second join needed
                dcm_filepath = dcm_files[0]
                mw = ds.wrapper_from_data(dcm.read_file(dcm_filepath,
                                                stop_before_pixels=True,
                                                force=True))
                dcminfo = mw.dcm_data   
                # MG
                try:
                    TR = float(dcminfo.RepetitionTime) / 1000.
                except (AttributeError, ValueError):
                    TR = -1
                try:
                    TE = float(dcminfo.EchoTime)
                except (AttributeError, ValueError):
                    TE = -1

                projectName = re.split(r'[_|@|/]', dcminfo.PatientID)[-1]
                projectPath = os.path.join(basedir, projectName)
                mkdir(projectPath)
                targetPath = os.path.join(projectPath, 'source')
                mkdir(targetPath)
                date = dcminfo.ContentDate
                time = dcminfo.ContentTime.split('.')[0]
                td = time + date
                acq_time = datetime.strptime(td, '%H%M%S%Y%m%d').isoformat()            
                key_subID = date
                if key_subID in dict_subID:
                    info_subID = dict_subID[key_subID]
                    subPrefix = info_subID
                    subID = re.split(r'[_]', info_subID)[0].split('-')[1]
                    subDir = os.path.join(targetPath, 'sub-'+subID)
                    mkdir(subDir)
                    phase = re.split(r'[_]', info_subID)[1].split('-')[1]
                    secDir = os.path.join(subDir, 'ses-'+phase)
                    mkdir(secDir)
                else:
                    print('no such key in dict_subID: {0}'.format(key_subID))
                    subID = re.split(r'[_|@|/]', dcminfo.PatientID)[0].split('s')[-1]
                    subDir = os.path.join(targetPath, subID)
                    mkdir(subDir)
                    phase = re.split(r'[_|@|/]', dcminfo.PatientID)[1]
                    secDir = os.path.join(subDir, subID)
                    mkdir(secDir)
                    subPrefix = 'sub-'+subID+'_ses-'+phase


                subAge = int(dcminfo.PatientAge.split('Y')[0])
                subSex = dcminfo.PatientSex

                #scanType = re.split(r'[\s]', dcminfo.SeriesDescription)[0] 
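                # GE private tag (0019,109e): internal pulse sequence name,
                # e.g. 'EFGRE3D', '3DFSE', 'EPI' (matched against below)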
                scanType = dcminfo[0x19109e].value
                # Fieldmap: EffectiveEchoSpacing and TotalReadoutTime
                # (computed in the 'Fieldmap' branch below, where they are used)
                ETL = dcminfo.EchoTrainLength
                PEbandwidth = float(dcminfo.PixelBandwidth)
                ACCFactor = 3  # acceleration factor; not yet read from the DICOM directly (taken from the NIfTI header for now)
                taskName = dcminfo.SeriesDescription.split(' ')[0]

                #Build the info dics and save them
                Dataset_Description = dict()
                Dataset_Description['Name'] = projectName
                Dataset_Description['BIDSVersion'] = BIDSVersion
                Dataset_Description['License'] = License
                Dataset_Description['Authors'] = Authors
                Dataset_Description['Acknowledgements'] = Acknowledgements
                Dataset_Description['HowToAcknowledge'] = HowToAcknowledge
                Dataset_Description['Funding'] = Funding
                Dataset_Description['InstitutionName'] = dcminfo.InstitutionName
                Dataset_Description['Manufacturer'] = dcminfo.Manufacturer
                Dataset_Description['ManufacturersModelName'] = dcminfo.ManufacturersModelName
                fname = os.path.join(targetPath, 'dataset_description.json') 
                write_json(fname, Dataset_Description, 'TRUE')
                #Dataset_Description['Session'] = phase
                Dataset_Description['SubID'] = subID
                Dataset_Description['SubAge'] = subAge
                Dataset_Description['SubSex'] = subSex
                fname = os.path.join(subDir, 'dataset_description.json') 
                write_json(fname, Dataset_Description, 'TRUE')            
                if scanType == 'EFGRE3D': 
                    Anatomy_Info = dict()
                    Anatomy_Info['ScanType'] = 'T1'
                    Anatomy_Info['SeriesDescription'] = dcminfo.SeriesDescription
                    Anatomy_Info['AcquisitionMatrix'] = dcminfo.AcquisitionMatrix
                    Anatomy_Info['RepetitionTime'] = TR
                    Anatomy_Info['TE'] = TE
                    Anatomy_Info['FlipAngle'] = int(dcminfo.FlipAngle)
                    Anatomy_Info['InstitutionName'] = dcminfo.InstitutionName
                    Anatomy_Info['Manufacturer'] = dcminfo.Manufacturer
                    Anatomy_Info['ManufacturersModelName'] = dcminfo.ManufacturersModelName
                    Anatomy_Info['AcqTime'] = acq_time
                    anatdir = os.path.join(secDir, 'anat')
                    mkdir(anatdir)
                    for i in niifiles:
                        copy(i, os.path.join(anatdir, subPrefix+'_T1w.nii.gz'))
                    fname = os.path.join(anatdir, subPrefix+'_T1w.json') 
                    write_json(fname, Anatomy_Info, 'TRUE')
                elif scanType == '3DFSE': 
                    Anatomy_Info = dict()
                    Anatomy_Info['ScanType'] = 'T2'
                    Anatomy_Info['SeriesDescription'] = dcminfo.SeriesDescription
                    Anatomy_Info['AcquisitionMatrix'] = dcminfo.AcquisitionMatrix
                    Anatomy_Info['RepetitionTime'] = TR
                    Anatomy_Info['TE'] = TE
                    Anatomy_Info['FlipAngle'] = int(dcminfo.FlipAngle)
                    Anatomy_Info['InstitutionName'] = dcminfo.InstitutionName
                    Anatomy_Info['Manufacturer'] = dcminfo.Manufacturer
                    Anatomy_Info['ManufacturersModelName'] = dcminfo.ManufacturersModelName
                    Anatomy_Info['AcqTime'] = acq_time
                    anatdir = os.path.join(secDir, 'anat')
                    mkdir(anatdir)
                    for i in niifiles:
                        copy(i, os.path.join(anatdir, subPrefix+'_T2w.nii.gz'))
                    fname = os.path.join(anatdir, subPrefix+'_T2w.json') 
                    write_json(fname, Anatomy_Info, 'TRUE')    
                elif scanType == 'EPI':
                    slice_times = get_slice_timing(root)
                    Func_Info = dict()
                    Func_Info['ScanType'] = 'Func'
                    Func_Info['SeriesDescription'] = dcminfo.SeriesDescription
                    Func_Info['AcquisitionMatrix'] = dcminfo.AcquisitionMatrix
                    Func_Info['RepetitionTime'] = TR
                    Func_Info['TE'] = TE
                    Func_Info['FlipAngle'] = int(dcminfo.FlipAngle)
                    Func_Info['SliceTiming'] = slice_times
                    Func_Info['ACCFactor'] = ACCFactor
                    Func_Info['InstitutionName'] = dcminfo.InstitutionName
                    Func_Info['Manufacturer'] = dcminfo.Manufacturer
                    Func_Info['ManufacturersModelName'] = dcminfo.ManufacturersModelName
                    Func_Info['AcqTime'] = acq_time
                    Func_Info['TaskName'] = taskName
                    funcdir = os.path.join(secDir, 'func')
                    mkdir(funcdir)
                    for i in niifiles:
                        copy(i, os.path.join(funcdir, subPrefix+'_task-'+taskName+'_bold.nii.gz'))
                    fname = os.path.join(funcdir, subPrefix+'_task-'+taskName+'_bold.json') 
                    write_json(fname, Func_Info, 'TRUE')
                elif scanType == 'Fieldmap':
                    # EffectiveEchoSpacing and phaseEncodeDirection were
                    # referenced below but never assigned (NameError); compute
                    # them here from the tags read above, using the formula
                    # that was previously commented out
                    EffectiveEchoSpacing = 1 / (PEbandwidth * (ETL - 1) * ACCFactor)
                    phaseEncodeDirection = dcminfo.InPlanePhaseEncodingDirection  # assumed source: standard tag (0018,1312), 'ROW'/'COL'
                    Fieldmap_Info = dict()
                    Fieldmap_Info['ScanType'] = 'Fieldmap'
                    Fieldmap_Info['SeriesDescription'] = dcminfo.SeriesDescription
                    Fieldmap_Info['AcquisitionMatrix'] = dcminfo.AcquisitionMatrix
                    Fieldmap_Info['RepetitionTime'] = TR
                    Fieldmap_Info['TE'] = TE
                    Fieldmap_Info['FlipAngle'] = int(dcminfo.FlipAngle)
                    Fieldmap_Info['ACCFactor'] = ACCFactor
                    Fieldmap_Info['EffectiveEchoSpacing'] = EffectiveEchoSpacing
                    Fieldmap_Info['phaseEncodeDirection'] = phaseEncodeDirection
                    Fieldmap_Info['InstitutionName'] = dcminfo.InstitutionName
                    Fieldmap_Info['Manufacturer'] = dcminfo.Manufacturer
                    Fieldmap_Info['ManufacturersModelName'] = dcminfo.ManufacturersModelName
                    Fieldmap_Info['AcqTime'] = acq_time
                    fpdir = os.path.join(secDir, 'fmap')
                    mkdir(fpdir)
                    for i in niifiles:
                        copy(i, os.path.join(fpdir, subPrefix+'_fmap.nii.gz'))
                    fname = os.path.join(fpdir, subPrefix+'_fmap.json') 
                    write_json(fname, Fieldmap_Info, 'TRUE') 
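
A hypothetical invocation (placeholder paths; the function writes its output tree as a side effect and returns nothing):

get_scan_info('/data/untar/s001_proj', '/data/bids')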
Example #7
# imports assumed by this snippet
import os
import re
import csv
import logging
import pydicom
import dcmstack as ds
def sort_rule_clinical(filename, args):
    '''
    Clinical sort rule:

    patient_name
        |-study_date
            |-studyID.hash(StudyInstanceUID)
                |-modality
                    |-series_number
                        |-{patient}.{modality}.{series:04d}.{image:04d}.{study_date}.{unique}.dcm
                        ...
                    |-series_number
                    ...
    input:
        filename: dicom filename
    output:
        the file's sorted relative path, following the layout above, or None
        if the file should be skipped

    '''
    def write_error_file(filen, errorInfoTemp):
        if os.path.exists(filen):
            with open(filen, 'r') as readFile:
                reader = csv.reader(readFile, delimiter='\t')
                lines = list(reader)
                if errorInfoTemp.split('\t') not in lines:
                    with open(filen, 'a') as writeFile:
                        writeFile.write(errorInfoTemp)
                        writeFile.write("\n")
        else:
            with open(filen, 'w') as writeFile:
                writeFile.write(
                    "\t".join(['subject', 'date', 'series', 'issue']))
                writeFile.write("\n")
                writeFile.write(errorInfoTemp)
                writeFile.write("\n")

    def clean_path(path):
        return re.sub(r'[^a-zA-Z0-9.-]', '_', '{0}'.format(path))

    def hashcode(value):
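        # Java String.hashCode-style rolling hash, masked to 32 bits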
        code = 0
        for character in value:
            code = (code * 31 + ord(character)) & 0xffffffff
        return '{0:08X}'.format(code)

    # This will ignore any dicomdir present in the folder
    if all(['DICOMDIR' not in filename, not filename.endswith('OR_dates.tsv')]):
        logger = logging.getLogger(__name__)

        try:
            error_file = os.path.join(args.dicom_dir, 'errorInfo.tsv')
            or_dates_file = os.path.join(args.dicom_dir, 'or_dates.tsv')
            
            dataset = pydicom.read_file(
                filename, stop_before_pixels=True, force=True)
            study_date = dataset.StudyDate[0:4] + '_' + \
                dataset.StudyDate[4:6] + '_' + dataset.StudyDate[6:8]
            
            if 'SeriesDescription' in dataset:
                if 'REJECTION' in dataset.SeriesDescription.upper():
                    return None
                
            # This will skip any order sheets
            if dataset.Modality in {'SR', 'PR', 'KO'}:
                errorInfoTemp = "\t".join(['P' + [s for s in filename.split(os.sep) if 'sub' in s][0].split('-')[1], study_date,
                                           clean_path('{series:04d}'.format(series=dataset.SeriesNumber)), dataset.Modality])
                write_error_file(error_file, errorInfoTemp)
                return None

            # This will skip any order sheets and localizers
            elif 'ImageType' not in dataset:
                return None
            elif any(x in dataset.ImageType for x in {'SECONDARY', 'LOCALIZER'}):
                return None
            else:
                if 'Manufacturer' in dataset:
                    if 'SIEMENS' in dataset.Manufacturer:
                        errorInfoTemp = "\t".join(['P' + [s for s in filename.split(os.sep) if 'sub' in s][0].split('-')[1], study_date,
                                                   clean_path('{series:04d}'.format(series=dataset.SeriesNumber)), 'SIEMENS'])
                        write_error_file(error_file, errorInfoTemp)
                        return None
                try:
                    csaReader = ds.wrapper_from_data(dataset)
                    modality = dataset.Modality

                    # --- INTRAOP X-RAY determination
                    if any(substring in modality for substring in {'Intraoperative', 'Skull', 'XA', 'RF','CR','OT'}):
                        if 'CR' not in dataset.Modality:
                            or_date = dataset.StudyDate[0:4] + '_' + \
                                dataset.StudyDate[4:6] + \
                                '_' + dataset.StudyDate[6:8]
                            orDateTemp = "\t".join(
                                ['P' + [s for s in filename.split(os.sep) if 'sub' in s][0].split('-')[1], or_date])
                            
                            if os.path.exists(or_dates_file):
                                with open(or_dates_file, 'r') as readFile:
                                    reader = csv.reader(
                                        readFile, delimiter='\t')
                                    lines = list(reader)
                                if orDateTemp.split('\t') not in lines:
                                    with open(or_dates_file, 'a') as writeFile:
                                        writeFile.write(orDateTemp)
                                        writeFile.write("\n")
                            else:
                                with open(or_dates_file, 'w') as writeFile:
                                    writeFile.write(
                                        "\t".join(['subject', 'or_date']))
                                    writeFile.write("\n")
                                    writeFile.write(orDateTemp)
                                    writeFile.write("\n")
                            return None

                        elif all(['CR' in dataset.Modality, any(x in dataset.StudyDescription for x in {'Skull Routine Portable', 'Intraoperative Portable'})]):
                            errorInfoTemp = "\t".join(['P' + [s for s in filename.split(os.sep) if 'sub' in s][0].split('-')[1], study_date,
                                                       clean_path('{series:04d}'.format(series=dataset.SeriesNumber)), dataset.StudyDescription])
                            write_error_file(error_file, errorInfoTemp)
                            return None
                        else:
                            patient = 'P' + \
                                [s for s in filename.split(os.sep) if 'sub' in s][0].split(
                                    '-')[1] + '_' + study_date
                            series_number = clean_path(
                                '{series:04d}'.format(series=dataset.SeriesNumber))
                            studyID_and_hash_studyInstanceUID = clean_path('.'.join([dataset.StudyID or 'NA',
                                                                                     hashcode(dataset.StudyInstanceUID)]))

                            path = os.path.join(
                                patient, dataset.StudyDate, studyID_and_hash_studyInstanceUID, modality, series_number)
                            sorted_filename = '{patient}.{modality}.{series:04d}.{image:04d}.{study_date}.{unique}.dcm'.format(
                                patient=patient.upper(),
                                modality=modality,
                                series=dataset.SeriesNumber,
                                image=dataset.InstanceNumber,
                                study_date=dataset.StudyDate,
                                unique=hashcode(dataset.SOPInstanceUID),
                            )
                    else:
                        if dataset.SeriesDescription.lower() not in {'loc', 'dose report'}:
                            patient = 'P' + \
                                [s for s in filename.split(os.sep) if 'sub' in s][0].split(
                                    '-')[1] + '_' + study_date
                            series_number = clean_path(
                                '{series:04d}'.format(series=dataset.SeriesNumber))
                            studyID_and_hash_studyInstanceUID = clean_path('.'.join([dataset.StudyID or 'NA',
                                                                                     hashcode(dataset.StudyInstanceUID)]))

                            path = os.path.join(
                                patient, dataset.StudyDate, studyID_and_hash_studyInstanceUID, modality, series_number)
                            sorted_filename = '{patient}.{modality}.{series:04d}.{image:04d}.{study_date}.{unique}.dcm'.format(
                                patient=patient.upper(),
                                modality=modality,
                                series=dataset.SeriesNumber,
                                image=dataset.InstanceNumber,
                                study_date=dataset.StudyDate,
                                unique=hashcode(dataset.SOPInstanceUID),
                            )
                except Exception as e:
                    errorInfoTemp = "\t".join(['P' + [s for s in filename.split(os.sep) if 'sub' in s][0].split('-')[1], study_date,
                                               clean_path('{series:04d}'.format(series=dataset.SeriesNumber)), 'csaReader'])
                    write_error_file(error_file, errorInfoTemp)
                    return None

        except Exception as e:
            logger.exception('something wrong with {}'.format(filename))
            logger.exception(e)
            return None

        if 'path' in locals():
            sorted_full_filename = os.path.join(path, sorted_filename)
            return sorted_full_filename
        else:
            return None
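
A minimal sketch of applying the rule over a tree, assuming an `args` object that carries the `dicom_dir` attribute the function reads, and filenames containing a sub-<ID> path component (paths hypothetical):

import argparse
args = argparse.Namespace(dicom_dir='/data/dicoms')  # hypothetical location
for root, _, names in os.walk(args.dicom_dir):
    for name in names:
        target = sort_rule_clinical(os.path.join(root, name), args)
        if target:
            print(target)  # relative destination for this file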
Example #8
# imports assumed by this snippet: os, numpy as np, dicom as dcm, dcmstack as ds
def process_dicoms(fl):
    groups = [[], []]
    mwgroup = []
    for fidx, filename in enumerate(fl):
        mw = ds.wrapper_from_data(dcm.read_file(filename, force=True))
        # drop volatile fields from the series signature
        for sig in ('iop', 'ICE_Dims', 'SequenceName'):
            try:
                del mw.series_signature[sig]
            except KeyError:
                pass
        # `groups` is initialized to [[], []] (truthy), so the original
        # `if not groups:` guard never fired; test mwgroup instead
        if not mwgroup:
            if hasattr(mw.dcm_data, 'SeriesNumber'):
                mwgroup.append(mw)
                groups[0].append(int(mw.dcm_data.SeriesNumber))
                groups[1].append(len(mwgroup) - 1)
                continue
        N = len(mwgroup)
        #print fidx, N, filename
        ingrp = False
        for idx in range(N):
            same = mw.is_same_series(mwgroup[idx])
            #print idx, same, groups[idx][0] 
            if same:
                if hasattr(mw.dcm_data, 'SeriesNumber'):
                    groups[0].append(int(mwgroup[idx].dcm_data.SeriesNumber))
                    groups[1].append(idx)
                    ingrp = True
        if not ingrp:
            if hasattr(mw.dcm_data, 'SeriesNumber'):
                mwgroup.append(mw)
                groups[0].append(int(mw.dcm_data.SeriesNumber))
                groups[1].append(len(mwgroup) - 1)

    group_map = dict(zip(groups[0], groups[1]))
    
    # drop known-unwanted series numbers (pop is a no-op if a key is absent)
    for series_num in (20002, 300):
        group_map.pop(series_num, None)

    total = 0
    filegroup = {}
    seqinfo = []
    for series, mwidx in sorted(group_map.items()):
        mw = mwgroup[mwidx]
        dcminfo = mw.dcm_data
        files = np.array(fl)[np.array(groups[0]) == series].tolist()
        filegroup[series] = files
        size = list(mw.image_shape) + [dcminfo.ImagesinAcquisition]
        total += size[-1]
        if hasattr(dcminfo, 'NumberofTemporalPositions'):
            size.append(dcminfo.NumberofTemporalPositions)
        else:
            size.append(1)
        try:
            TR = float(dcminfo.RepetitionTime)/1000.
        except AttributeError:
            TR = -1
        try:
            TE = float(dcminfo.EchoTime)
        except AttributeError:
            TE = -1
        info = [total, os.path.split(files[0])[1], series, '-', '-', '-'] + \
               size + [TR, TE, dcminfo.SeriesDescription]
        seqinfo.append(info)
    return seqinfo, filegroup
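
All of the `process_dicoms` variants above share the same calling convention; a hypothetical invocation:

from glob import glob
fl = sorted(glob('/data/session1/*.dcm'))  # hypothetical location
seqinfo, filegroup = process_dicoms(fl)
for row in seqinfo:
    print(row[0], row[1], row[2])  # running total, first filename, series number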