def get_formatted_scans_key_row(item):
    """
    Parameters
    ----------
    item

    Returns
    -------
    row: list
        [ISO acquisition time, performing physician name, random string]
    """
    dcm_fn = item[-1][0]
    mw = ds.wrapper_from_data(dcm.read_file(dcm_fn,
                                            stop_before_pixels=True,
                                            force=True))
    # we need to store filenames and acquisition times
    # parse date and time and get it into isoformat
    date = mw.dcm_data.ContentDate
    time = mw.dcm_data.ContentTime.split('.')[0]
    td = time + date
    acq_time = datetime.strptime(td, '%H%M%S%Y%m%d').isoformat()
    # add random string
    randstr = ''.join(map(chr, sample(k=8, population=range(33, 127))))
    try:
        perfphys = mw.dcm_data.PerformingPhysicianName
    except AttributeError:
        perfphys = ''
    row = [acq_time, perfphys, randstr]
    # empty entries should be 'n/a'
    # https://github.com/dartmouth-pbs/heudiconv/issues/32
    row = ['n/a' if not str(e) else e for e in row]
    return row
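# A minimal, hypothetical driver for get_formatted_scans_key_row(). The shape
# of `item` (its last element holding the list of DICOM filenames) matches how
# the function indexes it; the path below is made up for illustration only.
def _demo_get_formatted_scans_key_row():
    item = ('1-func', (), ['/data/dicoms/series1/0001.dcm'])  # hypothetical
    row = get_formatted_scans_key_row(item)
    # row == [ISO acquisition time, performing physician or 'n/a', random str]
    print(row)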
def get_slice_timing(root):
    for path, subdirs, files in os.walk(root):
        if files:
            dcm_files = glob(op.join(path, '*.dcm'))
            num_files = len(dcm_files)
            slice_time = [None] * num_files
            for i in range(num_files):
                # glob already returns the path-qualified filename
                dcm_filepath = dcm_files[i]
                mw = ds.wrapper_from_data(dcm.read_file(dcm_filepath,
                                                        stop_before_pixels=True,
                                                        force=True))
                dcminfo = mw.dcm_data
                acquisition_time = float(dcminfo.TriggerTime) / 1000
                slice_time[i] = acquisition_time
            return slice_time
        else:
            print('No files found under {0}'.format(path))
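# Hedged usage sketch for get_slice_timing(): print per-slice trigger times
# (in seconds) for the first populated directory under a hypothetical root.
def _demo_get_slice_timing():
    root = '/data/sub-01/func_dicoms'  # hypothetical path
    print(get_slice_timing(root))      # e.g. [0.0, 0.05, 0.1, ...]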
def group_dicoms_into_seqinfos(files, file_filter, dcmfilter, grouping):
    """Process list of dicoms and return seqinfo and file group

    `seqinfo` contains per-sequence extract of fields from DICOMs which
    will be later provided into heuristics to decide on filenames

    Parameters
    ----------
    files : list of str
        List of files to consider
    file_filter : callable, optional
        Applied to each item of filenames.  Should return True if file needs
        to be kept, False otherwise.
    dcmfilter : callable, optional
        If called on dcm_data and returns True, it is used to set series_id
    grouping : {'studyUID', 'accession_number', None}, optional
        What to group by: studyUID or accession_number

    Returns
    -------
    seqinfo : OrderedDict
        Mapping of `SeqInfo` entries to the list of files in that sequence;
        when grouping by studyUID or accession_number, a mapping from that
        key to such per-sequence mappings.
    """
    allowed_groupings = ['studyUID', 'accession_number', None]
    if grouping not in allowed_groupings:
        raise ValueError('I do not know how to group by {0}'.format(grouping))
    per_studyUID = grouping == 'studyUID'
    per_accession_number = grouping == 'accession_number'
    lgr.info("Analyzing %d dicoms", len(files))

    import dcmstack as ds
    import dicom as dcm

    groups = [[], []]
    mwgroup = []

    studyUID = None
    # for sanity check that all DICOMs came from the same
    # "study".  If not -- what is the use-case? (interrupted acquisition?)
    # and how would we then deal with series numbers
    # which would differ already
    if file_filter:
        nfl_before = len(files)
        files = list(filter(file_filter, files))
        nfl_after = len(files)
        lgr.info('Filtering out {0} dicoms based on their filename'.format(
            nfl_before - nfl_after))
    for fidx, filename in enumerate(files):
        # TODO after getting a regression test check if the same behavior
        #      with stop_before_pixels=True
        mw = ds.wrapper_from_data(dcm.read_file(filename, force=True))

        # drop volatile parts of the series signature so grouping works
        for sig in ('iop', 'ICE_Dims', 'SequenceName'):
            try:
                del mw.series_signature[sig]
            except KeyError:
                pass

        try:
            file_studyUID = mw.dcm_data.StudyInstanceUID
        except AttributeError:
            lgr.info("File {} is missing StudyInstanceUID".format(filename))
            file_studyUID = None

        try:
            series_id = (int(mw.dcm_data.SeriesNumber),
                         mw.dcm_data.ProtocolName)
            file_studyUID = mw.dcm_data.StudyInstanceUID

            if not per_studyUID:
                # verify that we are working with a single study
                if studyUID is None:
                    studyUID = file_studyUID
                elif not per_accession_number:
                    assert studyUID == file_studyUID, (
                        "Conflicting study identifiers found [{}, {}].".format(
                            studyUID, file_studyUID))
        except AttributeError as exc:
            lgr.warning('Ignoring %s since not quite a "normal" DICOM: %s',
                        filename, exc)
            series_id = (-1, 'none')
            file_studyUID = None

        if not series_id[0] < 0:
            if dcmfilter is not None and dcmfilter(mw.dcm_data):
                series_id = (-1, mw.dcm_data.ProtocolName)

        # filter out unwanted non-image-data DICOMs by assigning
        # a series number < 0 (see test below)
        if not series_id[0] < 0 and mw.dcm_data[0x0008, 0x0016].repval in (
                'Raw Data Storage',
                'GrayscaleSoftcopyPresentationStateStorage'):
            series_id = (-1, mw.dcm_data.ProtocolName)

        if per_studyUID:
            series_id = series_id + (file_studyUID,)

        ingrp = False
        for idx in range(len(mwgroup)):
            # same = mw.is_same_series(mwgroup[idx])
            if mw.is_same_series(mwgroup[idx]):
                # the same series should have the same study uuid
                assert (mwgroup[idx].dcm_data.get('StudyInstanceUID', None)
                        == file_studyUID)
                ingrp = True
                if series_id[0] >= 0:
                    series_id = (mwgroup[idx].dcm_data.SeriesNumber,
                                 mwgroup[idx].dcm_data.ProtocolName)
                    if per_studyUID:
                        series_id = series_id + (file_studyUID,)
                groups[0].append(series_id)
                groups[1].append(idx)

        if not ingrp:
            mwgroup.append(mw)
            groups[0].append(series_id)
            groups[1].append(len(mwgroup) - 1)

    group_map = dict(zip(groups[0], groups[1]))

    total = 0
    seqinfo = OrderedDict()

    # for the next line to make any sense the series_id needs to
    # be sortable in a way that preserves the series order
    for series_id, mwidx in sorted(group_map.items()):
        if series_id[0] < 0:
            # skip our fake series with unwanted files
            continue
        mw = mwgroup[mwidx]
        if mw.image_shape is None:
            # this whole thing has no image data (maybe just PSg DICOMs)
            # -- nothing to see here, just move on
            continue
        dcminfo = mw.dcm_data
        series_files = [files[i] for i, s in enumerate(groups[0])
                        if s == series_id]
        # turn the series_id into a human-readable string -- string is
        # needed for JSON storage later on
        if per_studyUID:
            studyUID = series_id[2]
            series_id = series_id[:2]
        accession_number = dcminfo.get('AccessionNumber')

        series_id = '-'.join(map(str, series_id))

        size = list(mw.image_shape) + [len(series_files)]
        total += size[-1]
        if len(size) < 4:
            size.append(1)

        # MG - refactor into util function
        try:
            TR = float(dcminfo.RepetitionTime) / 1000.
        except (AttributeError, ValueError):
            TR = -1
        try:
            TE = float(dcminfo.EchoTime)
        except (AttributeError, ValueError):
            TE = -1
        try:
            refphys = str(dcminfo.ReferringPhysicianName)
        except AttributeError:
            refphys = ''
        try:
            image_type = tuple(dcminfo.ImageType)
        except AttributeError:
            image_type = ''
        try:
            series_desc = dcminfo.SeriesDescription
        except AttributeError:
            series_desc = ''

        motion_corrected = 'MOCO' in image_type

        if dcminfo.get([0x18, 0x24], None):
            # GE and Philips scanners
            sequence_name = dcminfo[0x18, 0x24].value
        elif dcminfo.get([0x19, 0x109c], None):
            # Siemens scanners
            sequence_name = dcminfo[0x19, 0x109c].value
        else:
            sequence_name = 'Not found'

        info = SeqInfo(
            total,
            op.split(series_files[0])[1],
            series_id,
            op.basename(op.dirname(series_files[0])),
            '-', '-',
            size[0], size[1], size[2], size[3],
            TR, TE,
            dcminfo.ProtocolName,
            motion_corrected,
            'derived' in [x.lower() for x in dcminfo.get('ImageType', [])],
            dcminfo.get('PatientID'),
            dcminfo.get('StudyDescription'),
            refphys,
            dcminfo.get('SeriesDescription'),
            sequence_name,
            image_type,
            accession_number,
            # For demographics to populate BIDS participants.tsv
            dcminfo.get('PatientAge'),
            dcminfo.get('PatientSex'),
            dcminfo.get('AcquisitionDate'),
        )
        # candidates
        # dcminfo.AccessionNumber
        #   len(dcminfo.ReferencedImageSequence)
        #   len(dcminfo.SourceImageSequence)
        # FOR demographics
        if per_studyUID:
            key = studyUID.split('.')[-1]
        elif per_accession_number:
            key = accession_number
        else:
            key = ''
        lgr.debug("%30s %30s %27s %27s %5s nref=%-2d nsrc=%-2d %s" % (
            key,
            info.series_id,
            dcminfo.SeriesDescription,
            dcminfo.ProtocolName,
            info.is_derived,
            len(dcminfo.get('ReferencedImageSequence', '')),
            len(dcminfo.get('SourceImageSequence', '')),
            info.image_type))

        if per_studyUID:
            if studyUID not in seqinfo:
                seqinfo[studyUID] = OrderedDict()
            seqinfo[studyUID][info] = series_files
        elif per_accession_number:
            if accession_number not in seqinfo:
                seqinfo[accession_number] = OrderedDict()
            seqinfo[accession_number][info] = series_files
        else:
            seqinfo[info] = series_files

    if per_studyUID:
        lgr.info("Generated sequence info for %d studies with %d entries "
                 "total", len(seqinfo), sum(map(len, seqinfo.values())))
    elif per_accession_number:
        lgr.info("Generated sequence info for %d accession numbers with %d "
                 "entries total", len(seqinfo), sum(map(len, seqinfo.values())))
    else:
        lgr.info("Generated sequence info with %d entries", len(seqinfo))
    return seqinfo
def process_dicoms(fl):
    groups = [[], []]
    mwgroup = []
    hays = open("/scratch/PSB6351_2017/week4/hays/debug1.txt", 'w')
    hays.write("Here\n")
    hays.flush()
    for fidx, filename in enumerate(fl):
        mw = ds.wrapper_from_data(dcm.read_file(filename, force=True))
        # drop volatile parts of the series signature so grouping works
        for sig in ('iop', 'ICE_Dims', 'SequenceName'):
            try:
                del mw.series_signature[sig]
            except KeyError:
                pass
        if not mwgroup:  # first wrapper starts the first group
            if hasattr(mw.dcm_data, 'SeriesNumber'):
                mwgroup.append(mw)
                groups[0].append(int(mw.dcm_data.SeriesNumber))
                groups[1].append(len(mwgroup) - 1)
            continue
        N = len(mwgroup)
        # print fidx, N, filename
        ingrp = False
        for idx in range(N):
            same = mw.is_same_series(mwgroup[idx])
            # print idx, same, groups[idx][0]
            if same:
                if hasattr(mw.dcm_data, 'SeriesNumber'):
                    groups[0].append(int(mwgroup[idx].dcm_data.SeriesNumber))
                    groups[1].append(idx)
                    ingrp = True
        if not ingrp:
            if hasattr(mw.dcm_data, 'SeriesNumber'):
                mwgroup.append(mw)
                groups[0].append(int(mw.dcm_data.SeriesNumber))
                groups[1].append(len(mwgroup) - 1)

    group_map = dict(zip(groups[0], groups[1]))
    # drop series that should not be converted
    for series in (20002, 20004, 300, 301, 302):
        group_map.pop(series, None)

    total = 0
    filegroup = {}
    seqinfo = []
    for series, mwidx in sorted(group_map.items()):
        mw = mwgroup[mwidx]
        dcminfo = mw.dcm_data
        files = np.array(fl)[np.array(groups[0]) == series].tolist()
        filegroup[series] = files
        size = list(mw.image_shape) + [dcminfo.ImagesinAcquisition]
        total += size[-1]
        if hasattr(dcminfo, 'NumberofTemporalPositions'):
            size.append(dcminfo.NumberofTemporalPositions)
        else:
            size.append(1)
        try:
            TR = float(dcminfo.RepetitionTime) / 1000.
        except AttributeError:
            TR = -1
        try:
            TE = float(dcminfo.EchoTime)
        except AttributeError:
            TE = -1
        hays.write("Total: {0}\n".format(total))
        hays.write("series: {0}\n".format(series))
        hays.write("Split Path: {0}\n".format(os.path.split(files[0])[1]))
        hays.write("dcminfo.SeriesDescription: {0}\n".format(dcminfo.SeriesDescription))
        hays.write("size: {0}\n".format(size))
        hays.write("TR: {0}\n".format(TR))
        info = [total, os.path.split(files[0])[1], series, '-', '-', '-'] + \
            size + [TR, TE, dcminfo.SeriesDescription]
        seqinfo.append(info)
        hays.write(str(seqinfo))
        hays.flush()
    hays.close()
    return seqinfo, filegroup
def process_dicoms(fl):
    groups = [[], []]
    mwgroup = []
    for fidx, filename in enumerate(fl):
        mw = ds.wrapper_from_data(dcm.read_file(filename, force=True))
        # drop volatile parts of the series signature so grouping works
        for sig in ('iop', 'ICE_Dims', 'SequenceName'):
            try:
                del mw.series_signature[sig]
            except KeyError:
                pass
        if not mwgroup:  # first wrapper starts the first group
            mwgroup.append(mw)
            groups[0].append(int(mw.dcm_data.SeriesNumber))
            groups[1].append(len(mwgroup) - 1)
            continue
        N = len(mwgroup)
        # print fidx, N, filename
        ingrp = False
        for idx in range(N):
            same = mw.is_same_series(mwgroup[idx])
            # print idx, same, groups[idx][0]
            if same:
                groups[0].append(int(mwgroup[idx].dcm_data.SeriesNumber))
                groups[1].append(idx)
                ingrp = True
        if not ingrp:
            mwgroup.append(mw)
            groups[0].append(int(mw.dcm_data.SeriesNumber))
            groups[1].append(len(mwgroup) - 1)

    group_map = dict(zip(groups[0], groups[1]))
    # exclude series 5001 from conversion
    group_map.pop(5001, None)

    total = 0
    filegroup = {}
    seqinfo = []
    for series, mwidx in sorted(group_map.items()):
        mw = mwgroup[mwidx]
        dcminfo = mw.dcm_data
        files = np.array(fl)[np.array(groups[0]) == series].tolist()
        filegroup[series] = files
        size = list(mw.image_shape) + [len(files)]
        total += size[-1]
        if len(size) < 4:
            size.append(1)
        try:
            TR = float(dcminfo.RepetitionTime) / 1000.
        except AttributeError:
            TR = -1
        try:
            TE = float(dcminfo.EchoTime)
        except AttributeError:
            TE = -1
        info = [total, os.path.split(files[0])[1], series, '-', '-', '-'] + \
            size + [TR, TE, dcminfo.ProtocolName,
                    'MoCo' in dcminfo.SeriesDescription]
        seqinfo.append(info)
    return seqinfo, filegroup
def get_scan_info(root, basedir):
    """
    Walk `root` looking for *_dicoms directories, read one DICOM per series,
    and write BIDS-style NIfTI copies and JSON sidecars under `basedir`.

    Parameters
    ----------
    root : str
        Directory tree containing the per-series dicoms folders.
    basedir : str
        Output directory in which the BIDS-like tree is created.
    """
    for path, subdirs, files in os.walk(root):
        if path.split('_')[-1] == 'dicoms' \
                and "Save" not in re.split(r'[_|@|/]', path) \
                and "calibration" not in re.split(r'[_|@|/]', path) \
                and "Loc" not in re.split(r'[_|@|/]', path) \
                and "Processed" not in re.split(r'[_|@|/]', path):
            if files:
                niiDir = path.replace(path.split('/')[-1], "").replace('untar', 'raw')
                niifiles = glob(os.path.join(niiDir, '*.nii.gz'))
                dcm_files = glob(op.join(path, '*.dcm'))
                # glob already returns the path-qualified filename
                dcm_filepath = dcm_files[0]
                mw = ds.wrapper_from_data(dcm.read_file(dcm_filepath,
                                                        stop_before_pixels=True,
                                                        force=True))
                dcminfo = mw.dcm_data
                # MG
                try:
                    TR = float(dcminfo.RepetitionTime) / 1000.
                except (AttributeError, ValueError):
                    TR = -1
                try:
                    TE = float(dcminfo.EchoTime)
                except (AttributeError, ValueError):
                    TE = -1
                projectName = re.split(r'[_|@|/]', dcminfo.PatientID)[-1]
                projectPath = os.path.join(basedir, projectName)
                mkdir(projectPath)
                targetPath = os.path.join(projectPath, 'source')
                mkdir(targetPath)
                date = dcminfo.ContentDate
                time = dcminfo.ContentTime.split('.')[0]
                td = time + date
                acq_time = datetime.strptime(td, '%H%M%S%Y%m%d').isoformat()
                key_subID = date
                if key_subID in dict_subID.keys():
                    info_subID = dict_subID[key_subID]
                    subPrefix = info_subID
                    subID = re.split(r'[_]', info_subID)[0].split('-')[1]
                    subDir = os.path.join(targetPath, 'sub-' + subID)
                    mkdir(subDir)
                    phase = re.split(r'[_]', info_subID)[1].split('-')[1]
                    secDir = os.path.join(subDir, 'ses-' + phase)
                    mkdir(secDir)
                else:
                    print('no such key')
                    subID = re.split(r'[_|@|/]', dcminfo.PatientID)[0].split('s')[-1]
                    subDir = os.path.join(targetPath, subID)
                    mkdir(subDir)
                    phase = re.split(r'[_|@|/]', dcminfo.PatientID)[1]
                    secDir = os.path.join(subDir, subID)
                    mkdir(secDir)
                    subPrefix = 'sub-' + subID + '_ses-' + phase
                subAge = int(dcminfo.PatientAge.split('Y')[0])
                subSex = dcminfo.PatientSex
                # scanType = re.split(r'[\s]', dcminfo.SeriesDescription)[0]
                scanType = dcminfo[0x19109e].value
                # Fieldmap; EffectiveEchoSpacing and TotalReadoutTime
                ETL = dcminfo.EchoTrainLength
                PEbandwidth = float(dcminfo.PixelBandwidth)
                # have not figured out how to read the acceleration factor
                # directly from the DICOM; right now it is read from the
                # NIfTI header
                ACCFactor = 3
                # EffectiveEchoSpacing = 1 / (PEbandwidth * (ETL - 1) * ACCFactor)
                # TotalReadoutTime = 1 / PEbandwidth
                taskName = dcminfo.SeriesDescription.split(' ')[0]
                # Build the info dicts and save them
                Dataset_Description = dict()
                Dataset_Description['Name'] = projectName
                Dataset_Description['BIDSVersion'] = BIDSVersion
                Dataset_Description['License'] = License
                Dataset_Description['Authors'] = Authors
                Dataset_Description['Acknowledgements'] = Acknowledgements
                Dataset_Description['HowToAcknowledge'] = HowToAcknowledge
                Dataset_Description['Funding'] = Funding
                Dataset_Description['InstitutionName'] = dcminfo.InstitutionName
                Dataset_Description['Manufacturer'] = dcminfo.Manufacturer
                Dataset_Description['ManufacturersModelName'] = dcminfo.ManufacturersModelName
                fname = os.path.join(targetPath, 'dataset_description.json')
                write_json(fname, Dataset_Description, 'TRUE')
                # Dataset_Description['Session'] = phase
                Dataset_Description['SubID'] = subID
                Dataset_Description['SubAge'] = subAge
                Dataset_Description['SubSex'] = subSex
                fname = os.path.join(subDir, 'dataset_description.json')
                write_json(fname, Dataset_Description, 'TRUE')
                if scanType == 'EFGRE3D':
                    Anatomy_Info = dict()
                    Anatomy_Info['ScanType'] = 'T1'
                    Anatomy_Info['SeriesDescription'] = dcminfo.SeriesDescription
                    Anatomy_Info['AcquisitionMatrix'] = dcminfo.AcquisitionMatrix
                    Anatomy_Info['RepetitionTime'] = TR
                    Anatomy_Info['TE'] = TE
                    Anatomy_Info['FlipAngle'] = int(dcminfo.FlipAngle)
                    Anatomy_Info['InstitutionName'] = dcminfo.InstitutionName
                    Anatomy_Info['Manufacturer'] = dcminfo.Manufacturer
                    Anatomy_Info['ManufacturersModelName'] = dcminfo.ManufacturersModelName
                    Anatomy_Info['AcqTime'] = acq_time
                    anatdir = os.path.join(secDir, 'anat')
                    mkdir(anatdir)
                    for i in niifiles:
                        copy(i, os.path.join(anatdir, subPrefix + '_T1w.nii.gz'))
                    fname = os.path.join(anatdir, subPrefix + '_T1w.json')
                    write_json(fname, Anatomy_Info, 'TRUE')
                elif scanType == '3DFSE':
                    Anatomy_Info = dict()
                    Anatomy_Info['ScanType'] = 'T2'
                    Anatomy_Info['SeriesDescription'] = dcminfo.SeriesDescription
                    Anatomy_Info['AcquisitionMatrix'] = dcminfo.AcquisitionMatrix
                    Anatomy_Info['RepetitionTime'] = TR
                    Anatomy_Info['TE'] = TE
                    Anatomy_Info['FlipAngle'] = int(dcminfo.FlipAngle)
                    Anatomy_Info['InstitutionName'] = dcminfo.InstitutionName
                    Anatomy_Info['Manufacturer'] = dcminfo.Manufacturer
                    Anatomy_Info['ManufacturersModelName'] = dcminfo.ManufacturersModelName
                    Anatomy_Info['AcqTime'] = acq_time
                    anatdir = os.path.join(secDir, 'anat')
                    mkdir(anatdir)
                    for i in niifiles:
                        copy(i, os.path.join(anatdir, subPrefix + '_T2w.nii.gz'))
                    fname = os.path.join(anatdir, subPrefix + '_T2w.json')
                    write_json(fname, Anatomy_Info, 'TRUE')
                elif scanType == 'EPI':
                    slice_times = get_slice_timing(root)
                    Func_Info = dict()
                    Func_Info['ScanType'] = 'Func'
                    Func_Info['SeriesDescription'] = dcminfo.SeriesDescription
                    Func_Info['AcquisitionMatrix'] = dcminfo.AcquisitionMatrix
                    Func_Info['RepetitionTime'] = TR
                    Func_Info['TE'] = TE
                    Func_Info['FlipAngle'] = int(dcminfo.FlipAngle)
                    Func_Info['SliceTiming'] = slice_times
                    Func_Info['ACCFactor'] = ACCFactor
                    Func_Info['InstitutionName'] = dcminfo.InstitutionName
                    Func_Info['Manufacturer'] = dcminfo.Manufacturer
                    Func_Info['ManufacturersModelName'] = dcminfo.ManufacturersModelName
                    Func_Info['AcqTime'] = acq_time
                    Func_Info['TaskName'] = taskName
                    funcdir = os.path.join(secDir, 'func')
                    mkdir(funcdir)
                    for i in niifiles:
                        copy(i, os.path.join(funcdir, subPrefix + '_task-' + taskName + '_bold.nii.gz'))
                    fname = os.path.join(funcdir, subPrefix + '_task-' + taskName + '_bold.json')
                    write_json(fname, Func_Info, 'TRUE')
                elif scanType == 'Fieldmap':
                    Fieldmap_Info = dict()
                    Fieldmap_Info['ScanType'] = 'Fieldmap'
                    Fieldmap_Info['SeriesDescription'] = dcminfo.SeriesDescription
                    Fieldmap_Info['AcquisitionMatrix'] = dcminfo.AcquisitionMatrix
                    Fieldmap_Info['RepetitionTime'] = TR
                    Fieldmap_Info['TE'] = TE
                    Fieldmap_Info['FlipAngle'] = int(dcminfo.FlipAngle)
                    Fieldmap_Info['ACCFactor'] = ACCFactor
                    # NOTE: EffectiveEchoSpacing and phaseEncodeDirection are
                    # never assigned above (the formulas are commented out),
                    # so this branch raises NameError as written
                    Fieldmap_Info['EffectiveEchoSpacing'] = EffectiveEchoSpacing
                    Fieldmap_Info['phaseEncodeDirection'] = phaseEncodeDirection
                    Fieldmap_Info['InstitutionName'] = dcminfo.InstitutionName
                    Fieldmap_Info['Manufacturer'] = dcminfo.Manufacturer
                    Fieldmap_Info['ManufacturersModelName'] = dcminfo.ManufacturersModelName
                    Fieldmap_Info['AcqTime'] = acq_time
                    fpdir = os.path.join(secDir, 'fmap')
                    mkdir(fpdir)
                    for i in niifiles:
                        copy(i, os.path.join(fpdir, subPrefix + '_fmap.nii.gz'))
                    fname = os.path.join(fpdir, subPrefix + '_fmap.json')
                    write_json(fname, Fieldmap_Info, 'TRUE')
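# get_scan_info() depends on module-level helpers and state (dict_subID,
# write_json, mkdir, copy, BIDSVersion, ...), so this driver is only a rough,
# hypothetical sketch of how it would be invoked.
def _demo_get_scan_info():
    get_scan_info(root='/data/untar/s001@proj',  # tree holding *_dicoms dirs
                  basedir='/data/bids')          # output BIDS-like tree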
def sort_rule_clinical(filename, args):
    '''
    Clinical sort rule:

        patient_name
        |-study_date
          |-modality
            |-series_number
              |-{patient}.{modality}.{series:04d}.{image:04d}.{study_date}.{unique}.dcm
              ...
            |-series_number
            ...

    input:
        filename: dicom filename
    output:
        the sorted relative filename:
        patient_name/study_date/studyID.hash(StudyInstanceUID)/modality/series_number/{patient}.{modality}.{series:04d}.{image:04d}.{study_date}.{unique}.dcm
        or None if the file should be skipped
    '''

    def write_error_file(filen, errorInfoTemp):
        if os.path.exists(filen):
            with open(filen, 'r') as readFile:
                reader = csv.reader(readFile, delimiter='\t')
                lines = list(reader)
                if errorInfoTemp.split('\t') not in lines:
                    with open(filen, 'a') as writeFile:
                        writeFile.write(errorInfoTemp)
                        writeFile.write("\n")
        else:
            with open(filen, 'w') as writeFile:
                writeFile.write("\t".join(['subject', 'date', 'series', 'issue']))
                writeFile.write("\n")
                writeFile.write(errorInfoTemp)
                writeFile.write("\n")

    def clean_path(path):
        return re.sub(r'[^a-zA-Z0-9.-]', '_', '{0}'.format(path))

    def hashcode(value):
        code = 0
        for character in value:
            code = (code * 31 + ord(character)) & 0xffffffff
        return '{0:08X}'.format(code)

    # This will ignore any DICOMDIR present in the folder
    if all(['DICOMDIR' not in filename,
            not filename.endswith('OR_dates.tsv')]):
        logger = logging.getLogger(__name__)
        try:
            error_file = os.path.join(args.dicom_dir, 'errorInfo.tsv')
            or_dates_file = os.path.join(args.dicom_dir, 'or_dates.tsv')
            dataset = pydicom.read_file(filename,
                                        stop_before_pixels=True, force=True)
            study_date = dataset.StudyDate[0:4] + '_' + \
                dataset.StudyDate[4:6] + '_' + dataset.StudyDate[6:8]
            if 'SeriesDescription' in dataset:
                if any(x in dataset.SeriesDescription.upper()
                       for x in {'REJECTION'}):
                    return None
            # This will skip any order sheets
            if dataset.Modality in {'SR', 'PR', 'KO'}:
                errorInfoTemp = "\t".join(
                    ['P' + [s for s in filename.split(os.sep)
                            if 'sub' in s][0].split('-')[1],
                     study_date,
                     clean_path('{series:04d}'.format(series=dataset.SeriesNumber)),
                     dataset.Modality])
                write_error_file(error_file, errorInfoTemp)
                return None
            # This will skip any order sheets and localizers
            elif 'ImageType' not in dataset:
                return None
            elif any(x in dataset.ImageType for x in {'SECONDARY', 'LOCALIZER'}):
                return None
            else:
                if 'Manufacturer' in dataset:
                    if 'SIEMENS' in dataset.Manufacturer:
                        errorInfoTemp = "\t".join(
                            ['P' + [s for s in filename.split(os.sep)
                                    if 'sub' in s][0].split('-')[1],
                             study_date,
                             clean_path('{series:04d}'.format(series=dataset.SeriesNumber)),
                             'SIEMENS'])
                        write_error_file(error_file, errorInfoTemp)
                        return None
                try:
                    csaReader = ds.wrapper_from_data(dataset)
                    modality = dataset.Modality
                    # --- INTRAOP X-RAY determination
                    if any(substring in modality for substring in
                           {'Intraoperative', 'Skull', 'XA', 'RF', 'CR', 'OT'}):
                        if 'CR' not in dataset.Modality:
                            or_date = dataset.StudyDate[0:4] + '_' + \
                                dataset.StudyDate[4:6] + '_' + \
                                dataset.StudyDate[6:8]
                            orDateTemp = "\t".join(
                                ['P' + [s for s in filename.split(os.sep)
                                        if 'sub' in s][0].split('-')[1],
                                 or_date])
                            if os.path.exists(or_dates_file):
                                with open(or_dates_file, 'r') as readFile:
                                    reader = csv.reader(readFile, delimiter='\t')
                                    lines = list(reader)
                                    if orDateTemp.split('\t') not in lines:
                                        with open(or_dates_file, 'a') as writeFile:
                                            writeFile.write(orDateTemp)
                                            writeFile.write("\n")
                            else:
                                with open(or_dates_file, 'w') as writeFile:
                                    writeFile.write("\t".join(['subject', 'or_date']))
                                    writeFile.write("\n")
                                    writeFile.write(orDateTemp)
                                    writeFile.write("\n")
                            return None
                        elif all(['CR' in dataset.Modality,
                                  any(x in dataset.StudyDescription for x in
                                      {'Skull Routine Portable',
                                       'Intraoperative Portable'})]):
                            errorInfoTemp = "\t".join(
                                ['P' + [s for s in filename.split(os.sep)
                                        if 'sub' in s][0].split('-')[1],
                                 study_date,
                                 clean_path('{series:04d}'.format(series=dataset.SeriesNumber)),
                                 dataset.StudyDescription])
                            write_error_file(error_file, errorInfoTemp)
                            return None
                        else:
                            patient = 'P' + \
                                [s for s in filename.split(os.sep)
                                 if 'sub' in s][0].split('-')[1] + \
                                '_' + study_date
                            series_number = clean_path(
                                '{series:04d}'.format(series=dataset.SeriesNumber))
                            studyID_and_hash_studyInstanceUID = clean_path(
                                '.'.join([dataset.StudyID or 'NA',
                                          hashcode(dataset.StudyInstanceUID)]))
                            path = os.path.join(patient, dataset.StudyDate,
                                                studyID_and_hash_studyInstanceUID,
                                                modality, series_number)
                            sorted_filename = '{patient}.{modality}.{series:04d}.{image:04d}.{study_date}.{unique}.dcm'.format(
                                patient=patient.upper(),
                                modality=modality,
                                series=dataset.SeriesNumber,
                                image=dataset.InstanceNumber,
                                study_date=dataset.StudyDate,
                                unique=hashcode(dataset.SOPInstanceUID),
                            )
                    else:
                        if dataset.SeriesDescription.lower() not in {'loc', 'dose report'}:
                            patient = 'P' + \
                                [s for s in filename.split(os.sep)
                                 if 'sub' in s][0].split('-')[1] + \
                                '_' + study_date
                            series_number = clean_path(
                                '{series:04d}'.format(series=dataset.SeriesNumber))
                            studyID_and_hash_studyInstanceUID = clean_path(
                                '.'.join([dataset.StudyID or 'NA',
                                          hashcode(dataset.StudyInstanceUID)]))
                            path = os.path.join(patient, dataset.StudyDate,
                                                studyID_and_hash_studyInstanceUID,
                                                modality, series_number)
                            sorted_filename = '{patient}.{modality}.{series:04d}.{image:04d}.{study_date}.{unique}.dcm'.format(
                                patient=patient.upper(),
                                modality=modality,
                                series=dataset.SeriesNumber,
                                image=dataset.InstanceNumber,
                                study_date=dataset.StudyDate,
                                unique=hashcode(dataset.SOPInstanceUID),
                            )
                except Exception as e:
                    errorInfoTemp = "\t".join(
                        ['P' + [s for s in filename.split(os.sep)
                                if 'sub' in s][0].split('-')[1],
                         study_date,
                         clean_path('{series:04d}'.format(series=dataset.SeriesNumber)),
                         'csaReader'])
                    write_error_file(error_file, errorInfoTemp)
                    return None
        except Exception as e:
            logger.exception('something wrong with {}'.format(filename))
            logger.exception(e)
            return None
        if 'path' in locals():
            sorted_full_filename = os.path.join(path, sorted_filename)
            return sorted_full_filename
        else:
            return None
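# sort_rule_clinical() only reads `args.dicom_dir`, so a bare namespace is
# enough to exercise it; both paths here are hypothetical. Note the filename
# must contain a 'sub-*' path component for the patient ID to be parsed.
def _demo_sort_rule_clinical():
    from argparse import Namespace
    args = Namespace(dicom_dir='/data/clinical')
    relpath = sort_rule_clinical('/data/clinical/sub-P001/IM0001.dcm', args)
    # relpath is the sorted relative filename, or None if the file was skipped
    print(relpath)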
def process_dicoms(fl):
    groups = [[], []]
    mwgroup = []
    for fidx, filename in enumerate(fl):
        mw = ds.wrapper_from_data(dcm.read_file(filename, force=True))
        # drop volatile parts of the series signature so grouping works
        for sig in ('iop', 'ICE_Dims', 'SequenceName'):
            try:
                del mw.series_signature[sig]
            except KeyError:
                pass
        if not mwgroup:  # first wrapper starts the first group
            if hasattr(mw.dcm_data, 'SeriesNumber'):
                mwgroup.append(mw)
                groups[0].append(int(mw.dcm_data.SeriesNumber))
                groups[1].append(len(mwgroup) - 1)
            continue
        N = len(mwgroup)
        # print fidx, N, filename
        ingrp = False
        for idx in range(N):
            same = mw.is_same_series(mwgroup[idx])
            # print idx, same, groups[idx][0]
            if same:
                if hasattr(mw.dcm_data, 'SeriesNumber'):
                    groups[0].append(int(mwgroup[idx].dcm_data.SeriesNumber))
                    groups[1].append(idx)
                    ingrp = True
        if not ingrp:
            if hasattr(mw.dcm_data, 'SeriesNumber'):
                mwgroup.append(mw)
                groups[0].append(int(mw.dcm_data.SeriesNumber))
                groups[1].append(len(mwgroup) - 1)

    group_map = dict(zip(groups[0], groups[1]))
    # drop series that should not be converted
    for series in (20002, 300):
        group_map.pop(series, None)

    total = 0
    filegroup = {}
    seqinfo = []
    for series, mwidx in sorted(group_map.items()):
        mw = mwgroup[mwidx]
        dcminfo = mw.dcm_data
        files = np.array(fl)[np.array(groups[0]) == series].tolist()
        filegroup[series] = files
        size = list(mw.image_shape) + [dcminfo.ImagesinAcquisition]
        total += size[-1]
        if hasattr(dcminfo, 'NumberofTemporalPositions'):
            size.append(dcminfo.NumberofTemporalPositions)
        else:
            size.append(1)
        try:
            TR = float(dcminfo.RepetitionTime) / 1000.
        except AttributeError:
            TR = -1
        try:
            TE = float(dcminfo.EchoTime)
        except AttributeError:
            TE = -1
        info = [total, os.path.split(files[0])[1], series, '-', '-', '-'] + \
            size + [TR, TE, dcminfo.SeriesDescription]
        seqinfo.append(info)
    return seqinfo, filegroup
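# Typical driver for process_dicoms(): turn a flat, sorted list of DICOM
# files into per-series info rows plus a series -> files map. The directory
# is hypothetical.
def _demo_process_dicoms():
    from glob import glob
    fl = sorted(glob('/data/session1/*.dcm'))  # hypothetical
    seqinfo, filegroup = process_dicoms(fl)
    for info in seqinfo:
        print(info[2], info[1])  # series number, first filename in the series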