def validate_dicom(fl, dcmfilter):
    """Load a DICOM file and decide whether it should be processed.

    Parameters
    ----------
    fl : str
        Path of the file to read.
    dcmfilter : callable or None
        Optional predicate called with the wrapper's ``dcm_data``; a truthy
        result makes the file be ignored.

    Returns
    -------
    tuple or None
        ``(wrapper, series_id, study_uid)`` for an accepted file, where
        ``series_id`` is ``(int(SeriesNumber), ProtocolName)`` and
        ``study_uid`` is ``None`` when StudyInstanceUID is missing.
        ``None`` when the file is rejected.
    """
    wrapper = dw.wrapper_from_file(fl, force=True, stop_before_pixels=True)
    dataset = wrapper.dcm_data

    # Drop these entries from the series signature (absent keys are fine).
    for key in ('iop', 'ICE_Dims', 'SequenceName'):
        wrapper.series_signature.pop(key, None)

    # Workaround: when ProtocolName is missing or blank, take it from the
    # private Siemens CSA header (or fall back to an empty string).
    if not getattr(dataset, 'ProtocolName', '').strip():
        if wrapper.is_csa:
            dataset.ProtocolName = parse_private_csa_header(
                dataset, 'ProtocolName', 'tProtocolName')
        else:
            dataset.ProtocolName = ''

    try:
        series_number = int(dataset.SeriesNumber)
        series_id = (series_number, dataset.ProtocolName)
    except AttributeError as exc:
        lgr.warning('Ignoring %s since not quite a "normal" DICOM: %s', fl, exc)
        return None

    if dcmfilter is not None and dcmfilter(dataset):
        lgr.warning("Ignoring %s because of DICOM filter", fl)
        return None

    # Tag (0008, 0016): skip raw-data and presentation-state objects.
    ignored_sop_classes = (
        'Raw Data Storage', 'GrayscaleSoftcopyPresentationStateStorage')
    if dataset[0x0008, 0x0016].repval in ignored_sop_classes:
        return None

    try:
        study_uid = dataset.StudyInstanceUID
    except AttributeError:
        lgr.info("File {} is missing any StudyInstanceUID".format(fl))
        study_uid = None

    return wrapper, series_id, study_uid
def infotoids(seqinfos, outdir):
    """Derive locator, session and subject ids from the first sequence.

    Only the first entry of ``seqinfos`` is inspected. The locator is built
    from its referring physician name and the ``^``-separated study
    description; subject and session come from the PatientName of the
    example DICOM, which must look like ``[<x>_][<y>_]p<digits>_<letters><digits>``.

    Parameters
    ----------
    seqinfos : iterable
        Sequence-info records exposing ``example_dcm_file_path``,
        ``referring_physician_name`` and ``study_description``.
    outdir : str
        Unused; kept for interface compatibility.

    Returns
    -------
    dict
        ``{"locator": ..., "session": ..., "subject": ...}``; subject is the
        digit run after ``p`` and session the trailing digit run (either may
        be an empty string).

    Raises
    ------
    ValueError
        If the PatientName does not match the expected pattern.
    """
    seqinfo = next(iter(seqinfos))
    ex_dcm = nb_dw.wrapper_from_file(seqinfo.example_dcm_file_path)

    pi = str(seqinfo.referring_physician_name)
    study_path = str(seqinfo.study_description).split("^")
    patient_name = str(ex_dcm.dcm_data.PatientName)

    rema = re.match(
        r"(([^_]*)_)?(([^_]*)_)?p([0-9]*)_([a-z]*)([0-9]*)", patient_name)
    if rema is None:
        # re.match returns None on failure; fail with a message that names
        # the offending value instead of an AttributeError on None.
        raise ValueError(
            "Cannot extract subject/session from patient name %r"
            % patient_name)

    return {
        "locator": os.path.join(pi, *study_path),
        # Sessions to be deduced yet from the names etc TODO
        "session": rema.group(7),
        "subject": rema.group(5),
    }
def _iter_dcm_files(top_dirs):
    """Yield ``(dirpath, filename)`` for every ``*.dcm`` file below ``top_dirs``."""
    for top in top_dirs:
        for dirpath, _dirnames, filenames in os.walk(top):
            for filename in filenames:
                if filename.endswith('.dcm'):
                    yield dirpath, filename


def nki_pre_preprocess(root_dir, path_of_interest, extraction_bool,
                       memory_control_bool):
    """Write slice timings next to DICOM files and convert them with dcm2niix.

    For every ``*.dcm`` file found, the ``MosaicRefAcqTimes`` items of its
    CSA header are written to ``slice_timings.txt`` in the same directory
    (one value per line; a later file in the directory overwrites an
    earlier one). Each directory that contains DICOM files is then
    converted in place with ``Dcm2niix`` using BIDS output.

    Parameters
    ----------
    root_dir : str
        Directory whose entries are scanned when ``extraction_bool`` is false.
    path_of_interest : str
        Zip archive to extract when ``extraction_bool`` is true.
    extraction_bool : bool
        Extract ``path_of_interest`` first and scan the extraction directory.
    memory_control_bool : bool
        Delete the ``*.dcm`` files after conversion.
    """
    if extraction_bool:
        # NOTE(review): dir_to_be_extracted is not defined in this function;
        # presumably a module-level global - confirm.
        base_dir = dir_to_be_extracted
        with zipfile.ZipFile(path_of_interest, 'r') as zip_ref:
            zip_ref.extractall(base_dir)
    else:
        base_dir = root_dir

    # os.listdir returns bare names; anchor them to the listed directory so
    # the walk does not depend on the current working directory.
    top_dirs = [os.path.join(base_dir, name) for name in os.listdir(base_dir)]

    dicom_dirs = set()
    for dirpath, filename in _iter_dcm_files(top_dirs):
        dicom_dirs.add(dirpath)
        wrapper = nnd.wrapper_from_file(os.path.join(dirpath, filename))
        timings = wrapper.csa_header['tags']['MosaicRefAcqTimes']['items']
        with open(os.path.join(dirpath, 'slice_timings.txt'), 'w') as f:
            f.writelines("%s\n" % elem for elem in timings)

    # sorted() only makes the conversion order reproducible
    for source in sorted(dicom_dirs):
        dcm_to_nii = nipype.interfaces.dcm2nii.Dcm2niix()
        dcm_to_nii.inputs.source_dir = source
        dcm_to_nii.inputs.bids_format = True
        dcm_to_nii.inputs.output_dir = source
        dcm_to_nii.run()

    if memory_control_bool:
        # Clean every scanned tree, not just the last entry of the listing.
        # Materialise the walk first so files are not removed mid-iteration.
        for dirpath, filename in list(_iter_dcm_files(top_dirs)):
            os.remove(os.path.join(dirpath, filename))
def test_read_single():
    """Check that one combined DICOM reads the same from a file and from its folder."""
    dicom_path = 'tests/data/siemens/3/S8457LTU_2_3_00001_00001_173218510000__1263534865.dcm'

    from_file = siemens.Siemens(dicom_path)
    from_file.get_svsdata()
    from_dir = siemens.Siemens('tests/data/siemens/3')
    from_dir.get_svsdata()

    # is it the right size?
    assert from_file.data.shape == from_dir.data.shape == (1, 1, 1, 1, 2048)

    # Decode tag (7fe1, 1010) by hand: little-endian floats, taken pairwise
    # as (real, imaginary).
    wrapper = dcmwrapper.wrapper_from_file(dicom_path)
    raw = wrapper.get((0x7fe1, 0x1010)).value
    n_floats = len(raw) // 4
    floats = struct.unpack("<%df" % n_floats, raw)
    expected = [
        floats[k] + floats[k + 1] * 1j for k in range(0, len(floats), 2)
    ]

    assert (from_file.data == np.conj(expected)).all()
    assert (from_file.data == from_dir.data).all()
def walk_path(self, base_dir):
    """Collect DICOM, NIfTI and JSON files below ``base_dir``.

    Clears the current file list, walks every sub-folder, registers each
    recognised file in ``self.file_names`` / ``self.formats`` (skipping
    paths already present) and finally calls ``self.parse_files()``.

    In folders holding more than one file, names are split with
    ``self.dicom_regex`` (named groups ``File``, ``Nbr``, ``Ext``). Files
    sharing a ``File`` stem and a single extension are pooled into one
    entry: the one with the smallest ``Nbr``.

    Args:
        base_dir (str): Directory to scan recursively.
    """
    from collections import Counter

    self.clear_files()

    # walk through all sub-folders
    for root, _dirs, files in os.walk(base_dir):
        if len(files) > 1:
            # pool multi-file images; match every name once and reuse it
            matches = [self.dicom_regex.match(name) for name in files]
            all_files = [m.group('File') for m in matches]
            all_extensions = [m.group('Ext') for m in matches]
            stem_counts = Counter(all_files)
            # sorted() only makes the pooling order reproducible
            duplicates = sorted(
                stem for stem, n in stem_counts.items()
                if n > 1 and stem is not None)

            for dup in duplicates:
                same = [name.startswith(dup) for name in files]
                ext = {e for e, hit in zip(all_extensions, same) if hit}
                # validate that all files have same extension
                if len(ext) != 1:
                    continue
                str_nbr = [
                    self.dicom_regex.match(name).group('Nbr')
                    for name, hit in zip(files, same) if hit
                ]
                i_nbr = [int(x) for x in str_nbr]
                # remove matching file-names from the files list
                files = [n for n, hit in zip(files, same) if not hit]
                all_extensions = [
                    e for e, hit in zip(all_extensions, same) if not hit
                ]
                # NOTE(review): the pooled name is appended to `files` only,
                # so `all_extensions` is one entry shorter afterwards and
                # zip() above silently ignores the pooled entry.
                first_nbr = str_nbr[i_nbr.index(min(i_nbr))]
                files.append(dup + first_nbr + next(iter(ext)))

        for fname in files:
            # get file extension (all suffixes, e.g. ".nii.gz")
            file_ext = ''.join(pathlib.Path(fname).suffixes)
            file_name = fname.replace(file_ext, '')
            ext_lower = file_ext.lower()
            full_path = os.path.join(root, fname)

            is_dicomdir = (ext_lower in ('.dcm', '')
                           and file_name.upper() == 'DICOMDIR')
            if is_dicomdir or file_name[0:3].upper() == 'KEY':
                # DICOMDIR / KEY* index files are recognised but not
                # registered (the original handling was disabled code).
                continue
            elif ext_lower in ('.dcm', ''):
                # validate dicom format: anything the wrapper cannot read
                # is skipped (best effort, but no longer swallows
                # KeyboardInterrupt / SystemExit)
                try:
                    dicom_wrap.wrapper_from_file(full_path)
                except Exception:
                    continue
                file_format = 'dicom'
            elif ext_lower in ('.nii', '.nii.gz'):
                # nifti or compressed nifti
                file_format = 'nifti'
            elif ext_lower == '.json':
                file_format = 'json'
            else:
                continue

            # append data to structure
            if full_path not in self.file_names:
                self.file_names.append(full_path)
                self.formats.append(file_format)

    self.parse_files()
def infotodict(seqinfo):
    """Heuristic evaluator for determining which runs belong where.

    Allowed template fields - follow python string module:

    item: index within category
    subject: participant id
    seqitem: run number during scanning
    subindex: sub index within group
    session: scan index for longitudinal acq

    Each series is mapped to a key built by ``create_key`` from the
    entities returned by ``get_seq_bids_info``. Derived series are skipped
    unless motion corrected or of image type "UNI". ``sbref`` series are
    additionally registered under ``fmap`` with an ``acq-sbref`` label and
    a per-direction run counter.

    Parameters
    ----------
    seqinfo : sequence
        Series records; each must expose ``example_dcm_file_path``,
        ``series_id``, ``image_type``, ``is_derived`` and
        ``is_motion_corrected``.

    Returns
    -------
    dict
        Template key -> list of series ids, after ``get_dups_marked``.
    """
    lgr.info("Processing %d seqinfo entries", len(seqinfo))

    info = OrderedDict()
    skipped = []
    skip_derived = True
    outtype = ("nii.gz", )
    sbref_as_fieldmap = True  # duplicate sbref in fmap dir to be used by topup
    prefix = ""
    fieldmap_runs = {}

    def join_parts(parts):
        # drop the entities that are not set, join the rest with "_"
        return "_".join(filter(bool, parts))

    for s in seqinfo:
        ex_dcm = nb_dw.wrapper_from_file(s.example_dcm_file_path)
        bids_info = get_seq_bids_info(s, ex_dcm)
        lgr.debug("%s -> %s", s, bids_info)

        # XXX: skip derived sequences, we don't store them to avoid polluting
        # the directory, unless it is the motion corrected ones
        # (will get _rec-moco suffix)
        if (skip_derived
                and (s.is_derived or "MPR" in s.image_type)
                and not s.is_motion_corrected
                and "UNI" not in s.image_type):
            skipped.append(s.series_id)
            lgr.debug("Ignoring derived data %s", s.series_id)
            continue

        seq_type = bids_info["type"]
        seq_label = bids_info["label"]
        is_sbref_fmap = sbref_as_fieldmap and seq_label == "sbref"

        if (seq_type == "fmap" and seq_label == "epi") or is_sbref_fmap:
            # count fieldmap-like series per phase-encoding direction
            pe_dir = bids_info.get("dir", None)
            fieldmap_runs[pe_dir] = fieldmap_runs.get(pe_dir, 0) + 1
            run_id = fieldmap_runs[pe_dir]

            # duplicate sbref to be used as fieldmap
            if is_sbref_fmap:
                suffix = join_parts([
                    "acq-sbref",
                    "ce-%s" % bids_info["ce"] if bids_info.get("ce") else None,
                    "dir-%s" % pe_dir if pe_dir else None,
                    "run-%02d" % run_id,
                    "epi",
                ])
                template = create_key(
                    "fmap", suffix, prefix=prefix, outtype=outtype)
                info.setdefault(template, []).append(s.series_id)

        show_dir = seq_type in ["fmap", "dwi"]

        suffix_parts = [
            "task-%s" % bids_info["task"] if bids_info.get("task") else None,
            "acq-%s" % bids_info["acq"] if bids_info.get("acq") else None,
            "ce-%s" % bids_info["ce"] if bids_info.get("ce") else None,
            "dir-%s" % bids_info["dir"]
            if (bids_info.get("dir") and show_dir) else None,
            "inv-%d" % bids_info["inv"] if bids_info.get("inv") else None,
            "part-%s" % bids_info["part"] if bids_info.get("part") else None,
            "tsl-%d" % bids_info["tsl"] if bids_info.get("tsl") else None,
            "loc-%s" % bids_info["loc"] if bids_info.get("loc") else None,
            "run-%02d" % int(bids_info["run"])
            if bids_info.get("run") else None,
            "bp-%s" % bids_info["bp"] if bids_info.get("bp") else None,
            "echo-%d" % int(bids_info["echo"])
            if bids_info.get("echo") else None,
            seq_label,
        ]
        template = create_key(
            seq_type, join_parts(suffix_parts), prefix=prefix, outtype=outtype)

        # we wanted ordered dict for consistent demarcation of dups
        if template in info and bids_info.get("rec"):
            # Same key seen before: maybe the images were exported with
            # different reconstruction parameters, so add the rec- entity
            # (inserted before loc-).
            suffix_parts.insert(7, "rec-%s" % bids_info["rec"])
            template = create_key(
                seq_type, join_parts(suffix_parts),
                prefix=prefix, outtype=outtype)

        # setdefault keeps series already stored under a rec- key instead of
        # resetting that list on every further collision.
        info.setdefault(template, []).append(s.series_id)

    if skipped:
        lgr.info("Skipped %d sequences: %s", len(skipped), skipped)

    info = get_dups_marked(info)  # mark duplicate ones with __dup-0x suffix
    # convert to dict since outside functionality depends on it being a
    # basic dict
    info = dict(info)
    for key, series_ids in info.items():
        lgr.debug("%s %s", key, series_ids)
    return info
def __init__(self, path):
    """Initialize a Siemens object from a DICOM file or directory.

    If a directory is given, metadata is read from the first DICOM file of
    the directory in sorted name order, and the sorted file list is kept
    in ``self.files``. A directory holding exactly one DICOM file is
    treated as that single file (``self.path`` is changed to it).

    Args:
        path (str): The path to a Siemens SVS DICOM file or directory of
            DICOM files.

    Raises:
        FileNotFoundError: If ``path`` is a directory without any
            ``.dcm`` / ``.ima`` file (either case, not hidden).
    """
    self.path = os.path.realpath(path)

    if os.path.isdir(self.path):
        # keep regular, non-hidden files with a DICOM extension only
        files = sorted(
            f for f in os.listdir(self.path)
            if os.path.isfile(os.path.join(self.path, f))
            and f.endswith(('.DCM', '.dcm', '.ima', '.IMA'))
            and not f.startswith('.'))
        if not files:
            raise FileNotFoundError('No DICOM files found')
        self.files = files

        # Read metadata from the first file of the *sorted* list so the
        # choice does not depend on os.listdir ordering.
        dcmfile = os.path.join(self.path, files[0])

        # a single file, change the path
        if len(files) == 1:
            self.path = dcmfile
    else:
        dcmfile = path

    dcm = dcmwrapper.wrapper_from_file(dcmfile)
    self.csa = csareader.get_csa_header(dcm.dcm_data)

    # NOTE(review): self.meta is not created here; presumably it already
    # exists as a dict (e.g. on the class) - confirm.
    scalar_fields = [
        'MagneticFieldStrength', 'ImagingFrequency', 'MixingTime',
        'EchoTime', 'RepetitionTime', 'ImaCoilString', 'SequenceName',
        'VoiReadoutFoV', 'VoiPhaseFoV', 'VoiThickness',
        'VoiInPlaneRotation', 'DataPointColumns', 'RealDwellTime',
        'PixelBandwidth', 'ImagedNucleus'
    ]
    for k in scalar_fields:
        self.meta[k] = csareader.get_scalar(self.csa, k)

    # vector-valued CSA fields and the number of components requested
    vector_fields = (
        ('ImagePositionPatient', 3),
        ('VoiPosition', 3),
        ('VoiOrientation', 3),
        ('ImageOrientationPatient', 6),
    )
    for k, n_components in vector_fields:
        self.meta[k] = csareader.get_vector(self.csa, k, n_components)
def read_data(self, conj=True):
    """Read the associated fids.

    If the instance was initialized with a directory, the directory is
    assumed to contain a single series in order.

    Dimensions are channel x rep x mega x isis x t

    Args:
        conj (bool): Return the complex conjugate of the stored data.

    Returns:
        numpy.ndarray: The data, also stored in ``self.data``.

    Raises:
        FileNotFoundError: If ``self.path`` is neither a file nor a directory.
        Exception: If the number of files in the directory matches neither
            the uncombined nor the combined layout.
    """
    print('Reading data...')

    if os.path.isfile(self.path):
        dcm = dcmwrapper.wrapper_from_file(self.path)
        data = np.array(_read_fid(dcm), ndmin=5)
        self.dcm_data = dcm.dcm_data
    elif os.path.isdir(self.path):
        # read a directory of DICOMS, each containing one fid
        # the directory must contain more than one file, as this is checked
        # in the class constructor

        # find the instance number of the last dicom to calculate data size
        # this assumes different interleaved acquisitions have the same
        # (0020, 0012) Acquisition Number
        # true for eja sequences
        lastdcmfile = os.path.join(self.path, self.files[-1])
        lastdcm = dcmwrapper.wrapper_from_file(lastdcmfile)
        self.dcm_data = lastdcm.dcm_data
        lastinstance = int(lastdcm.get((0x0020, 0x0013)).value)

        # figure out which channels are on
        csa_series = csareader.get_csa_header(lastdcm.dcm_data, 'series')
        csa_image = csareader.get_csa_header(lastdcm.dcm_data, 'image')
        siemens_hdr = csa_series['tags']['MrPhoenixProtocol']['items'][0]
        coil_pairs = re.findall(
            r'sCoilSelectMeas.aRxCoilSelectData\[0\].asList\[(?P<coilnum>\d+)\].sCoilElementID.tElement\t = \t""(?P<coilname>[HENC]+\d+)',
            siemens_hdr)
        # findall yields (coilnum, coilname); store as coilname -> coilnum
        channels = {name: num for num, name in dict(coil_pairs).items()}
        n_channels = len(channels)
        self.channels = channels

        # is the data combined over channels?
        # mri_probedicom reports ucUncombImages, but where is this in the CSA?
        # the first two instances of uncombined eja sequences are single
        # channels
        # TODO: figure out what they are
        # Assume the first match the last two, which are missing the same
        # channel
        n_reps = lastinstance - 2
        is_combined = False
        # TODO: handle channel combined data
        n_files = len(self.files)
        if n_files != n_reps * (n_channels + 1):
            # not enough files for uncombined data
            if n_files != lastinstance:
                raise Exception(
                    'Expected n_reps[%d] * (n_channels[%d] + 1) files, '
                    'found %d' % (n_reps, n_channels, n_files))
            warnings.warn('Assuming channels are combined')
            n_reps = lastinstance
            n_channels = 1
            is_combined = True

        n_points = int(csareader.get_scalar(csa_image, 'DataPointColumns'))
        data = np.zeros((n_channels, n_reps, 1, 1, n_points), dtype=complex)

        for fname in self.files:
            dcm = dcmwrapper.wrapper_from_file(os.path.join(self.path, fname))
            csa = csareader.get_csa_header(dcm.dcm_data)
            fid = np.array(_read_fid(dcm), ndmin=5)
            channel = csareader.get_scalar(csa, 'ImaCoilString')
            inst = int(dcm.get((0x0020, 0x0013)).value)

            if is_combined:
                data[0, inst - 1, 0, 0, :] = fid
                continue

            # the two leading instances are stored at the end of the rep axis
            ri = n_reps - inst if inst <= 2 else inst - 2 - 1
            # there are combined coils (HC1-7) in the dicoms?
            # make sure this channel is one that is turned on
            if channel in channels:
                data[int(channels[channel]), ri, 0, 0, :] = fid

        print('Read %d acquisitions from %d channels' % (n_reps, n_channels))
        print(channels.keys())
    else:
        # previously fell through to an unbound `data`
        raise FileNotFoundError(self.path)

    # take the complex conjugate, which Tarquin seems to expect
    if conj:
        data = np.conj(data)

    # permute data for SPECIAL and MEGA
    # NOTE(review): n_reps is only bound in the directory branch above, so a
    # single-file input with one of these sequence names fails here.
    if self.get_parameter('SequenceName') in [
            'eja_svs_mpress', 'eja_svs_mslaser'
    ]:
        half = int(n_reps / 2.)
        data_on = data[:, 0:half, 0, ::]
        data_off = data[:, half:n_reps, 0, ::]
        data = np.stack((data_off, data_on), 2)

    self.data = data
    return data
def fallback_dcmstack(self, fnames):
    """
    Create NIFTI from DCM files. Quick and dirty method for
    when DCMSTACK does not exist or fails.

    Basically we determine the sequence using the InstanceNumber tag
    but make sure we put slices together into volumes using the
    SliceLocation tag.

    :param fnames: Sequence of candidate file names; files that cannot be
                   read as DICOM are ignored.
    :return: ``Nifti1Image`` of shape (rows, cols, n_slices, n_vols), using
             the affine of the first readable file.
    :raises QpException: If no readable DICOM slice was found, or if the
                         number of files is not a whole multiple of the
                         number of distinct slice locations.
    """
    import nibabel.nicom.dicomwrappers as nib_dcm

    ignored_files = []
    first = True
    dcms = []
    slices = set()
    ss, rs, ri = 1, 1, 0  # Pixel value scaling

    sys.stdout.write("%3i%%" % 0)
    for idx, fname in enumerate(fnames):
        try:
            dcm = dicom.read_file(fname)
        except Exception:
            # not readable as DICOM: count it and carry on
            ignored_files.append(fname)
            continue
        slices.add(float(dcm.SliceLocation))
        dcms.append(dcm)
        if first:
            dcm1 = nib_dcm.wrapper_from_file(fname)
            dcm_affine = dcm1.get_affine()
            print(dcm_affine)
            try:
                # Need all three of these to be of use, so only take them
                # over once every lookup has succeeded
                scaling = (dcm[0x2005, 0x100e].value,
                           dcm[0x2005, 0x140a].value,
                           dcm[0x2005, 0x1409].value)
            except Exception:
                # best effort: keep the identity scaling
                pass
            else:
                ss, rs, ri = scaling
            first = False
        percent = 100 * float(idx + 1) / len(fnames)
        sys.stdout.write("\b\b\b\b%3i%%" % int(percent))
        sys.stdout.flush()

    n_slices = len(slices)
    if n_slices == 0:
        # was `< 0`, which can never be true and led to a ZeroDivisionError
        raise QpException("This doesn't seem to be a DICOM folder")

    n_vols = len(dcms) // n_slices
    if n_vols * n_slices != len(dcms):
        raise QpException(
            "Could not parse DICOMS - unable to determine fixed number of volumes"
        )

    print("\n")
    print("%i Volumes" % n_vols)
    print("Ignored (non-DICOM) files: %i" % len(ignored_files))
    print("Slice locations are: " + ", ".join([str(s) for s in slices]))
    print("RescaleSlope: %f" % rs)
    print("RescaleIntercept: %f" % ri)
    print("ScaleSlope: %f" % ss)

    # Slice position comes from SliceLocation order, volume position from
    # InstanceNumber order within one slice location.
    for sidx, s in enumerate(sorted(slices)):
        at_location = sorted((d for d in dcms if d.SliceLocation == s),
                             key=lambda x: x.InstanceNumber)
        for vidx, dcm in enumerate(at_location):
            dcm.GlobalIndex = sidx + n_slices * vidx

    print("Creating NIFTI...")
    data = np.zeros(
        [dcm1.image_shape[0], dcm1.image_shape[1], n_slices, n_vols])
    ordered = sorted(dcms, key=lambda x: x.GlobalIndex)
    for pos, dcm in enumerate(ordered):
        # consecutive entries fill one volume slice by slice
        vidx, sidx = divmod(pos, n_slices)
        data[:, :, sidx, vidx] = (np.squeeze(dcm.pixel_array) * rs + ri) / ss

    nii = nib.Nifti1Image(data, dcm_affine)
    nii.update_header()
    print("DONE")
    return nii
print('%s exists!' % filename) # Now we have a dicom we can work with. Let's try loading it with # pydicom, the obvious choice pydicom_dataset = pydicom.dcmread(filename) im0 = pydicom_dataset.pixel_array # prove it's numpy array assert isinstance(im0, np.ndarray), 'I should be a numpy array!' # Let's visually inspect it -- looks good! plt.imshow(im0, cmap='gray') plt.title('Look at me! I\'m a numpy array!') plt.show() # Now let's try it a little different -- use nibabel! nibabel_dataset = wrapper_from_file(filename) im1 = nibabel_dataset.get_pixel_array() # Prove it's a numpy array assert isinstance(im1, np.ndarray), 'I should be a numpy array!' # Again, take a look: plt.imshow(im1, cmap='gray') plt.title('Look at me! I came from a nifti library!') plt.show() # Prove that we get the same data either way: assert np.all(im0 == im1), ( 'I should have loaded the same data both times!')
PatientName = [] DICOMPath = [] EchoTime = [] InPlanePhaseEncodingDirection = [] ImageType = [] MB = [] PhaseEncodingDirectionPositive = [] for d in dirs: #fieldmaps are sometimes stored in the same sequence-this will glob only 1 echo #this gets handled later in the dicom conversion imgs = glob.glob(join(dicom_path, d, 'IM-*[0-9]*.dcm')) imgs.sort() if len(imgs) > 0: dcm = dicom.read_file(imgs[0]) dcm_w = dcmwrappers.wrapper_from_file(imgs[0]) if ('ORIGINAL' in dcm.ImageType) and ('CSA REPORT' not in dcm.ImageType): nimages = nimages + [len(imgs)] ProtocolName = ProtocolName + [dcm.ProtocolName] SeriesNumber = SeriesNumber + [int(dcm.SeriesNumber)] AcquisitionDate = AcquisitionDate + [int(dcm.AcquisitionDate)] AcquisitionTime = AcquisitionTime + [float(dcm.AcquisitionTime)] PatientID = PatientID + [dcm.PatientID] PatientName = PatientName + [dcm.PatientName] DICOMPath = DICOMPath + [join(dicom_path, d)] EchoTime = EchoTime + [float(dcm.EchoTime)] if 'InPlanePhaseEncodingDirection' in dcm.dir(): InPlanePhaseEncodingDirection = InPlanePhaseEncodingDirection + [ dcm.InPlanePhaseEncodingDirection ]