forked from vistalab/scitran-data
/
nimsdicom.py
executable file
·338 lines (298 loc) · 19.4 KB
/
nimsdicom.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
#!/usr/bin/env python
#
# @author: Reno Bowen
# Gunnar Schaefer
# Bob Dougherty
import os
import dicom
import logging
import tarfile
import argparse
import datetime
import cStringIO
import numpy as np
import nimspng
import nimsmrdata
import nimsnifti
# Module-level logger used by the conversion routines below.
log = logging.getLogger('nimsdicom')
# Turn off pydicom's enforce_valid_values switch.
# NOTE(review): presumably so that headers with non-conformant values can still be read -- confirm.
dicom.config.enforce_valid_values = False
# ImageType value lists. TYPE_ORIGINAL and TYPE_SCREEN are compared for equality against the
# header's ImageType elsewhere in this module; TYPE_EPI is not referenced in the visible code.
TYPE_ORIGINAL = ['ORIGINAL', 'PRIMARY', 'OTHER']
TYPE_EPI = ['ORIGINAL', 'PRIMARY', 'EPI', 'NONE']
TYPE_SCREEN = ['DERIVED', 'SECONDARY', 'SCREEN SAVE']
# GE-specific tags
# Each tag is a (group, element) pair, used as a lookup key into the dicom header via getelem().
TAG_PSD_NAME = (0x0019, 0x109c)
TAG_PSD_INAME = (0x0019, 0x109e)
TAG_PHASE_ENCODE_DIR = (0x0018, 0x1312)
TAG_EPI_EFFECTIVE_ECHO_SPACING = (0x0043, 0x102c)
TAG_PHASE_ENCODE_UNDERSAMPLE = (0x0043, 0x1083)
TAG_SLICES_PER_VOLUME = (0x0021, 0x104f)
TAG_DIFFUSION_DIRS = (0x0019, 0x10e0)
TAG_BVALUE = (0x0043, 0x1039)
# One tag per b-vector component; indexed 0..2 when the b-vectors are assembled.
TAG_BVEC = [(0x0019, 0x10bb), (0x0019, 0x10bc), (0x0019, 0x10bd)]
TAG_MTOFF_HZ = (0x0043, 0x1034)
# Siemens-specific tags
TAG_IMAGES_IN_MOSAIC = (0x0019, 0x100a)
# The two lines below are reference notes only; no Siemens/Philips diffusion tags are defined here.
# NOTE(review): the Philips element numbers have five hex digits (0x100B0...), which cannot be a
# 16-bit element number -- possibly meant 0x10B0..0x10B2; confirm before using them.
# Siemens: b-value=(0x0019, 0x100C), all 3 grad dirs=(0x0019, 0x100E), and csa (??) =(0x0029, [0x1010 | 0x1020])
# Philips: b-value=(0x2001, 0x1003), grad dirs=(0x2005, [0x100B0, 0x100B1, 0x100B2])
def getelem(hdr, tag, type_=None, default=None):
    """
    Fetch one element from a header, optionally converting it, with a fallback value.

    hdr     -- object to read from. String tags are read with getattr(hdr, tag); any other
               tag (e.g. a (group, element) tuple) is read as hdr[tag].value.
    tag     -- attribute name (string) or lookup key.
    type_   -- optional callable applied to the value; applied element-wise when the value
               is a list (the result is then a plain list).
    default -- returned when the element is missing or cannot be converted.

    Returns the (possibly converted) value, or `default` on AttributeError, KeyError,
    ValueError or TypeError.
    """
    # (str, unicode) on Python 2 and (str, str) on Python 3: the same test as `basestring`,
    # without raising NameError on interpreters that lack that name.
    string_types = (str, type(u''))
    try:
        value = getattr(hdr, tag) if isinstance(tag, string_types) else hdr[tag].value
        if type_ is not None:
            value = [type_(x) for x in value] if isinstance(value, list) else type_(value)
    except (AttributeError, KeyError, ValueError, TypeError):
        # TypeError covers converters rejecting the value's type, e.g. float(None);
        # like an unparsable string, that means "no usable value", not a crash.
        value = default
    return value
class NIMSDicomError(nimsmrdata.NIMSMRDataError):
    """Error raised by NIMSDicom, e.g. when a dicom header cannot be read."""
class NIMSDicom(nimsmrdata.NIMSMRData):

    """
    MR dataset backed by DICOM files.

    The source may be a tarball of dicoms, a directory of dicoms, or a single dicom file.
    Construction reads a single header and fills in the acquisition attributes; the full set
    of dicoms (and their pixel data) is only read by load_dicoms(), which load_all_metadata(),
    get_imagedata() and convert() call as needed.
    """

    filetype = u'dicom'
    # NOTE(review): priority / parse_priority are not used in this module; presumably they are
    # consumed by the nimsmrdata machinery to rank readers -- confirm against callers.
    priority = 0
    parse_priority = 9

    def __init__(self, dcm_path, metadata_only=True):
        """
        Read one dicom header from dcm_path and populate the acquisition attributes.

        dcm_path      -- tarball of dicoms, directory of dicoms, or single dicom file.
        metadata_only -- passed as stop_before_pixels to dicom.read_file for the header read here.

        Raises NIMSDicomError if the header cannot be read.
        """
        self.filepath = dcm_path
        try:
            if os.path.isfile(self.filepath) and tarfile.is_tarfile(self.filepath):
                # compressed tarball
                self.compressed = True
                with tarfile.open(self.filepath) as archive:
                    archive.next()  # skip over top-level directory
                    member = archive.extractfile(archive.next())
                    self._hdr = dicom.read_file(cStringIO.StringIO(member.read()), stop_before_pixels=metadata_only)
            else:
                # directory of dicoms or single file
                self.compressed = False
                if os.path.isfile(self.filepath):
                    dcm_path = self.filepath
                else:
                    dcm_path = os.path.join(self.filepath, os.listdir(self.filepath)[0])
                self._hdr = dicom.read_file(dcm_path, stop_before_pixels=metadata_only)
        except Exception as e:
            raise NIMSDicomError(str(e))

        # exam_no and patient_id are set before the base-class initializer runs, as in the original ordering.
        self.exam_no = getelem(self._hdr, 'StudyID', int)
        self.patient_id = getelem(self._hdr, 'PatientID')
        super(NIMSDicom, self).__init__()

        def first_nonempty(tags, fallback):
            # First non-empty header value among `tags`, else `fallback`. An element that is
            # present but empty is skipped, so strptime below is never fed a blank field.
            for tag in tags:
                value = getelem(self._hdr, tag)
                if value:
                    return value
            return fallback

        self.series_no = getelem(self._hdr, 'SeriesNumber', int)
        self.acq_no = getelem(self._hdr, 'AcquisitionNumber', int, 0)
        self.exam_uid = getelem(self._hdr, 'StudyInstanceUID')
        self.series_uid = getelem(self._hdr, 'SeriesInstanceUID')
        self.series_desc = getelem(self._hdr, 'SeriesDescription')
        self.subj_firstname, self.subj_lastname = self.parse_subject_name(getelem(self._hdr, 'PatientName', None, ''))
        self.subj_dob = self.parse_subject_dob(getelem(self._hdr, 'PatientBirthDate', None, ''))
        self.subj_sex = {'M': 'male', 'F': 'female'}.get(getelem(self._hdr, 'PatientSex'))
        self.psd_name = os.path.basename(getelem(self._hdr, TAG_PSD_NAME, None, 'unknown'))
        self.psd_type = nimsmrdata.infer_psd_type(self.psd_name)
        acq_date = first_nonempty(('AcquisitionDate', 'StudyDate'), '19000101')
        acq_time = first_nonempty(('AcquisitionTime', 'StudyTime'), '000000')
        # Only the first six characters (HHMMSS) of the time are parsed; any fraction is dropped.
        self.timestamp = datetime.datetime.strptime(acq_date + acq_time[:6], '%Y%m%d%H%M%S')
        # Header times are divided by 1000 (msec to sec).
        self.ti = getelem(self._hdr, 'InversionTime', float, 0.) / 1000.0
        self.te = getelem(self._hdr, 'EchoTime', float, 0.) / 1000.0
        self.tr = getelem(self._hdr, 'RepetitionTime', float, 0.) / 1000.0
        self.flip_angle = getelem(self._hdr, 'FlipAngle', float, 0.)
        self.pixel_bandwidth = getelem(self._hdr, 'PixelBandwidth', float, 0.)
        self.phase_encode = int(getelem(self._hdr, 'InPlanePhaseEncodingDirection', None, '') == 'COL')
        self.mt_offset_hz = getelem(self._hdr, TAG_MTOFF_HZ, float, 0.)
        self.images_in_mosaic = getelem(self._hdr, TAG_IMAGES_IN_MOSAIC, int, 0)
        self.total_num_slices = getelem(self._hdr, 'ImagesInAcquisition', int, 0)
        self.num_slices = getelem(self._hdr, TAG_SLICES_PER_VOLUME, int, 1)
        # Explicit floor division: these are counts and must stay integers.
        self.num_timepoints = getelem(self._hdr, 'NumberOfTemporalPositions', int, self.total_num_slices // self.num_slices)
        if self.total_num_slices == self.num_slices:
            self.total_num_slices = self.num_slices * self.num_timepoints
        self.num_averages = getelem(self._hdr, 'NumberOfAverages', int, 1)
        self.num_echos = getelem(self._hdr, 'EchoNumbers', int, 1)
        self.receive_coil_name = getelem(self._hdr, 'ReceiveCoilName', None, 'unknown')
        self.num_receivers = 0  # FIXME: where is this stored?
        self.prescribed_duration = self.tr * self.num_timepoints * self.num_averages  # FIXME: probably need more hacks in here to compute the correct duration.
        self.duration = self.prescribed_duration  # actual duration can only be computed after all data are loaded
        self.operator = getelem(self._hdr, 'OperatorsName', None, 'unknown')
        self.protocol_name = getelem(self._hdr, 'ProtocolName', None, 'unknown')
        # Strip the formatted result (not the format string), so a missing half leaves no stray blank.
        self.scanner_name = ('%s %s' % (getelem(self._hdr, 'InstitutionName', None, ''), getelem(self._hdr, 'StationName', None, ''))).strip()
        self.scanner_type = ('%s %s' % (getelem(self._hdr, 'Manufacturer', None, ''), getelem(self._hdr, 'ManufacturerModelName', None, ''))).strip()
        self.acquisition_type = getelem(self._hdr, 'MRAcquisitionType', None, 'unknown')
        # Slice spacing falls back to the slice thickness, then to 1.
        slice_spacing = getelem(self._hdr, 'SpacingBetweenSlices', float, getelem(self._hdr, 'SliceThickness', float, 1.))
        self.mm_per_vox = getelem(self._hdr, 'PixelSpacing', float, [1., 1.]) + [slice_spacing]
        # FIXME: confirm that DICOM (Columns,Rows) = PFile (X,Y)
        self.size = [getelem(self._hdr, 'Columns', int, 0), getelem(self._hdr, 'Rows', int, 0)]
        self.fov = 2 * [getelem(self._hdr, 'ReconstructionDiameter', float, 0.)]
        # A missing, unparsable or zero PercentPhaseFieldOfView leaves the FOV unscaled
        # rather than dividing by zero.
        pct_phase_fov = getelem(self._hdr, 'PercentPhaseFieldOfView', float, 0.)
        phase_fov_scale = pct_phase_fov / 100. if pct_phase_fov else 1.
        acq_matrix = getelem(self._hdr, 'AcquisitionMatrix', None, [0, 0, 0, 0])
        # Dicom convention is ROW,COL. E.g., ROW is the first dim (index==0), COL is the second (index==1)
        if self.phase_encode == 1:
            # The Acquisition matrix field includes four values: [freq rows, freq columns, phase rows, phase columns].
            # E.g., for a 64x64 image, it would be [64,0,0,64] if the image row axis was the frequency encoding axis or
            # [0,64,64,0] if the image row was the phase encoding axis.
            self.acquisition_matrix = acq_matrix[0:4:3]
            self.fov[1] /= phase_fov_scale
        else:
            # We want the acq matrix to always be ROWS,COLS, so we flip the order for the case where the phase encode is the first dim:
            self.acquisition_matrix = acq_matrix[2:0:-1]
            self.fov[0] /= phase_fov_scale
        # The undersampling pair arrives either as a backslash-separated string or as a sequence.
        r = getelem(self._hdr, TAG_PHASE_ENCODE_UNDERSAMPLE, None, [1., 1.])
        self.phase_encode_undersample, self.slice_encode_undersample = [float(x) for x in (r.split('\\') if isinstance(r, basestring) else r)]
        self.num_bands = 1  # assume that dicoms are never multiband
        self.qto_xyz = None
        self.image_type = getelem(self._hdr, 'ImageType', None, [])
        # NOTE(review): the 1e6 divisor suggests the tag is in microseconds -- confirm.
        self.effective_echo_spacing = getelem(self._hdr, TAG_EPI_EFFECTIVE_ECHO_SPACING, float, 0.) / 1e6
        self.phase_encode_direction = None  # FINDME: 'pepolar'-- stored in bit 4 of rec.dacq_ctrl in pfiles. Probably in a private tag in DICOM.
        self.is_dwi = bool(self.image_type == TYPE_ORIGINAL and getelem(self._hdr, TAG_DIFFUSION_DIRS, int, 0) >= 6)
        # The attributes below are filled in by load_all_metadata() / load_dicoms().
        self.bvals = None
        self.bvecs = None
        self.slice_order = None
        self.slice_duration = None
        self.reverse_slice_order = None
        self.notes = ''
        self.scan_type = self.infer_scan_type()
        self.dcm_list = None

    def write_anonymized_file(self, filepath):
        """
        Save the header held by this object to filepath with the patient identity reduced.

        PatientName is blanked and the birth date keeps its first six characters followed by
        '15' (an empty or missing birth date is written as ''). The in-memory header is
        modified in place before being saved.
        """
        self._hdr.PatientName = ''
        # getattr with a default: a header without PatientBirthDate must not raise AttributeError.
        birth_date = getattr(self._hdr, 'PatientBirthDate', '')
        self._hdr.PatientBirthDate = birth_date[:6] + '15' if birth_date else ''
        self._hdr.save_as(filepath)

    def get_imagedata(self):
        """
        Return the pixel data of all loaded dicoms as one 4-D array.

        Each slice is its dicom's pixel_array with the first two axes swapped; the third axis
        indexes slices within a volume and the fourth indexes volumes. Missing slices or
        volumes are zero-padded (a warning is logged and appended to self.notes), and the
        slice axis is reversed when self.reverse_slice_order is set. Loads all metadata
        first if the dicoms have not been loaded yet.
        """
        if self.dcm_list is None:
            self.load_all_metadata()
        slice_loc = [getelem(dcm, 'SliceLocation') for dcm in self.dcm_list]
        imagedata = np.dstack([np.swapaxes(dcm.pixel_array, 0, 1) for dcm in self.dcm_list])
        # Use the in-plane shape of the stacked data itself. It is (Columns, Rows) after the
        # axis swap, so building dims from the header as (Rows, Columns) only matched for
        # square images.
        in_plane = imagedata.shape[:2]
        dims = in_plane + (self.num_slices, self.num_timepoints)
        slices_total = len(self.dcm_list)

        # If we can figure the dimensions out, reshape the matrix
        if np.prod(dims) == np.size(imagedata):
            imagedata = imagedata.reshape(dims, order='F')
        else:
            log.debug('dimensions inconsistent with size, attempting to construct volume')
            # round up slices to nearest multiple of self.num_slices
            slices_total_rounded_up = ((slices_total + self.num_slices - 1) // self.num_slices) * self.num_slices
            slices_padding = slices_total_rounded_up - slices_total
            if slices_padding:  # LOOK AT THIS MORE CLOSELY TODO
                msg = 'dimensions indicate missing slices from volume - zero padding with %d slices' % slices_padding
                self.notes += 'WARNING: ' + msg + '\n'
                log.warning(msg)
                # Pad with the data's own dtype (as the volume padding below does) so the
                # stack is not silently promoted to float64.
                padding = np.zeros(in_plane + (slices_padding,), dtype=imagedata.dtype)
                imagedata = np.dstack([imagedata, padding])
            volume_start_indices = range(0, slices_total_rounded_up, self.num_slices)
            imagedata = np.concatenate([imagedata[:, :, index:(index + self.num_slices), np.newaxis] for index in volume_start_indices], axis=3)

        # Check for multi-echo data where duplicate slices might be interleaved
        # TODO: we only handle the 4d case here, but this could in theory happen with others.
        # TODO: it's inefficient to reshape the array above and *then* check to see if
        # that shape is wrong. The reshape op is expensive, and to fix the shape requires
        # an expensive loop and a copy of the data, which doubles memory usage. Instead, try
        # to do the de-interleaving up front in the beginning.
        if self.num_timepoints > 1 and slice_loc[0::self.num_timepoints] == slice_loc[1::self.num_timepoints] and imagedata.ndim == 4:
            # If a scan was aborted, the number of volumes might be less than the target number of
            # volumes (self.num_timepoints). We'll zero-pad in that case.
            if imagedata.shape[3] < self.num_timepoints:
                pad_vols = self.num_timepoints - imagedata.shape[3]
                msg = 'dimensions indicate missing data - zero padding with %d volumes' % pad_vols
                self.notes += 'WARNING: ' + msg + '\n'
                log.warning(msg)
                imagedata = np.append(imagedata, np.zeros(imagedata.shape[0:3] + (pad_vols,), dtype=imagedata.dtype), axis=3)
            nvols = np.prod(imagedata.shape[2:4])
            tmp = imagedata.copy().reshape([imagedata.shape[0], imagedata.shape[1], nvols], order='F')
            # De-interleave: every num_timepoints-th image of the flat stack belongs to the same volume.
            for vol_num in range(self.num_timepoints):
                imagedata[:, :, :, vol_num] = tmp[:, :, vol_num::self.num_timepoints]

        if self.reverse_slice_order:
            log.debug('flipping slice order')
            imagedata = imagedata[:, :, ::-1, :]
        return imagedata

    def load_all_metadata(self):
        """
        Load every dicom (if not already loaded) and derive the series-level metadata.

        Sets bvals/bvecs for diffusion scans, reconciles num_slices / total_num_slices /
        num_timepoints with the number of dicoms found, determines reverse_slice_order,
        origin, slice_order, slice_duration and the qto_xyz affine, then calls the
        base-class load_all_metadata().
        """
        # TODO: make this less expensive. We can probably get away with a much more selective
        # loading of dicoms. E.g, maybe just the first volume and then the first slice of each subsequent vol?
        if self.dcm_list is None:
            self.load_dicoms()
        if self.is_dwi:
            # One b-value / b-vector per volume, taken from the first slice of each volume.
            self.bvals = np.array([getelem(dcm, TAG_BVALUE, float)[0] for dcm in self.dcm_list[0::self.num_slices]])
            self.bvecs = np.array([[getelem(dcm, TAG_BVEC[i], float) for i in range(3)] for dcm in self.dcm_list[0::self.num_slices]]).transpose()

        # Try to carry on on incomplete datasets. Also, some weird scans like MRVs don't set the
        # number of slices correctly in the dicom header. (Or at least they set it in a weird way
        # that we don't understand.)
        if len(self.dcm_list) < self.num_slices:
            self.num_slices = len(self.dcm_list)
        if len(self.dcm_list) < self.total_num_slices:
            self.total_num_slices = len(self.dcm_list)

        image_position = [tuple(getelem(dcm, 'ImagePositionPatient', float, [0., 0., 0.])) for dcm in self.dcm_list]
        if self.num_timepoints == 1:
            # Unique positions as rows of an (n, 3) array, sorted lexicographically.
            # np.unique without an axis would flatten the positions into scalars,
            # which makes the per-row distance below impossible.
            unique_slice_pos = np.array(sorted(set(image_position)), dtype=float)
            if len(unique_slice_pos) > 1:
                # crude check for a 3-plane localizer. When we get one of these, we actually
                # want each plane to be a different time point.
                slice_distance = np.sqrt((np.diff(unique_slice_pos, axis=0) ** 2).sum(1))
                # ugly hack! The number of time points is the number of big (>10mm) jumps in slice-to-slice distance.
                self.num_timepoints = int(np.sum((slice_distance - np.median(slice_distance)) > 10)) + 1
            self.num_slices = self.total_num_slices // self.num_timepoints

        cosines = getelem(self._hdr, 'ImageOrientationPatient', float, 6 * [np.nan])
        row_cosines = np.array(cosines[0:3])
        col_cosines = np.array(cosines[3:6])

        # Compute the slice_norm. From the NIFTI-1 header:
        # The third column of R will be either the cross-product of the first 2 columns or
        # its negative. It is possible to infer the sign of the 3rd column by examining
        # the coordinates in DICOM attribute (0020,0032) "Image Position (Patient)" for
        # successive slices. However, this method occasionally fails for reasons that I
        # (RW Cox) do not understand.

        # For Siemens data, it seems that looking at 'SliceNormalVector' can help resolve this.
        # dicom_slice_norm = getelem(self._hdr, 'SliceNormalVector', float, None)
        # if dicom_slice_norm != None and np.dot(self.slice_norm, dicom_slice_norm) < 0.:
        # self.slice_norm = -self.slice_norm
        # But otherwise, we'll have to fix this up after we load all the dicoms and check
        # the slice-to-slice position deltas.
        slice_norm = np.cross(row_cosines, col_cosines)

        # FIXME: the following could fail if the acquisition was before a full volume was aquired.
        if np.dot(slice_norm, image_position[0]) > np.dot(slice_norm, image_position[self.num_slices - 1]):
            log.debug('flipping slice order')
            # slice_norm = -slice_norm
            self.reverse_slice_order = True
            self.origin = image_position[self.num_slices - 1] * np.array([-1, -1, 1])
        else:
            self.origin = image_position[0] * np.array([-1, -1, 1])

        self.slice_order = nimsmrdata.SLICE_ORDER_UNKNOWN
        if self.total_num_slices >= self.num_slices and getelem(self.dcm_list[0], 'TriggerTime', float) is not None:
            trigger_times = np.array([getelem(dcm, 'TriggerTime', float) for dcm in self.dcm_list[0:self.num_slices]])
            if self.reverse_slice_order:
                # the slice order will be flipped when the image is saved, so flip the trigger times
                trigger_times = trigger_times[::-1]
            trigger_times_from_first_slice = trigger_times[0] - trigger_times
            if self.num_slices > 2:
                self.slice_duration = float(min(abs(trigger_times_from_first_slice[1:]))) / 1000.  # msec to sec
                if trigger_times_from_first_slice[1] < 0:
                    # slice 1 happened after slice 0, so this must be either SEQ_INC or ALT_INC
                    self.slice_order = nimsmrdata.SLICE_ORDER_SEQ_INC if trigger_times[2] > trigger_times[1] else nimsmrdata.SLICE_ORDER_ALT_INC
                else:
                    # slice 1 before slice 0, so must be ALT_DEC or SEQ_DEC
                    self.slice_order = nimsmrdata.SLICE_ORDER_ALT_DEC if trigger_times[2] > trigger_times[1] else nimsmrdata.SLICE_ORDER_SEQ_DEC
            else:
                self.slice_duration = trigger_times[0]
                self.slice_order = nimsmrdata.SLICE_ORDER_SEQ_INC

        rot = nimsmrdata.compute_rotation(row_cosines, col_cosines, slice_norm)
        if self.is_dwi:
            self.bvecs, self.bvals = nimsmrdata.adjust_bvecs(self.bvecs, self.bvals, self.scanner_type, rot)
        self.qto_xyz = nimsmrdata.build_affine(rot, self.mm_per_vox, self.origin)
        super(NIMSDicom, self).load_all_metadata()

    def load_dicoms(self):
        """
        Read every dicom (including pixel data) from self.filepath into self.dcm_list.

        Handles a tarball, a single file, or a directory; only regular files are read.
        The resulting list is sorted by InstanceNumber.
        """
        if os.path.isfile(self.filepath) and tarfile.is_tarfile(self.filepath):  # compressed tarball
            with tarfile.open(self.filepath) as archive:
                self.dcm_list = [dicom.read_file(cStringIO.StringIO(archive.extractfile(ti).read())) for ti in archive if ti.isreg()]
        elif os.path.isfile(self.filepath):  # single file
            self.dcm_list = [dicom.read_file(self.filepath)]
        else:  # directory of dicoms
            # Skip sub-directories and other non-regular entries, mirroring the isreg() filter above.
            paths = [os.path.join(self.filepath, f) for f in os.listdir(self.filepath)]
            self.dcm_list = [dicom.read_file(p) for p in paths if os.path.isfile(p)]
        self.dcm_list.sort(key=lambda dcm: dcm.InstanceNumber)

    def convert(self, outbase, *args, **kwargs):
        """
        Convert the dataset to bitmaps or a NIfTI, depending on its ImageType.

        outbase -- basename for the output file(s); screen saves get a '_<n>' suffix per image.

        Returns ('bitmap', <NIMSPNG.write result for the last image>) for screen saves,
        ('nifti', <NIMSNifti.write result>) when 'PRIMARY' is in the ImageType, and
        (None, None) when no conversion applies. Returns None (after logging a warning)
        when the ImageType is empty. Extra positional/keyword arguments are accepted and ignored.
        """
        if not self.image_type:
            log.warning('dicom conversion failed for %s: ImageType not set in dicom header' % os.path.basename(outbase))
            return
        result = (None, None)
        if self.image_type == TYPE_SCREEN:
            self.load_dicoms()
            for i, dcm in enumerate(self.dcm_list):
                result = ('bitmap', nimspng.NIMSPNG.write(self, dcm.pixel_array, outbase + '_%d' % (i+1)))
        elif 'PRIMARY' in self.image_type:
            imagedata = self.get_imagedata()
            result = ('nifti', nimsnifti.NIMSNifti.write(self, imagedata, outbase, self.notes))
        if result[0] is None:
            log.warning('dicom conversion failed for %s: no applicable conversion defined' % os.path.basename(outbase))
        return result
class ArgumentParser(argparse.ArgumentParser):

    """Command-line parser: a required dicom directory and an optional output basename."""

    def __init__(self):
        # The description is handed to the base initializer rather than assigned afterwards;
        # the parser ends up in the same state.
        super(ArgumentParser, self).__init__(description="""Convert a directory of dicom images to a NIfTI or bitmap.""")
        positionals = (
            ('dcm_dir', {'help': 'directory of dicoms to convert'}),
            ('outbase', {'nargs': '?', 'help': 'basename for output files (default: dcm_dir)'}),
        )
        for arg_name, options in positionals:
            self.add_argument(arg_name, **options)
if __name__ == '__main__':
    # Script entry point: parse the command line, enable debug logging, then convert.
    args = ArgumentParser().parse_args()
    logging.basicConfig(level=logging.DEBUG)
    dataset = NIMSDicom(args.dcm_dir)
    # Without an explicit outbase, use the last path component of dcm_dir (trailing '/' ignored).
    outbase = args.outbase or os.path.basename(args.dcm_dir.rstrip('/'))
    dataset.convert(outbase)