def _generate_waveform_sequence(self):
    """Build the DICOM Waveform Sequence from the packed sample file.

    Reads records from ``self._file`` (one record per sample, laid out
    according to ``self._format``), passes every value through
    ``self._adjust_callback`` and repacks each record as little-endian
    signed 16-bit integers, one per channel.

    The samples are split over several sequence items. When the file
    holds more samples than the item limits allow, the surplus is dropped
    and a warning is emitted.

    Returns:
        A ``dicom.sequence.Sequence`` with one ``Dataset`` per item.

    Raises:
        AssertionError: if the channel count or the sampling frequency is
            outside the range accepted for the selected waveform kind.
    """
    if self._is_12_lead_ecg:
        maximum_waveform_sequences = 5  # PS3.3 A.34.3.4.3 Waveform Sequence
        maximum_waveform_samples = 16384  # PS3.3 A.34.3.4.5 Number of Waveform Samples
        maximum_waveform_channels = 13  # PS3.3 A.34.3.4.4
    else:
        maximum_waveform_sequences = 4  # PS3.3 A.34.4.4.2 Waveform Sequence
        maximum_waveform_samples = 2 ** 32 - 1  # NumberOfWaveformSamples's VR is `UL`.
        maximum_waveform_channels = 24  # PS3.3 A.34.4.4.3 Number of Waveform Channels

    # These checks do not depend on the individual sequence item, so they
    # are performed once up front instead of once per item.
    assert 1 <= self._channels <= maximum_waveform_channels
    assert 200 <= self._sampling_frequency <= 1000  # DICOM PS3.3-2015a A.34.3.4.6

    data_file_len = fileutil.file_size(self._file)
    pack_size = struct.calcsize(self._format)
    data_file_total_samples = data_file_len // pack_size
    saved_samples = maximum_waveform_sequences * maximum_waveform_samples
    if data_file_total_samples > saved_samples:
        warn_msg = (
            'File "{}" is too big. File size: {}, pack size: {}, format string: {}, '
            "total samples: {}, saved samples: {}, saved size: {}.".format(
                fileutil.file_name(self._file),
                data_file_len,
                pack_size,
                self._format,
                data_file_total_samples,
                saved_samples,
                saved_samples * pack_size,
            )
        )
        if not _frozen:  # Fix issue #7
            warnings.warn(warn_msg)
        # `Logger.warn` is a deprecated alias; `warning` is the supported name.
        logger.warning(warn_msg)
        data_file_total_samples = saved_samples

    waveform_seq = dicom.sequence.Sequence()
    data_unpacker = unpacker.unpack_data_from_file(self._file, self._format)
    target_fmt = "<{}".format("h" * self._channels)
    # Lazy pipeline: a single iterator shared by all sequence items, so each
    # item continues reading where the previous one stopped.
    adjusted_data = map(lambda v: map(self._adjust_callback, v), data_unpacker)

    while data_file_total_samples > 0:
        seq_item = dicom.dataset.Dataset()
        seq_item.WaveformOriginality = "ORIGINAL"  # Type 1
        seq_item.NumberOfWaveformChannels = self._channels  # Type 1.
        seq_item.NumberOfWaveformSamples = min(
            data_file_total_samples, maximum_waveform_samples
        )  # Type 1. UL.
        data_file_total_samples -= seq_item.NumberOfWaveformSamples
        seq_item.SamplingFrequency = "{:d}".format(self._sampling_frequency)  # Type 1. DS.
        seq_item.ChannelDefinitionSequence = self._generate_channel_definition_sequence()  # Type 1.
        # Type 1. PS3.3 C.10.9.1.5 Waveform Bits Allocated and Waveform Sample Interpretation
        seq_item.WaveformBitsAllocated = 16
        seq_item.WaveformSampleInterpretation = "SS"  # Type 1. PS3.3 A.34.3.4.8 Waveform Sample Interpretation
        # The VR of `Waveform Padding Value` may be OB or OW, so:
        #     seq_item.WaveformPaddingValue = b'\x00\x00'
        # will not work, instead:
        seq_item.add_new((0x5400, 0x100A), "OW", b"\x00\x80")  # Type 1C. OB or OW.

        data = bytearray()
        # zip() asks `range` first, so once the quota is reached no extra
        # record is pulled from the shared iterator.
        for _, record in zip(range(seq_item.NumberOfWaveformSamples), adjusted_data):
            data.extend(struct.pack(target_fmt, *record))
        seq_item.add_new((0x5400, 0x1010), "OW", bytes(data))  # WaveformData. Type 1. OB or OW.
        waveform_seq.append(seq_item)
    return waveform_seq
if not dest: dest = fileutil.replace_ext(src, ".dcm") data_set = DCMECGDataset( src, "<{}".format("d" * 12), 500, 12, ("I", "II", "III", "aVR", "aVL", "aVF", "V1", "V2", "V3", "V4", "V5", "V6"), adjust_callback=lambda v: int(v * 1000 / 6), ) data_set.save_as(dest) if __name__ == "__main__": u = unpacker.unpack_data_from_file(r"E:\data\1\20110607153002.dat", "<d") print(max(u)) u = unpacker.unpack_data_from_file(r"E:\data\1\20110607153002.dat", "<d") print(min(u)) ecg_to_dcm(r"E:\data\1\20110607153002.dat") ecg_to_dcm(r"d:\20120503152310.dat") # References: # DICOM 2015a PS3.5 7.4 Data Element Type # DICOM 2015a PS3.5 8.3 Waveform Data and Related Data Elements # DICOM 2015a PS3.6 6 Registry of DICOM Data Elements # [DICOM Waveform Generator](http://libir.tmu.edu.tw/bitstream/987654321/21661/1/B09.pdf) # [Mandatory Tags for DICOM Instance](http://stackoverflow.com/questions/6608535/mandatory-tags-for-dicom-instance) # [Questions regarding the DICOM file](http://fixunix.com/dicom/50267-questions-regarding-dicom-file.html) # [Dicom: What's the point of SOPInstanceUID tag?](http://stackoverflow.com/questions/1434918/dicom-whats-the-point-of-sopinstanceuid-tag)
def main():
    """Command-line entry point: dump records of a binary file as text.

    Parses the command line, validates the arguments, then writes every
    record produced by ``unpacker.unpack_data_from_file`` as one line of
    delimiter-separated ``repr`` values, optionally transforming each
    column with a factor. Output goes to ``--output-file`` when given,
    otherwise to standard output.

    Exits with status 1 (after a message on stderr) when the input file
    is missing, the skip count is negative, the factor is invalid, or the
    output file already exists and ``--overwrite`` was not given.
    """
    prog = os.path.splitext(os.path.basename(sys.argv[0]))[0]
    parser = argparse.ArgumentParser(
        prog=prog,
        description='Extract data from binary file.',
    )
    parser.add_argument('-i', '--input-file', required=True,
                        help='Input file path')
    parser.add_argument('--skip-bytes', type=int, default=0,
                        help='Skip how many bytes at the begin of the file')
    parser.add_argument('-f', '--format', required=True,
                        help='Format string to describe the data')
    # NOTE(review): `--endian` is parsed but not referenced anywhere else in
    # this function; confirm whether it should influence `args.format`.
    parser.add_argument('--endian',
                        choices=('little', 'ieee-le', 'le', 'l',
                                 'big', 'ieee-be', 'be', 'b',
                                 'native', 'n'),
                        default='native',
                        help='Endianess of binary data')
    parser.add_argument('-F', '--factor', help='Factor')
    parser.add_argument('-o', '--output-file', help='Output file path')
    parser.add_argument('-d', '--delimiter',
                        help='Column delimiter of output file')
    parser.add_argument('-O', '--overwrite', action='store_true',
                        help='Overwrite existing file')
    args = parser.parse_args()

    if not os.path.isfile(args.input_file):
        sys.stderr.write('Input file \'{0}\' doesn\'t exist!\n'.format(
            args.input_file))
        sys.exit(1)

    if args.skip_bytes < 0:
        # Trailing newline added for consistency with the other messages.
        sys.stderr.write('Invalid skip bytes: {0}.\n'.format(args.skip_bytes))
        sys.exit(1)

    # A factor string can be:
    #   1. A numeric literal: "0.001"
    #   2. A math expression: "1.0/1000.0"
    #   3. A math expression with unknown value: "x+300"
    # For 1 and 2, we can get the value of factor via `parse_factor`.
    # For 3, we first replace `x` with a sample value, and then we call
    # `math_eval`. If it fails, the factor string is not a valid one.
    # Otherwise, we evaluate it later, once per column.
    factor = None
    fixed_factor = False
    if args.factor:
        factor = parse_factor(args.factor)
        if factor is None:
            try:
                math_eval_v1.math_eval(args.factor, x=1)
            except Exception:
                # `Exception` rather than a bare `except:` so that Ctrl-C
                # and interpreter exit are not reported as a bad factor.
                sys.stderr.write(
                    'Invalid factor: \'{0}\' is neither a valid numeric literal nor a math expression.\n'
                    .format(args.factor))
                sys.exit(1)
        else:
            fixed_factor = True

    if args.output_file and os.path.isfile(
            args.output_file) and not args.overwrite:
        sys.stderr.write('Output file \'{0}\' already exists! '
                         'Please use a different file path or specify '
                         '\'--overwrite\' on the command line.\n'.format(
                             args.output_file))
        sys.exit(1)

    # Choose a delimiter from the output file type when none was given.
    if not args.delimiter:
        if args.output_file and is_csv_file(args.output_file):
            args.delimiter = ','
        elif args.output_file and is_tsv_file(args.output_file):
            args.delimiter = '\t'
        else:
            args.delimiter = ' '

    if args.output_file:
        output_fp = open(args.output_file, 'w', encoding='ascii')
    else:
        output_fp = sys.stdout

    try:
        for line in unpacker.unpack_data_from_file(args.input_file,
                                                   args.format,
                                                   args.skip_bytes):
            if not args.factor:
                output_line = line
            elif fixed_factor:
                output_line = (col * factor for col in line)
            else:
                output_line = (math_eval_v1.math_eval(args.factor, x=col)
                               for col in line)
            output_fp.write(args.delimiter.join(
                repr(col) for col in output_line))
            output_fp.write('\n')
    finally:
        # Close only what was opened here, even when the loop raises;
        # sys.stdout is left alone.
        if args.output_file:
            output_fp.close()
def _generate_waveform_sequence(self):
    """Build the DICOM Waveform Sequence from the packed sample file.

    Reads records from ``self._file`` (one record per sample, laid out
    according to ``self._format``), passes every value through
    ``self._adjust_callback`` and repacks each record as little-endian
    signed 16-bit integers, one per channel.

    The samples are split over several sequence items. When the file
    holds more samples than the item limits allow, the surplus is dropped
    and a warning is emitted.

    Returns:
        A ``dicom.sequence.Sequence`` with one ``Dataset`` per item.

    Raises:
        AssertionError: if the channel count or the sampling frequency is
            outside the range accepted for the selected waveform kind.
    """
    if self._is_12_lead_ecg:
        maximum_waveform_sequences = 5  # PS3.3 A.34.3.4.3 Waveform Sequence
        maximum_waveform_samples = 16384  # PS3.3 A.34.3.4.5 Number of Waveform Samples
        maximum_waveform_channels = 13  # PS3.3 A.34.3.4.4
    else:
        maximum_waveform_sequences = 4  # PS3.3 A.34.4.4.2 Waveform Sequence
        maximum_waveform_samples = 2**32 - 1  # NumberOfWaveformSamples's VR is `UL`.
        maximum_waveform_channels = 24  # PS3.3 A.34.4.4.3 Number of Waveform Channels

    # These checks do not depend on the individual sequence item, so they
    # are performed once up front instead of once per item.
    assert 1 <= self._channels <= maximum_waveform_channels
    assert 200 <= self._sampling_frequency <= 1000  # DICOM PS3.3-2015a A.34.3.4.6

    data_file_len = fileutil.file_size(self._file)
    pack_size = struct.calcsize(self._format)
    data_file_total_samples = data_file_len // pack_size
    saved_samples = maximum_waveform_sequences * maximum_waveform_samples
    if data_file_total_samples > saved_samples:
        warn_msg = 'File "{}" is too big. File size: {}, pack size: {}, format string: {}, ' \
                   'total samples: {}, saved samples: {}, saved size: {}.'.format(
                       fileutil.file_name(self._file), data_file_len,
                       pack_size, self._format, data_file_total_samples,
                       saved_samples, saved_samples * pack_size)
        if not _frozen:  # Fix issue #7
            warnings.warn(warn_msg)
        # `Logger.warn` is a deprecated alias; `warning` is the supported name.
        logger.warning(warn_msg)
        data_file_total_samples = saved_samples

    waveform_seq = dicom.sequence.Sequence()
    data_unpacker = unpacker.unpack_data_from_file(self._file, self._format)
    target_fmt = '<{}'.format('h' * self._channels)
    # Lazy pipeline: a single iterator shared by all sequence items, so each
    # item continues reading where the previous one stopped.
    adjusted_data = map(lambda v: map(self._adjust_callback, v), data_unpacker)

    while data_file_total_samples > 0:
        seq_item = dicom.dataset.Dataset()
        seq_item.WaveformOriginality = 'ORIGINAL'  # Type 1
        seq_item.NumberOfWaveformChannels = self._channels  # Type 1.
        seq_item.NumberOfWaveformSamples = min(
            data_file_total_samples, maximum_waveform_samples)  # Type 1. UL.
        data_file_total_samples -= seq_item.NumberOfWaveformSamples
        seq_item.SamplingFrequency = '{:d}'.format(
            self._sampling_frequency)  # Type 1. DS.
        seq_item.ChannelDefinitionSequence = \
            self._generate_channel_definition_sequence()  # Type 1.
        # Type 1. PS3.3 C.10.9.1.5 Waveform Bits Allocated and Waveform Sample Interpretation
        seq_item.WaveformBitsAllocated = 16
        seq_item.WaveformSampleInterpretation = 'SS'  # Type 1. PS3.3 A.34.3.4.8 Waveform Sample Interpretation
        # The VR of `Waveform Padding Value` may be OB or OW, so:
        #     seq_item.WaveformPaddingValue = b'\x00\x00'
        # will not work, instead:
        seq_item.add_new((0x5400, 0x100A), 'OW', b'\x00\x80')  # Type 1C. OB or OW.

        data = bytearray()
        # zip() asks `range` first, so once the quota is reached no extra
        # record is pulled from the shared iterator.
        for _, record in zip(range(seq_item.NumberOfWaveformSamples), adjusted_data):
            data.extend(struct.pack(target_fmt, *record))
        seq_item.add_new((0x5400, 0x1010), 'OW', bytes(data))  # WaveformData. Type 1. OB or OW.
        waveform_seq.append(seq_item)
    return waveform_seq
# Lead I, II, III, aVR, aVL, aVF, V1, V2, V3, V4, V5, V6, I, II, III, ... # The unit of signals collected by the cardiac conduction is: 0.4V/(2^15). if not dest: dest = fileutil.replace_ext(src, '.dcm') data_set = DCMECGDataset(src, '<{}'.format('d' * 12), 500, 12, ('I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6'), adjust_callback=lambda v: int(v * 1000 / 6)) data_set.save_as(dest) if __name__ == '__main__': u = unpacker.unpack_data_from_file(r'E:\data\1\20110607153002.dat', '<d') print(max(u)) u = unpacker.unpack_data_from_file(r'E:\data\1\20110607153002.dat', '<d') print(min(u)) ecg_to_dcm(r'E:\data\1\20110607153002.dat') ecg_to_dcm(r'd:\20120503152310.dat') # References: # DICOM 2015a PS3.5 7.4 Data Element Type # DICOM 2015a PS3.5 8.3 Waveform Data and Related Data Elements # DICOM 2015a PS3.6 6 Registry of DICOM Data Elements # [DICOM Waveform Generator](http://libir.tmu.edu.tw/bitstream/987654321/21661/1/B09.pdf) # [Mandatory Tags for DICOM Instance](http://stackoverflow.com/questions/6608535/mandatory-tags-for-dicom-instance) # [Questions regarding the DICOM file](http://fixunix.com/dicom/50267-questions-regarding-dicom-file.html) # [Dicom: What's the point of SOPInstanceUID tag?](http://stackoverflow.com/questions/1434918/dicom-whats-the-point-of-sopinstanceuid-tag)
def _generate_waveform_sequence(self):
    """Assemble the Waveform Sequence items from the packed sample file.

    Every record read from ``self._file`` (layout ``self._format``) is
    optionally passed value-by-value through ``self._adjust_callback`` and
    repacked as little-endian signed 16-bit integers, one per channel
    label. Samples beyond what the item limits allow are silently dropped.

    Side effect: ``self._channel_labels`` is replaced by a list copy.

    Returns:
        A ``dicom.sequence.Sequence`` holding one ``Dataset`` per item.

    Raises:
        AssertionError: if the channel count or the sampling frequency is
            outside the accepted range.
    """
    # (max items, max samples per item)
    if self._is_12_lead_ecg:
        # PS3.3 A.34.3.4.3 Waveform Sequence / A.34.3.4.5 Number of Waveform Samples
        item_limit, samples_per_item = 5, 16384
    else:
        # PS3.3 A.34.4.4.2 Waveform Sequence; NumberOfWaveformSamples's VR is `UL`.
        item_limit, samples_per_item = 4, 2 ** 32 - 1

    record_size = struct.calcsize(self._format)
    available = fileutil.file_size(self._file) // record_size
    # Cap the number of samples at what the sequence can hold.
    remaining = min(available, item_limit * samples_per_item)

    self._channel_labels = list(self._channel_labels)
    channel_cnt = len(self._channel_labels)

    sequence = dicom.sequence.Sequence()
    records = unpacker.unpack_data_from_file(self._file, self._format)
    pack_fmt = '<{}'.format('h' * channel_cnt)

    callback = self._adjust_callback
    if callback and hasattr(callback, '__call__'):
        # The callback is looked up again per record, as lazily as before.
        samples = (map(self._adjust_callback, record) for record in records)
    else:
        samples = records

    # PS3.3 A.34.3.4.4 / PS3.3 A.34.4.4.3 Number of Waveform Channels
    channel_limit = 13 if self._is_12_lead_ecg else 24
    assert 1 <= channel_cnt <= channel_limit
    assert 200 <= self._sampling_frequency <= 1000  # DICOM PS3.3-2015a A.34.3.4.6

    while remaining > 0:
        chunk = min(remaining, samples_per_item)
        remaining -= chunk

        item = dicom.dataset.Dataset()
        item.WaveformOriginality = 'ORIGINAL'  # Type 1
        item.NumberOfWaveformChannels = channel_cnt  # Type 1.
        item.NumberOfWaveformSamples = chunk  # Type 1. UL.
        item.SamplingFrequency = '{:d}'.format(self._sampling_frequency)  # Type 1. DS.
        item.ChannelDefinitionSequence = self._generate_channel_definition_sequence()  # Type 1.
        # Type 1. PS3.3 C.10.9.1.5 Waveform Bits Allocated and Waveform Sample Interpretation
        item.WaveformBitsAllocated = 16
        # Type 1. PS3.3 A.34.3.4.8 Waveform Sample Interpretation
        item.WaveformSampleInterpretation = 'SS'
        # `Waveform Padding Value` may be OB or OW, so plain attribute
        # assignment will not work; the element is added with an explicit VR.
        item.add_new((0x5400, 0x100A), 'OW', b'\x00\x80')  # Type 1C. OB or OW.

        payload = bytearray()
        # `range` is consulted first by zip(), so exactly `chunk` records
        # are consumed from the shared iterator.
        for _, record in zip(range(chunk), samples):
            payload.extend(struct.pack(pack_fmt, *record))
        item.add_new((0x5400, 0x1010), 'OW', bytes(payload))  # WaveformData. Type 1. OB or OW.

        sequence.append(item)
    return sequence