def write_shard_records(filename, excel_path, dicom_dir, split_name, n_ecg_shard=300):
    """Serialize a dataset split into sharded TFRecord files.

    The file names and labels are read from the ``'file name'`` and
    ``'label'`` columns of ``ECGToEchoDataset(...).annotations_df``. Every
    record is written exactly once: shards hold ``n_ecg_shard`` records each,
    and the final shard holds whatever remains.

    Shard files are written below the relative ``data`` directory and are named
    ``"<filename>_test_<shard>-of-<last shard>.record"`` with both numbers
    zero-padded to five digits.

    :param filename: prefix used for every shard file name.
    :param excel_path: forwarded to ``ECGToEchoDataset``.
    :param dicom_dir: directory that contains the DICOM files; also forwarded
        to ``ECGToEchoDataset``.
    :param split_name: forwarded to ``ECGToEchoDataset``.
    :param n_ecg_shard: maximum number of records per shard (default 300).
    :raises ValueError: if ``n_ecg_shard`` is smaller than 1.
    """
    if n_ecg_shard < 1:
        raise ValueError(f"n_ecg_shard must be >= 1, got {n_ecg_shard}")

    ds_pytorch = ecg_to_echo_dataset.ECGToEchoDataset(
        excel_path, dicom_dir, split_name, None)
    dicom_files = ds_pytorch.annotations_df['file name']
    labels = ds_pytorch.annotations_df['label']

    n_records = len(dicom_files)
    # Ceiling division: a trailing partial shard must not be dropped, and a
    # dataset smaller than one shard still produces one file.
    n_shards = -(-n_records // n_ecg_shard)

    for shard in tqdm.tqdm(range(n_shards)):
        # NOTE(review): the split component of the name is hard-coded to
        # "test" even though `split_name` is available -- confirm whether
        # downstream readers rely on this before changing it.
        tfrecords_shard_path = "{}_{}_{}.record".format(
            filename, "test", '%.5d-of-%.5d' % (shard, n_shards - 1))

        index = shard * n_ecg_shard
        # Clamp to the real length; a `-1` end index would silently exclude
        # the last record from the final shard.
        end = min(index + n_ecg_shard, n_records)
        ecg_shard_list = dicom_files[index:end]
        labeld_shard_list = labels[index:end]
        print(index, end)

        with tf.io.TFRecordWriter(
                os.path.join('data', tfrecords_shard_path)) as writer:
            for dicom_file, label in zip(ecg_shard_list, labeld_shard_list):
                print(dicom_file, label)
                dicom_path = os.path.join(dicom_dir, f"{dicom_file}")
                patient_obj = patient.Patient(patient_dicom_path=dicom_path)
                example = serialize_example(patient_obj.filtered_signals, label)
                writer.write(example)
def __getitem__(self, idx):
    """Return the sample stored at position ``idx``.

    The DICOM file named in the ``'file name'`` column is loaded from
    ``self.dicom_dir``. The echo label is taken from the ``'label'`` column,
    unless ``self.threshold_35`` is truthy; in that case the
    ``'cognos_reserved_text13'`` value is mapped to ``1`` when it is listed in
    ``metadata.HEALTHY_35`` and to ``0`` when it is listed in
    ``metadata.SICK_35``.

    :param idx: positional index; a tensor index is converted with ``tolist()``.
    :return: dict with keys ``'ecg_signal_filtered'``, ``'echo'`` and
        ``'dicom_file'``, passed through ``self.transform`` when one is set.
    :raises AssertionError: if the threshold label is in neither metadata list.
    """
    position = idx.tolist() if torch.is_tensor(idx) else idx

    dicom_name = self.annotations_df['file name'].iloc[position]
    recording = patient.Patient(
        patient_dicom_path=os.path.join(self.dicom_dir, f"{dicom_name}"))

    if self.threshold_35:
        label_str = self.annotations_df['cognos_reserved_text13'].iloc[position]
        if label_str in metadata.HEALTHY_35:
            echo_label = 1
        elif label_str in metadata.SICK_35:
            echo_label = 0
        else:
            # The label is listed in neither group, so it cannot be binarized.
            raise AssertionError(f"{label_str}")
    else:
        echo_label = self.annotations_df['label'].iloc[position]

    sample = {
        'ecg_signal_filtered': recording.sub_sampled_ecg,
        'echo': echo_label,
        'dicom_file': dicom_name,
    }
    return self.transform(sample) if self.transform else sample
def export_data_to_csv(dicom_dir):
    """Write the identifying details of every ``.dcm`` file to ``data_info.csv``.

    Each file in ``dicom_dir`` whose name ends with ``.dcm`` is loaded as a
    ``patient.Patient``; its file name, first name, last name, id, date and
    time are printed and collected. The collected rows are written, without an
    index column, to ``data_info.csv`` in the current working directory.

    :param dicom_dir: directory to scan (not recursive).
    """
    columns = [
        'file name', 'first name', 'last name', 'patient id', 'date', 'time'
    ]

    print("Starting job..")
    collected = defaultdict(list)
    for entry in os.listdir(dicom_dir):
        if not entry.endswith('.dcm'):
            continue

        print("Processing file {}...".format(entry))
        record = patient.Patient(os.path.join(dicom_dir, entry))
        first_name = record.first_name
        last_name = record.last_name
        patient_id = record.id
        # The last whitespace-separated token of the date string is the time;
        # everything in front of it is the calendar date.
        date_tokens = record.date.split()
        date = " ".join(date_tokens[:-1])
        time = date_tokens[-1]

        values = (entry, first_name, last_name, patient_id, date, time)
        print(
            "file name: {}\nfirst name: {}\nlast name: {}\npatient id: {}\ndate: {}\ntime: {}\n"
            .format(*values))
        for column, value in zip(columns, values):
            collected[column].append(value)

    df = pd.DataFrame(collected, columns=columns)
    df.to_csv('data_info.csv', index=False)
    print("Export completed...")
def export_excel_with_new_dicoms(minimum_date, dicom_dir='.'):
    """Write a CSV listing the DICOM files recorded after ``minimum_date``.

    Every ``.dcm`` file in ``dicom_dir`` is loaded as a ``patient.Patient`` and
    its ``date`` string is parsed with the format ``'%d %b %Y %H:%M'``. Files
    whose parsed date is strictly greater than ``minimum_date`` are logged and
    written to ``new_ecgs.csv`` (current working directory) with the columns
    ``DICOM_PATH``, ``ECG_DATE`` and ``PATIENT_ID``.

    :param minimum_date: lower bound (exclusive) compared against the parsed
        ``datetime.datetime`` of each recording.
    :param dicom_dir: directory to scan for ``.dcm`` files (not recursive).
    :return: None
    """
    dcm_files = [
        entry for entry in os.listdir(dicom_dir) if entry.endswith('.dcm')
    ]
    logging.info("Found %d dcm files...", len(dcm_files))

    new_rows = []
    for dcm_name in dcm_files:
        dicom_path = os.path.join(dicom_dir, dcm_name)
        patient_obj = patient.Patient(patient_dicom_path=dicom_path)
        recorded_at = datetime.datetime.strptime(patient_obj.date,
                                                 '%d %b %Y %H:%M')
        if recorded_at > minimum_date:
            logging.info("Found new ECG. DICOM: %s. ID: %s. Date: %s",
                         dcm_name, patient_obj.id, recorded_at)
            new_rows.append((dicom_path, recorded_at, patient_obj.id))

    df = pd.DataFrame(new_rows,
                      columns=['DICOM_PATH', 'ECG_DATE', 'PATIENT_ID'])
    df.to_csv("new_ecgs.csv")
def create_tf_records(filename, excel_path, dicom_dir, split_name):
    """Serialize a whole dataset split into a single TFRecord file.

    The writer for ``filename`` is opened first. The split is then loaded
    through ``ECGToEchoDataset`` and, for every ``('file name', 'label')``
    pair of its ``annotations_df``, the filtered signals of the DICOM file
    are serialized with ``serialize_example`` and appended to the file.

    :param filename: path of the TFRecord file to write.
    :param excel_path: forwarded to ``ECGToEchoDataset``.
    :param dicom_dir: directory that contains the DICOM files; also forwarded
        to ``ECGToEchoDataset``.
    :param split_name: forwarded to ``ECGToEchoDataset``.
    """
    with tf.io.TFRecordWriter(filename) as writer:
        annotations = ecg_to_echo_dataset.ECGToEchoDataset(
            excel_path, dicom_dir, split_name, None).annotations_df
        pairs = zip(annotations['file name'], annotations['label'])
        for dicom_file, label in pairs:
            print(dicom_file, label)
            recording = patient.Patient(
                patient_dicom_path=os.path.join(dicom_dir, f"{dicom_file}"))
            writer.write(serialize_example(recording.filtered_signals, label))
def __getitem__(self, idx):
    """Return the unfiltered ECG signals and echo label at position ``idx``.

    The DICOM file named in the ``'file name'`` column of ``self.ecg_echo_df``
    is loaded from ``self.root_dir``; the label comes from the ``'label'``
    column of the same row.

    :param idx: positional index; a tensor index is converted with ``tolist()``.
    :return: dict with keys ``'ecg_signal'`` and ``'echo'``, passed through
        ``self.transform`` when one is set.
    """
    position = idx.tolist() if torch.is_tensor(idx) else idx

    dicom_name = self.ecg_echo_df['file name'].iloc[position]
    recording = patient.Patient(
        patient_dicom_path=os.path.join(self.root_dir, f"{dicom_name}"))
    echo_label = self.ecg_echo_df['label'].iloc[position]

    sample = {'ecg_signal': recording.unfiltered_signals, 'echo': echo_label}
    if not self.transform:
        return sample
    return self.transform(sample)
def create_tf_dataset(excel_path, dicom_dir, split_name):
    """Build an in-memory ``tf.data.Dataset`` of (filtered signals, label) pairs.

    The split is loaded through ``ECGToEchoDataset``. Every DICOM file named
    in the ``'file name'`` column of its ``annotations_df`` is read eagerly,
    so all signals are held in memory before the dataset is created.

    :param excel_path: forwarded to ``ECGToEchoDataset``.
    :param dicom_dir: directory that contains the DICOM files; also forwarded
        to ``ECGToEchoDataset``.
    :param split_name: forwarded to ``ECGToEchoDataset``.
    :return: ``tf.data.Dataset`` sliced from ``(signals, labels)``.
    """
    ds_pytorch = ecg_to_echo_dataset.ECGToEchoDataset(
        excel_path, dicom_dir, split_name, None)
    dicom_files = ds_pytorch.annotations_df['file name']
    labels = ds_pytorch.annotations_df['label']

    # from_tensor_slices needs the complete list up front, hence the eager
    # read of every recording.
    signals = [
        patient.Patient(
            patient_dicom_path=os.path.join(dicom_dir, f"{dicom_file}")
        ).filtered_signals
        for dicom_file in dicom_files
    ]
    return tf.data.Dataset.from_tensor_slices((signals, labels))
def __getitem__(self, idx):
    """Return the test-set sample at position ``idx`` with recording metadata.

    The DICOM file named in the ``'file name'`` column of ``self.test_set_df``
    is loaded from ``metadata.DICOM_DIR``. The echo label is taken from the
    ``'label'`` column, unless ``self.threshold_35`` is truthy; in that case
    the ``'cognos_reserved_text13'`` value is mapped to ``1`` when it is listed
    in ``metadata.HEALTHY_35`` and to ``0`` when it is listed in
    ``metadata.SICK_35``.

    :param idx: positional index; a tensor index is converted with ``tolist()``.
    :return: dict with keys ``'ecg_signal_filtered'``, ``'echo'``,
        ``'ecg_number'``, ``'dicom_file'``, ``'age'``, ``'number_of_samples'``,
        ``'sampling_rate'``, ``'gender'`` and ``'duration'``, passed through
        ``self.transform`` when one is set.
    :raises AssertionError: if the threshold label is in neither metadata list.
    """
    position = idx.tolist() if torch.is_tensor(idx) else idx

    dicom_name = self.test_set_df['file name'].iloc[position]
    recording = patient.Patient(
        patient_dicom_path=os.path.join(metadata.DICOM_DIR, f"{dicom_name}"))

    # Metadata is read before the label is resolved, so a broken recording
    # surfaces ahead of a label problem.
    ecg_number = self.test_set_df['ecg_number'].iloc[position]
    age = recording.age
    sample_count = recording.number_of_samples
    sampling_rate = recording.sampling_frequency
    gender = recording.gender
    duration = recording.decoded_dcom_obj.duration

    if self.threshold_35:
        label_str = self.test_set_df['cognos_reserved_text13'].iloc[position]
        if label_str in metadata.HEALTHY_35:
            echo_label = 1
        elif label_str in metadata.SICK_35:
            echo_label = 0
        else:
            # The label is listed in neither group, so it cannot be binarized.
            raise AssertionError(f"{label_str}")
    else:
        echo_label = self.test_set_df['label'].iloc[position]

    sample = {
        'ecg_signal_filtered': recording.sub_sampled_ecg,
        'echo': echo_label,
        'ecg_number': ecg_number,
        'dicom_file': dicom_name,
        'age': age,
        'number_of_samples': sample_count,
        'sampling_rate': sampling_rate,
        'gender': gender,
        'duration': duration,
    }
    return self.transform(sample) if self.transform else sample
def parse_dicom(dicom_file, dicom_dir):
    """Load a DICOM file and return its filtered signals.

    Both arguments may be ``str`` or ``bytes``. ``tf.numpy_function`` hands
    string tensors to the wrapped Python function as ``bytes``, so bytes
    inputs are decoded before the path is built.

    :param dicom_file: name of the DICOM file inside ``dicom_dir``.
    :param dicom_dir: directory that contains the DICOM file.
    :return: ``filtered_signals`` of the loaded ``patient.Patient``.
    """
    # Without decoding, formatting bytes yields the literal "b'...'" text and
    # os.path.join refuses to mix bytes with str components.
    if isinstance(dicom_file, bytes):
        dicom_file = os.fsdecode(dicom_file)
    if isinstance(dicom_dir, bytes):
        dicom_dir = os.fsdecode(dicom_dir)

    dicom_path = os.path.join(dicom_dir, f"{dicom_file}")
    patient_obj = patient.Patient(patient_dicom_path=dicom_path)
    return patient_obj.filtered_signals
# # print(e.channels_no) # print(e.sampling_frequency) # print(e.samples) # print(e.duration) # # interp = e.interpretation() # print(interp) # inf = e.print_info(interp) # print(inf) # decoded_dcom = decode_dcom_files.DecodeDCOM(dicom_file_path) # unfiltered_signals = decoded_dcom.unfiltered_signlas # filtererd_signals = decoded_dcom.filtered_signals # plt.figure() # plt.plot(unfiltered_signals[0][500:6000]) # plt.title("Unfiltered signals") # plt.figure() # plt.plot(filtererd_signals[0][500:6000]) # plt.title("Filtered signals") # plt.show() # print(decoded_dcom.channel_definitions) p = patient.Patient(dicom_file_path) p.print_info() sigs = p.get_signals() print("Signals shape: ", sigs.shape) plt.plot(sigs[0][100:800]) plt.show()
def parse_sampling_rate(row, dicom_dir):
    """Return the sampling frequency of the DICOM file referenced by ``row``.

    :param row: mapping-like record whose ``'file name'`` entry names the file.
    :param dicom_dir: directory that contains the DICOM file.
    :return: ``sampling_frequency`` of the loaded ``patient.Patient``.
    """
    file_name = row['file name']
    recording = patient.Patient(
        patient_dicom_path=os.path.join(dicom_dir, f"{file_name}"))
    return recording.sampling_frequency
def parse_num_samples(row, dicom_dir):
    """Return the second dimension of the filtered signals for ``row``'s file.

    :param row: mapping-like record whose ``'file name'`` entry names the file.
    :param dicom_dir: directory that contains the DICOM file.
    :return: ``filtered_signals.shape[1]`` of the loaded ``patient.Patient``
        (presumably the number of samples per channel -- TODO confirm the
        axis layout).
    """
    file_name = row['file name']
    recording = patient.Patient(
        patient_dicom_path=os.path.join(dicom_dir, f"{file_name}"))
    signal_shape = recording.filtered_signals.shape
    return signal_shape[1]