from pathlib import Path
from random import shuffle

from utils import nii_utils, yaml_utils

# output_path: module-level Path to the dataset root.


def create_data_yaml(path):
    # Skip the work if the train split has already been exported.
    if (output_path / 't22seg_train.yaml').exists():
        return
    paired_data = list()
    path = Path(path) / 't2'
    for t2_file in path.iterdir():
        seg_file = str(t2_file).replace('t2', 'seg')
        t2_image = nii_utils.nii_reader(str(t2_file))
        seg_image = nii_utils.nii_reader(str(seg_file))
        if t2_image.shape == seg_image.shape:  # keep only t2/seg pairs with matching shapes
            paired_data.append({'t2': str(t2_file), 'seg': str(seg_file)})
    shuffle(paired_data)
    yaml_utils.write(str(output_path / 't22seg_train.yaml'), paired_data[:8 * len(paired_data) // 10])  # train 80%
    yaml_utils.write(str(output_path / 't22seg_test.yaml'), paired_data[8 * len(paired_data) // 10:])  # test 20%
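# A minimal sketch (not part of the original script) of how the exported pairs could be
# consumed for training. It assumes the files written by yaml_utils are plain YAML
# (a list of {'t2': ..., 'seg': ...} dicts) and uses PyYAML plus nibabel directly
# instead of the project's own utils; swap in yaml_utils / nii_utils as appropriate.
import yaml
import nibabel as nib


def paired_volume_generator(yaml_path):
    with open(yaml_path, 'r', encoding='UTF-8') as f:
        pairs = yaml.safe_load(f)
    for pair in pairs:
        t2_volume = nib.load(pair['t2']).get_fdata()
        seg_volume = nib.load(pair['seg']).get_fdata()
        yield t2_volume, seg_volume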
def train(self):
    optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(self.loss)
    # We must initialize all variables before we use them.
    init_op = tf.global_variables_initializer()
    self.sess.run(init_op)
    writer = tf.summary.FileWriter(
        '../tensorboard_logs/{}/{}/{}'.format(self.dataset_name, self.name, self.tag), self.sess.graph)
    for epoch in range(self.num_epoch):
        batch_inputs, batch_labels = next(self.train_data_generator)
        _, loss_val, summary = self.sess.run(
            [optimizer, self.loss, self.scalar_summary],
            feed_dict={self.inputs: batch_inputs, self.labels: batch_labels})
        writer.add_summary(summary, epoch)
        if epoch % self.valid_freq == 0:
            sim = self.similarity.eval()
            print('>> Validation at step {}:'.format(epoch))
            for i in range(self.valid_size):
                valid_word = self.reverse_dictionary[self.valid_examples[i]]
                top_k = 8  # number of nearest neighbors
                nearest = (-sim[i, :]).argsort()[1:top_k + 1]  # skip index 0: the word itself
                log_str = 'Nearest to ' + valid_word + ':'
                for k in range(top_k):
                    close_word = self.reverse_dictionary[nearest[k]]
                    log_str += ' ' + str(k + 1) + ':' + close_word
                print(log_str)
            # save train model
            self.save(self.checkpoint_dir / 'train', self.saver, epoch)
    final_embeddings = self.normalized_embeddings.eval()
    self.save(self.checkpoint_dir / 'train', self.saver, self.num_epoch)
    yaml_utils.write(self.checkpoint_dir / 'final_embeddings.yaml', final_embeddings)
    # Create a configuration for visualizing the embeddings, with their word labels, in TensorBoard.
    config = projector.ProjectorConfig()
    embedding_conf = config.embeddings.add()
    embedding_conf.tensor_name = self.embeddings.name
    embedding_conf.metadata_path = self.dataset_info['dictionary']  # labels for the embeddings (dictionary.tsv)
    projector.visualize_embeddings(writer, config)
    writer.close()
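# The validation block above relies on self.similarity, which in the usual TF1 word2vec
# setup is the cosine similarity between the validation embeddings and all L2-normalized
# embeddings. A minimal NumPy sketch of the same computation, assuming `embeddings` is the
# raw embedding matrix and `valid_examples` the validation word ids (both hypothetical names):
import numpy as np


def nearest_words(embeddings, valid_examples, reverse_dictionary, top_k=8):
    # L2-normalize rows so that a dot product equals cosine similarity.
    norm = np.sqrt(np.sum(np.square(embeddings), axis=1, keepdims=True))
    normalized = embeddings / norm
    sim = normalized[valid_examples] @ normalized.T
    results = {}
    for i, word_id in enumerate(valid_examples):
        nearest = (-sim[i, :]).argsort()[1:top_k + 1]  # index 0 is the word itself
        results[reverse_dictionary[word_id]] = [reverse_dictionary[k] for k in nearest]
    return results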
import collections

from utils import yaml_utils

# vocabulary_size, output_dir and dataset_name are module-level settings.


def build_dataset(words):
    print('Counting word occurrences')
    count = [('UNK', -1)]
    count.extend(collections.Counter(words).most_common(vocabulary_size - 1))
    print('Building the dictionary')
    dictionary = dict()
    for word, _ in count:
        dictionary[word] = len(dictionary)
    print('Converting the raw words into an index sequence with the dictionary')
    data = list()
    unk_count = 0
    for word in words:
        index = dictionary.get(word, 0)  # 0 is dictionary['UNK']
        if index == 0:
            unk_count += 1
        data.append(index)
    count[0] = ('UNK', unk_count)
    print('Building the reverse-lookup dictionary')
    reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    output_path = output_dir / dataset_name
    print('Exporting files')
    yaml_utils.write(output_path / 'data.yaml', data)
    yaml_utils.write(output_path / 'reverse_dictionary.yaml', reverse_dictionary)
    with (output_path / 'dictionary.tsv').open(mode='w', encoding='UTF-8') as file:
        for i in range(vocabulary_size):
            file.write(reverse_dictionary[i] + '\n')
    info_dict = {'vocabulary_size': vocabulary_size,
                 'data': str(output_path / 'data.yaml'),
                 'dictionary': str(output_path / 'dictionary.tsv'),
                 'reverse_dictionary': str(output_path / 'reverse_dictionary.yaml')}
    yaml_utils.write(output_path / 'info.yaml', info_dict)
    print('Export finished')
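# The train() loop above pulls (batch_inputs, batch_labels) from self.train_data_generator.
# One possible skip-gram batch generator over the exported `data` index sequence is sketched
# below (hypothetical; window handling in the project's actual generator may differ).
import random
from collections import deque


def skip_gram_generator(data, batch_size=128, num_skips=2, skip_window=1):
    assert batch_size % num_skips == 0 and num_skips <= 2 * skip_window
    span = 2 * skip_window + 1  # [skip_window, target, skip_window]
    index = span % len(data)
    buffer = deque((data[i % len(data)] for i in range(span)), maxlen=span)
    while True:
        batch, labels = [], []
        while len(batch) < batch_size:
            context_positions = [w for w in range(span) if w != skip_window]
            for w in random.sample(context_positions, num_skips):
                batch.append(buffer[skip_window])  # center word
                labels.append([buffer[w]])         # one sampled context word
            buffer.append(data[index])             # slide the window by one word
            index = (index + 1) % len(data)
        yield batch, labels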
from os import listdir

from sklearn.model_selection import train_test_split

from utils import nii_utils, yaml_utils

a = 'T1'
b = 'STIR'
source = 'E:/Datasets/Neurofibromatosis/source'  # /home/yf/datas/NF/
output = 'E:/Datasets/Neurofibromatosis'  # /home/yf/datas/NF/
# Sort both listings so the T1 and STIR files stay paired by index.
o_A = sorted(listdir(source + '/' + a))
o_B = sorted(listdir(source + '/' + b))
A = list()
B = list()
for i in range(len(o_A)):
    image_A = nii_utils.nii_reader(source + '/' + a + '/' + o_A[i])
    image_B = nii_utils.nii_reader(source + '/' + b + '/' + o_B[i])
    if image_A.shape[2] == image_B.shape[2]:  # keep only pairs with the same number of slices
        A.append(source + '/' + a + '/' + o_A[i])
        B.append(source + '/' + b + '/' + o_B[i])
train_A, test_A, train_B, test_B = train_test_split(A, B, test_size=0.2, random_state=10)
fold = dict()
fold['A'] = train_A
fold['B'] = train_B
yaml_utils.write(output + '/' + a + '2' + b + '_train.yaml', fold)
fold['A'] = test_A
fold['B'] = test_B
yaml_utils.write(output + '/' + a + '2' + b + '_test.yaml', fold)
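# A minimal read-back sketch (assumption: yaml_utils writes plain YAML), iterating the
# matched T1/STIR pairs of the exported split with PyYAML:
import yaml

with open(output + '/' + a + '2' + b + '_train.yaml', 'r', encoding='UTF-8') as f:
    fold = yaml.safe_load(f)
for path_a, path_b in zip(fold['A'], fold['B']):
    print(path_a, '<->', path_b)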
# (fragment: these lines sit inside the per-annotation / per-<object> loops of the
#  VOC parsing script; a hypothetical sketch of the enclosing loop follows below)
        name = obj.find('name').text
        if name not in classes.keys() or int(difficult) == 1:
            continue
        item = dict()  # save object params
        item['class_id'] = classes[name]
        bndbox = obj.find('bndbox')
        xmin = int(bndbox.find('xmin').text)
        ymin = int(bndbox.find('ymin').text)
        xmax = int(bndbox.find('xmax').text)
        ymax = int(bndbox.find('ymax').text)
        item['bndbox'] = {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 0, 255), thickness=2)
        data['objects'].append(item)
    if len(data['objects']) < 1:  # drop images without any usable object
        continue
    # cv2.imshow('image', img)  # show the image
    # cv2.waitKey(1)
    dataset.append(data)
output_dir.mkdir(parents=True, exist_ok=True)
np.random.shuffle(dataset)
train_steps = int(len(dataset) * train_scale)
yaml_utils.write(output_dir / 'train_dataset.yaml', dataset[:train_steps])
yaml_utils.write(output_dir / 'eval_dataset.yaml', dataset[train_steps:])
yaml_utils.write(output_dir / 'classes.yaml', classes)
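# Hypothetical sketch of the enclosing loop that defines `data`, `img`, `obj` and
# `difficult` for the fragment above, assuming a VOC-style layout (Annotations/*.xml
# next to JPEGImages/*.jpg); directory names and fields are assumptions, not taken
# from the original code.
import xml.etree.ElementTree as ET
from pathlib import Path
import cv2

voc_dir = Path('path/to/VOC')  # hypothetical dataset root
dataset = list()
for xml_file in (voc_dir / 'Annotations').glob('*.xml'):
    root = ET.parse(str(xml_file)).getroot()
    img_path = voc_dir / 'JPEGImages' / (xml_file.stem + '.jpg')
    img = cv2.imread(str(img_path))
    data = {'image': str(img_path), 'objects': []}
    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        # ...per-object parsing continues as in the fragment above...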
from pathlib import Path

from utils import nii_utils, yaml_utils

output_path = Path('E:/Datasets/BraTS_2018')


def nii_gz2nii(path):
    detail = dict()
    for file in Path(path).iterdir():
        item_name = Path(path).stem
        class_fold = file.stem.replace(item_name, '').replace('.nii', '').replace('_', '')
        output = output_path / class_fold / (item_name + '.nii')
        output.parent.mkdir(parents=True, exist_ok=True)
        image = nii_utils.nii_reader(str(file))
        header = nii_utils.nii_header_reader(str(file))
        nii_utils.nii_writer(str(output), header, image)
        detail[class_fold] = image.shape
    return detail


if __name__ == '__main__':
    source_data = 'E:/SourceDatasets/LGG'
    _dict = dict()
    for item in Path(source_data).iterdir():
        _dict[item.stem] = nii_gz2nii(str(item))
        print('\r>>' + item.stem, end='')
    yaml_utils.write(str(output_path) + '/detail.yaml', _dict)
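# Worked example (hypothetical BraTS-style file name; the real names in the source
# folders may differ) of how class_fold is derived in nii_gz2nii() above:
item_name = 'Brats18_TCIA_101_1'  # patient folder name, i.e. Path(path).stem
file = Path('Brats18_TCIA_101_1/Brats18_TCIA_101_1_flair.nii.gz')
# file.stem drops only the final '.gz' suffix -> 'Brats18_TCIA_101_1_flair.nii'
class_fold = file.stem.replace(item_name, '').replace('.nii', '').replace('_', '')
print(class_fold)  # -> 'flair'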
# (fragment: tail of dcm2nii(); the try block that parses the sequence name and fills
#  dict_ precedes this except clause; a hypothetical sketch of that step follows below)
    except:
        error['sequence name error'].append(tag_dict['path'])
    if A in dict_.keys() and B in dict_.keys():
        for key, value in dict_.items():
            print(output_path + '/' + key + '/' + value['patient_id'] + '.nii')
            Path(output_path + '/' + key + '/').mkdir(parents=True, exist_ok=True)
            dcm_utils.write(dcm_utils.read(value['path']),
                            output_path + '/' + key + '/' + value['patient_id'] + '.nii')
        return dict_
    else:
        one_error = list()
        for key, value in dict_.items():
            one_error.append(key + ':' + value['path'])
        error['match datas error'].append(one_error)
        return None


if __name__ == '__main__':
    source_data = 'E:/SourceDatasets/Neurofibromatosis/NF'
    detail = list()
    for dataset in Path(source_data).iterdir():
        dataset = dataset / 'DICOM'
        result = dcm2nii(dataset)
        if result is not None:
            detail.append(result)
    yaml_utils.write(output_path + '/info.yaml', detail)
    yaml_utils.write(output_path + '/error.yaml', error)
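# The fragment above is the tail of dcm2nii(); the earlier part (not shown) builds dict_,
# mapping a sequence name (e.g. A = 'T1', B = 'STIR') to the series path and patient id.
# A hypothetical sketch of that step using pydicom instead of the project's dcm_utils;
# the series-folder layout and tag handling are assumptions.
import pydicom


def collect_series(dicom_dir):
    dict_ = {}
    for series_dir in Path(dicom_dir).iterdir():
        first_slice = next(series_dir.glob('*.dcm'), None)
        if first_slice is None:
            continue
        ds = pydicom.dcmread(str(first_slice), stop_before_pixels=True)
        tag_dict = {'path': str(series_dir), 'patient_id': str(ds.PatientID)}
        try:
            sequence_name = str(ds.SeriesDescription).strip().upper()
            dict_[sequence_name] = tag_dict
        except AttributeError:  # missing SeriesDescription tag
            error['sequence name error'].append(tag_dict['path'])
    return dict_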