def augment_dataset(self, params, source_samples, target_dir, idx_range=None):
    patch_root = params.PATCHS_ROOT_PATH[source_samples[0]]
    sample_filename = source_samples[1]
    train_list = "{}/{}".format(patch_root, sample_filename)

    Xtrain, Ytrain = read_csv_file(patch_root, train_list)
    if idx_range is not None:
        Xtrain = Xtrain[idx_range[0]:idx_range[1]]
        Ytrain = Ytrain[idx_range[0]:idx_range[1]]

    target_cancer_path = "{}/{}_cancer".format(patch_root, target_dir)
    target_normal_path = "{}/{}_normal".format(patch_root, target_dir)

    if not os.path.exists(target_cancer_path):
        os.makedirs(target_cancer_path)
    if not os.path.exists(target_normal_path):
        os.makedirs(target_normal_path)

    for K, (x, y) in enumerate(zip(Xtrain, Ytrain)):
        block = Block()
        block.load_img(x)
        img = block.get_img()

        aug_img = self.augment_images(img) * 255
        block.set_img(aug_img)
        block.opcode = self.opcode

        if y == 0:
            block.save_img(target_normal_path)
        else:
            block.save_img(target_cancer_path)

        if K % 1000 == 0:
            print("{} augmenting >>> {}".format(time.asctime(time.localtime()), K))
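# A minimal, self-contained sketch (not from the repo) of the rescale step in
# augment_dataset: an augmenter that works on float images in [0, 1] needs the
# `* 255` rescale, and usually a clip plus uint8 cast, before the result is
# written to disk. `fake_augment` is a hypothetical stand-in for
# self.augment_images.
import numpy as np

def fake_augment(img):
    # pretend augmentation: mild Gaussian noise added in float space
    return np.clip(img + np.random.normal(0, 0.02, img.shape), 0.0, 1.0)

img_float = np.random.rand(128, 128, 3)              # image scaled to [0, 1]
aug_img = fake_augment(img_float) * 255              # back to the [0, 255] range
aug_img = np.clip(aug_img, 0, 255).astype(np.uint8)  # safe for image I/O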
def load_custom_data_to_memory(self, samples_name):
    data_list = "{}/{}.txt".format(self._params.PATCHS_ROOT_PATH, samples_name)
    Xtrain, Ytrain = read_csv_file(self._params.PATCHS_ROOT_PATH, data_list)
    # train_data = Image_Dataset(Xtrain, Ytrain)

    img_data = []
    for file_name in Xtrain:
        img = imread(file_name) / 255
        img_data.append(img)

    # (N, H, W, C) -> (N, C, H, W), the channel order PyTorch expects
    img_numpy = np.array(img_data).transpose((0, 3, 1, 2))
    label_numpy = np.array(Ytrain)
    train_data = torch.utils.data.TensorDataset(torch.from_numpy(img_numpy).float(),
                                                torch.from_numpy(label_numpy).long())
    return train_data
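# A usage sketch under assumptions: `tool` stands in for an instance of the
# enclosing class with _params configured, and the sample-list name is
# illustrative. The TensorDataset returned above plugs directly into a
# standard PyTorch DataLoader.
import torch.utils.data as Data

train_data = tool.load_custom_data_to_memory("S500_128_train")
train_loader = Data.DataLoader(dataset=train_data, batch_size=32, shuffle=True)
for x, y in train_loader:
    # x: float tensor of shape (N, 3, H, W) in [0, 1]; y: long tensor of labels
    break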
def test_show_sample_txt(self):
    c = Params()
    c.load_config_file("D:/CloudSpace/WorkSpace/PatholImage/config/justin2.json")

    sample_txt = "{}/{}".format(c.PATCHS_ROOT_PATH, "S500_128_False_normal.txt")
    patch_path = c.PATCHS_ROOT_PATH
    filenames_list, labels_list = read_csv_file(patch_path, sample_txt)

    fig = plt.figure(figsize=(8, 10), dpi=100)
    for index, filename in enumerate(filenames_list):
        img = imread(filename)

        pos = index % 20
        plt.subplot(4, 5, pos + 1)
        plt.imshow(img)
        plt.axis("off")

        if pos == 19:
            fig.tight_layout()  # trim the surrounding whitespace
            plt.subplots_adjust(wspace=0, hspace=0)  # tighten subplot spacing
            plt.show()
            fig = plt.figure(figsize=(8, 10), dpi=100)  # fresh figure for the next 20 patches
def calculate_hist(self, source_code, source_txt, file_code):
    def _generate_histogram(filenames):
        Shape_L = (101,)  # 100 + 1
        Shape_A = (256,)  # 127 + 128 + 1
        Shape_B = (256,)
        hist_l = np.zeros(Shape_L)
        hist_a = np.zeros(Shape_A)
        hist_b = np.zeros(Shape_B)

        for K, file in enumerate(filenames):
            img = io.imread(file, as_gray=False)
            lab_img = color.rgb2lab(img)

            # split the three LAB channels
            labO_l = np.array(lab_img[:, :, 0])
            labO_a = np.array(lab_img[:, :, 1])
            labO_b = np.array(lab_img[:, :, 2])

            labO_l = np.rint(labO_l)
            labO_a = np.rint(labO_a)
            labO_b = np.rint(labO_b)

            values, counts = np.unique(labO_l.ravel(), return_counts=True)
            for value, count in zip(values, counts):
                hist_l[int(value)] += count

            values, counts = np.unique(labO_a.ravel(), return_counts=True)
            for value, count in zip(values, counts):
                hist_a[int(value) + 128] += count

            values, counts = np.unique(labO_b.ravel(), return_counts=True)
            for value, count in zip(values, counts):
                hist_b[int(value) + 128] += count

            if K % 1000 == 0:
                print("{} calculate histogram >>> {}".format(time.asctime(time.localtime()), K))

        # keep only the non-empty bins of each histogram
        tag = hist_l > 0
        values_l = np.arange(0, 101)
        hist_l = hist_l[tag]
        values_l = values_l[tag]

        tag = hist_a > 0
        values_a = np.arange(-128, 128)
        hist_a = hist_a[tag]
        values_a = values_a[tag]

        tag = hist_b > 0
        values_b = np.arange(-128, 128)
        hist_b = hist_b[tag]
        values_b = values_b[tag]

        return {"L": (values_l, hist_l), "A": (values_a, hist_a), "B": (values_b, hist_b)}

    root_path = self._params.PATCHS_ROOT_PATH

    print("prepare transform function ...", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

    source_path = "{}/{}".format(root_path[source_code], source_txt)
    source_files, _ = read_csv_file(root_path[source_code], source_path)
    print("Loaded the number of images = ", len(source_files))

    hist_sources = _generate_histogram(source_files)

    project_root = self._params.PROJECT_ROOT
    np.save("{}/data/{}".format(project_root, file_code), hist_sources)
    return
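# A sketch of reading the histogram file saved by calculate_hist. np.save
# pickles the dict, so np.load needs allow_pickle=True and .item() to recover
# it; project_root and file_code below are illustrative placeholders for the
# values used in the save call.
import numpy as np

project_root = "."
file_code = "hist_sources"
hist = np.load("{}/data/{}.npy".format(project_root, file_code),
               allow_pickle=True).item()
values_l, counts_l = hist["L"]  # non-empty L-channel bins and their counts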
def normalize_dataset(self, source_samples, target_dir, idx_range=None, batch_size=20):
    self.opcode = 19
    # normal = ACDNormalization_tf("acd", dc_txt="dc.txt", w_txt="w.txt", template_path="template_normal")
    normal = ACDNormalization("acd", dc_txt="dc.txt", w_txt="w.txt", template_path="template_normal")

    patch_root = self._params.PATCHS_ROOT_PATH[source_samples[0]]
    sample_filename = source_samples[1]
    train_list = "{}/{}".format(patch_root, sample_filename)

    Xtrain, Ytrain = read_csv_file(patch_root, train_list)
    if idx_range is not None:
        Xtrain = Xtrain[idx_range[0]:idx_range[1]]
        Ytrain = Ytrain[idx_range[0]:idx_range[1]]

    # prepare
    images = []
    for patch_file in Xtrain:
        img = io.imread(patch_file, as_gray=False)
        # imgBGR = img[:, :, (2, 1, 0)]
        # images.append(imgBGR)
        images.append(img)
    normal.prepare(images)

    target_cancer_path = "{}/{}_cancer".format(patch_root, target_dir)
    target_normal_path = "{}/{}_normal".format(patch_root, target_dir)

    if not os.path.exists(target_cancer_path):
        os.makedirs(target_cancer_path)
    if not os.path.exists(target_normal_path):
        os.makedirs(target_normal_path)

    n = 0
    batch_images = []
    batch_y = []
    batch_blocks = []
    for K, (x, y) in enumerate(zip(Xtrain, Ytrain)):
        new_block = Block()
        new_block.load_img(x)
        img = np.array(new_block.get_img())
        # imgBGR = img[:, :, (2, 1, 0)]
        # batch_images.append(imgBGR)
        batch_images.append(img)
        batch_y.append(y)
        batch_blocks.append(new_block)
        n = n + 1

        if n >= batch_size:
            norm_images = normal.normalize_on_batch(batch_images)
            for block, norm_img, y in zip(batch_blocks, norm_images, batch_y):
                # block.set_img(255 * norm_img[:, :, (2, 1, 0)])
                block.set_img(255 * norm_img)
                block.opcode = self.opcode

                if y == 0:
                    block.save_img(target_normal_path)
                else:
                    block.save_img(target_cancer_path)

            batch_images = []
            batch_y = []
            batch_blocks = []
            n = 0

        if K % 1000 == 0:
            print("{} normalizing >>> {}".format(time.asctime(time.localtime()), K))

    # flush the last, incomplete batch
    if n > 0:
        norm_images = normal.normalize_on_batch(batch_images)
        for block, norm_img, y in zip(batch_blocks, norm_images, batch_y):
            # block.set_img(255 * norm_img[:, :, (2, 1, 0)])
            block.set_img(255 * norm_img)
            block.opcode = self.opcode

            if y == 0:
                block.save_img(target_normal_path)
            else:
                block.save_img(target_cancer_path)
    return
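# A hypothetical call sketch (instance name, patch-root code, and file names
# are all assumed): normalize every patch listed in one sample file into
# <patch_root>/v1_cancer and <patch_root>/v1_normal, 20 images per batch,
# over the whole list (idx_range=None).
tool.normalize_dataset(("P0404", "S500_128_train.txt"), "v1",
                       idx_range=None, batch_size=20)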
def test_read_csv_file(self):
    filenames_list, labels_list = util.read_csv_file(
        "D:/Data/Patches/P1113/", "D:/Data/Patches/P1113/T_NC_256_test.txt")
    print(len(filenames_list), len(labels_list))
def evaluate_model_based_slice(self, samples_name, batch_size, max_count, slice_count):
    test_list = "{}/{}".format(self._params.PATCHS_ROOT_PATH[samples_name[0]], samples_name[1])
    Xtest, Ytest = read_csv_file(self._params.PATCHS_ROOT_PATH[samples_name[0]], test_list)

    # group the patch files by the slice they were cut from
    slice_X = {}
    slice_Y = {}
    b = Block()
    for file_name, true_y in zip(Xtest, Ytest):
        b.decoding(file_name, 256, 256)
        if b.slice_number in slice_X:
            slice_X[b.slice_number].append(file_name)
            slice_Y[b.slice_number].append(true_y)
        else:
            slice_X[b.slice_number] = [file_name]
            slice_Y[b.slice_number] = [true_y]

    result = []
    for slice_name in sorted(slice_X.keys()):
        if "Normal" in slice_name:
            continue

        X_data = slice_X[slice_name]
        Y_data = slice_Y[slice_name]
        if max_count is not None:
            X_data, Y_data = X_data[:max_count], Y_data[:max_count]  # for debugging

        test_data = Image_Dataset(X_data, Y_data, norm=None)
        test_loader = Data.DataLoader(dataset=test_data, batch_size=batch_size,
                                      shuffle=False, num_workers=self.NUM_WORKERS)
        data_len = len(test_loader)

        self.model = self.load_pretrained_model_on_predict()
        self.construct_shadow_classifier(self.model)

        self.model.to(self.device)
        self.model.eval()

        probability = []
        prediction = []
        high_dim_features = []
        low_dim_features = []
        for step, (x, y) in enumerate(test_loader):
            b_x = Variable(x.to(self.device))

            output = self.model(b_x)  # the model does not end with a softmax layer
            output_softmax = nn.functional.softmax(output, dim=1)
            probs, preds = torch.max(output_softmax, 1)

            high_dim_features.extend(self.model.out_feature.cpu().numpy())
            low_dim_features.extend(output.detach().cpu().numpy())
            probability.extend(probs.detach().cpu().numpy())
            prediction.extend(preds.detach().cpu().numpy())
            print('predicting => %d / %d ' % (step + 1, data_len))

        low_dim_features = np.array(low_dim_features)
        prediction = np.array(prediction)
        probability = np.array(probability)

        if len(prediction) > 6 and np.sum(prediction) > 3:  # need at least 3 cancer samples as input
            # weight the samples
            weight = self.correct_sample_weights(low_dim_features, prediction)

            high_dim_features = np.array(high_dim_features)
            prediction = np.array(prediction)

            # start the elastic fine-tuning process
            self.shadow_classifier.train_myself(high_dim_features, prediction, weight,
                                                batch_size, 0.1, 10)
            probability, prediction, low_dim_features = self.shadow_classifier.predict(
                high_dim_features, batch_size)

        y_true = np.array(Y_data)
        predicted_tags = np.array(prediction)
        probability = np.array(probability)
        count = len(Y_data)
        accu = float(sum(predicted_tags == y_true)) / count
        result_str = "{} => accu = {:.4f}, count = {}, mean of prob = {:.6f}".format(
            slice_name, accu, count, np.mean(probability))
        print(result_str)
        result.append(result_str)

        if len(result) > slice_count:
            break

    # finally, print all the results in one pass
    for item in result:
        print(item)
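# A self-contained toy of the per-batch prediction step above: softmax over
# two-class logits, then torch.max along dim 1 returns the winning probability
# and the predicted class index.
import torch
import torch.nn as nn

logits = torch.tensor([[2.0, 0.5], [0.1, 1.3]])  # fake 2-sample model output
probs, preds = torch.max(nn.functional.softmax(logits, dim=1), 1)
# probs -> tensor([0.8176, 0.7685]); preds -> tensor([0, 1])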
def extract_features_save_to_file(self, samples_name, batch_size):
    '''
    Extract the feature vectors of an image sample set and save them to disk.
    :param samples_name: name of the sample-list file
    :param batch_size: number of images per batch
    :return: the file in which the feature vectors are saved
    '''
    train_list = "{}/{}_train.txt".format(self._params.PATCHS_ROOT_PATH, samples_name)
    test_list = "{}/{}_test.txt".format(self._params.PATCHS_ROOT_PATH, samples_name)

    Xtrain, Ytrain = read_csv_file(self._params.PATCHS_ROOT_PATH, train_list)
    train_data = Image_Dataset(Xtrain, Ytrain)

    Xtest, Ytest = read_csv_file(self._params.PATCHS_ROOT_PATH, test_list)
    test_data = Image_Dataset(Xtest, Ytest)

    train_loader = Data.DataLoader(dataset=train_data, batch_size=batch_size,
                                   shuffle=False, num_workers=self.NUM_WORKERS)
    test_loader = Data.DataLoader(dataset=test_data, batch_size=batch_size,
                                  shuffle=False, num_workers=self.NUM_WORKERS)

    model = self.load_pretrained_model()
    print(model)

    if self.use_GPU:
        model.cuda()
    model.eval()

    for stage in ["train", "test"]:
        if stage == "train":
            data_loader = train_loader
        else:
            data_loader = test_loader

        data_len = len(data_loader)
        features = []

        for step, (x, y) in enumerate(data_loader):
            if self.use_GPU:
                b_x = Variable(x).cuda()  # batch x
            else:
                b_x = Variable(x)  # batch x

            output = model(b_x)
            f = output.cpu().data.numpy()
            avg_f = np.mean(f, axis=(-2, -1))  # global average pooling
            features.extend(avg_f)

            print('extracting features => %d / %d ' % (step + 1, data_len))

        if stage == "train":
            Y = Ytrain
        else:
            Y = Ytest

        save_path = "{}/data/pytorch/{}_{}_{}".format(self._params.PROJECT_ROOT,
                                                      self.model_name, samples_name, stage)
        labels = Y[:len(features)]
        np.savez(save_path + "_features", features, labels)
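# A sketch of reading the feature file written above. np.savez was called with
# positional arrays, so they come back under the default keys arr_0 (features)
# and arr_1 (labels); save_path below is an illustrative placeholder for the
# path built in extract_features_save_to_file.
import numpy as np

save_path = "./data/pytorch/densenet_samples_train"  # hypothetical example
data = np.load(save_path + "_features.npz")
features, labels = data["arr_0"], data["arr_1"]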