    def augment_dataset(self, params, source_samples, target_dir, range=None):
        patch_root = params.PATCHS_ROOT_PATH[source_samples[0]]
        sample_filename = source_samples[1]
        train_list = "{}/{}".format(patch_root, sample_filename)

        Xtrain, Ytrain = read_csv_file(patch_root, train_list)
        if range is not None:
            Xtrain = Xtrain[range[0]:range[1]]
            Ytrain = Ytrain[range[0]:range[1]]

        target_cancer_path = "{}/{}_cancer".format(patch_root, target_dir)
        target_normal_path = "{}/{}_normal".format(patch_root, target_dir)

        if not os.path.exists(target_cancer_path):
            os.makedirs(target_cancer_path)
        if not os.path.exists(target_normal_path):
            os.makedirs(target_normal_path)

        for K, (x, y) in enumerate(zip(Xtrain, Ytrain)):
            block = Block()
            block.load_img(x)
            img = block.get_img()

            aug_img = self.augment_images(img) * 255
            block.set_img(aug_img)
            block.opcode = self.opcode

            if y == 0:
                block.save_img(target_normal_path)
            else:
                block.save_img(target_cancer_path)

            if K % 1000 == 0:
                print("{} augmenting >>> {}".format(time.asctime(time.localtime()), K))
Example #2
    def load_custom_data_to_memory(self, samples_name):
        data_list = "{}/{}.txt".format(self._params.PATCHS_ROOT_PATH, samples_name)
        Xtrain, Ytrain = read_csv_file(self._params.PATCHS_ROOT_PATH, data_list)
        # train_data = Image_Dataset(Xtrain, Ytrain)
        img_data = []
        for file_name in Xtrain:
            img = imread(file_name) / 255
            img_data.append(img)

        img_numpy = np.array(img_data).transpose((0, 3, 1, 2))
        label_numpy = np.array(Ytrain)
        train_data = torch.utils.data.TensorDataset(torch.from_numpy(img_numpy).float(),
                                                    torch.from_numpy(label_numpy).long())
        return train_data
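The TensorDataset returned above feeds directly into a standard PyTorch DataLoader; a small sketch (the instance name cnn and the sample set name are placeholders):

# Hypothetical usage of load_custom_data_to_memory; "T_NC_128" is a placeholder sample name.
train_data = cnn.load_custom_data_to_memory("T_NC_128")
train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                           batch_size=32,
                                           shuffle=True)
for batch_x, batch_y in train_loader:
    # batch_x: float tensor of shape (N, 3, H, W); batch_y: long tensor of labels
    break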
Example #3
    def test_show_sample_txt(self):
        c = Params()
        c.load_config_file(
            "D:/CloudSpace/WorkSpace/PatholImage/config/justin2.json")
        sample_txt = "{}/{}".format(c.PATCHS_ROOT_PATH,
                                    "S500_128_False_normal.txt")
        patch_path = c.PATCHS_ROOT_PATH

        filenames_list, labels_list = read_csv_file(patch_path, sample_txt)

        fig = plt.figure(figsize=(8, 10), dpi=100)
        for index, filename in enumerate(filenames_list):
            img = imread(filename)
            pos = index % 20
            plt.subplot(4, 5, pos + 1)
            plt.imshow(img)
            plt.axis("off")

            if pos == 19:
                fig.tight_layout()  # adjust the overall whitespace
                plt.subplots_adjust(wspace=0, hspace=0)  # adjust subplot spacing
                plt.show()
    def calculate_hist(self, source_code, source_txt, file_code):
        def _generate_histogram(filenames):
            Shape_L = (101, )  # 100 + 1
            Shape_A = (256, )  # 127 + 128 + 1
            Shape_B = (256, )

            hist_l = np.zeros(Shape_L)
            hist_a = np.zeros(Shape_A)
            hist_b = np.zeros(Shape_B)
            for K, file in enumerate(filenames):
                img = io.imread(file, as_gray=False)
                lab_img = color.rgb2lab(img)

                # split into the L, A, B channels
                labO_l = np.array(lab_img[:, :, 0])
                labO_a = np.array(lab_img[:, :, 1])
                labO_b = np.array(lab_img[:, :, 2])

                labO_l = np.rint(labO_l)
                labO_a = np.rint(labO_a)
                labO_b = np.rint(labO_b)

                values, counts = np.unique(labO_l.ravel(), return_counts=True)
                for value, count in zip(values, counts):
                    hist_l[int(value)] += count

                values, counts = np.unique(labO_a.ravel(), return_counts=True)
                for value, count in zip(values, counts):
                    hist_a[int(value) + 128] += count

                values, counts = np.unique(labO_b.ravel(), return_counts=True)
                for value, count in zip(values, counts):
                    hist_b[int(value) + 128] += count

                if K % 1000 == 0:
                    print("{} calculate histogram >>> {}".format(
                        time.asctime(time.localtime()), K))

            tag = hist_l > 0
            values_l = np.arange(0, 101)
            hist_l = hist_l[tag]
            values_l = values_l[tag]

            tag = hist_a > 0
            values_a = np.arange(-128, 128)
            hist_a = hist_a[tag]
            values_a = values_a[tag]

            tag = hist_b > 0
            values_b = np.arange(-128, 128)
            hist_b = hist_b[tag]
            values_b = values_b[tag]

            return {
                "L": (values_l, hist_l),
                "A": (values_a, hist_a),
                "B": (values_b, hist_b)
            }

        root_path = self._params.PATCHS_ROOT_PATH
        print("prepare transform function ...",
              time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        source_path = "{}/{}".format(root_path[source_code], source_txt)
        source_files, _ = read_csv_file(root_path[source_code], source_path)
        print("Loaded the number of images = ", len(source_files))
        hist_sources = _generate_histogram(source_files)

        project_root = self._params.PROJECT_ROOT
        np.save("{}/data/{}".format(project_root, file_code), hist_sources)
        return
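Because np.save pickles the dictionary returned by _generate_histogram, reading it back needs allow_pickle=True; a small loading sketch (the file_code value is a placeholder):

# Hypothetical reload; "hist_templates" stands in for the file_code used when saving.
project_root = c.PROJECT_ROOT    # c: a loaded Params instance, as in the test above
hist = np.load("{}/data/{}.npy".format(project_root, "hist_templates"),
               allow_pickle=True).item()
values_l, counts_l = hist["L"]   # non-empty L-channel bins and their pixel counts
values_a, counts_a = hist["A"]   # A channel, bin values shifted back to [-128, 127]
values_b, counts_b = hist["B"]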
    def normalize_dataset(self,
                          source_samples,
                          target_dir,
                          range=None,
                          batch_size=20):
        self.opcode = 19
        # normal = ACDNormalization_tf("acd", dc_txt="dc.txt", w_txt="w.txt", template_path="template_normal")
        normal = ACDNormalization("acd",
                                  dc_txt="dc.txt",
                                  w_txt="w.txt",
                                  template_path="template_normal")

        patch_root = self._params.PATCHS_ROOT_PATH[source_samples[0]]
        sample_filename = source_samples[1]
        train_list = "{}/{}".format(patch_root, sample_filename)

        Xtrain, Ytrain = read_csv_file(patch_root, train_list)
        if range is not None:
            Xtrain = Xtrain[range[0]:range[1]]
            Ytrain = Ytrain[range[0]:range[1]]

        # prepare
        images = []
        for patch_file in Xtrain:
            img = io.imread(patch_file, as_gray=False)
            # imgBGR = img[:, :, (2, 1, 0)]
            # images.append(imgBGR)
            images.append(img)

        normal.prepare(images)

        target_cancer_path = "{}/{}_cancer".format(patch_root, target_dir)
        target_normal_path = "{}/{}_normal".format(patch_root, target_dir)

        if not os.path.exists(target_cancer_path):
            os.makedirs(target_cancer_path)
        if not os.path.exists(target_normal_path):
            os.makedirs(target_normal_path)

        n = 0
        batch_images = []
        batch_y = []
        batch_blocks = []
        for K, (x, y) in enumerate(zip(Xtrain, Ytrain)):
            new_block = Block()
            new_block.load_img(x)
            img = np.array(new_block.get_img())
            # imgBGR = img[:, :, (2, 1, 0)]
            # batch_images.append(imgBGR)
            batch_images.append(img)
            batch_y.append(y)
            batch_blocks.append(new_block)
            n = n + 1

            if n >= batch_size:
                norm_images = normal.normalize_on_batch(batch_images)

                for block, norm_img, y in zip(batch_blocks, norm_images,
                                              batch_y):
                    # block.set_img(255 * norm_img[:, :, (2, 1, 0)])
                    block.set_img(255 * norm_img)
                    block.opcode = self.opcode

                    if y == 0:
                        block.save_img(target_normal_path)
                    else:
                        block.save_img(target_cancer_path)

                batch_images = []
                batch_y = []
                batch_blocks = []
                n = 0

            if K % 1000 == 0:
                print("{} normalizing >>> {}".format(
                    time.asctime(time.localtime()), K))

        if n > 0:
            norm_images = normal.normalize_on_batch(batch_images)
            for block, norm_img, y in zip(batch_blocks, norm_images, batch_y):
                # block.set_img(255 * norm_img[:, :, (2, 1, 0)])
                block.set_img(255 * norm_img)
                block.opcode = self.opcode

                if y == 0:
                    block.save_img(target_normal_path)
                else:
                    block.save_img(target_cancer_path)

        return
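A hedged usage sketch for normalize_dataset; the instance name prep is hypothetical, and source_samples follows the same (PATCHS_ROOT_PATH key, list filename) convention as augment_dataset above:

# Hypothetical call; the key and filename are placeholder values.
prep.normalize_dataset(source_samples=("P1113", "T_NC_256_train.txt"),
                       target_dir="S500_normalized",
                       range=None,        # normalize the whole list
                       batch_size=20)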
Example #6
    def test_read_csv_file(self):
        filenames_list, labels_list = util.read_csv_file(
            "D:/Data/Patches/P1113/",
            "D:/Data/Patches/P1113/T_NC_256_test.txt")
        print(len(filenames_list), len(labels_list))
Example #7
    def evaluate_model_based_slice(self, samples_name, batch_size, max_count,
                                   slice_count):
        test_list = "{}/{}".format(
            self._params.PATCHS_ROOT_PATH[samples_name[0]], samples_name[1])
        Xtest, Ytest = read_csv_file(
            self._params.PATCHS_ROOT_PATH[samples_name[0]], test_list)

        slice_X = {}
        slice_Y = {}
        b = Block()
        for file_name, true_y in zip(Xtest, Ytest):
            b.decoding(file_name, 256, 256)
            if b.slice_number in slice_X:
                slice_X[b.slice_number].append(file_name)
                slice_Y[b.slice_number].append(true_y)
            else:
                slice_X[b.slice_number] = [file_name]
                slice_Y[b.slice_number] = [true_y]

        result = []
        for slice_name in sorted(slice_X.keys()):
            if "Normal" in slice_name:
                continue

            X_data = slice_X[slice_name]
            Y_data = slice_Y[slice_name]

            if max_count is not None:
                X_data, Y_data = X_data[:max_count], Y_data[:max_count]  # for debug

            test_data = Image_Dataset(X_data, Y_data, norm=None)
            test_loader = Data.DataLoader(dataset=test_data,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          num_workers=self.NUM_WORKERS)
            data_len = len(test_loader)

            self.model = self.load_pretrained_model_on_predict()
            self.construct_shadow_classifier(self.model)

            self.model.to(self.device)
            self.model.eval()

            probability = []
            prediction = []
            high_dim_features = []
            low_dim_features = []
            for step, (x, y) in enumerate(test_loader):
                b_x = Variable(x.to(self.device))

                output = self.model(b_x)  # the model does not include a final softmax layer
                output_softmax = nn.functional.softmax(output, dim=1)
                probs, preds = torch.max(output_softmax, 1)

                high_dim_features.extend(self.model.out_feature.cpu().numpy())
                low_dim_features.extend(output.detach().cpu().numpy())
                probability.extend(probs.detach().cpu().numpy())
                prediction.extend(preds.detach().cpu().numpy())
                print('predicting => %d / %d ' % (step + 1, data_len))

            low_dim_features = np.array(low_dim_features)
            prediction = np.array(prediction)
            probability = np.array(probability)

            if len(prediction) > 6 and np.sum(prediction) > 3:  # requires at least 3 cancer samples
                # weight the samples
                weight = self.correct_sample_weights(low_dim_features,
                                                     prediction)

                high_dim_features = np.array(high_dim_features)
                prediction = np.array(prediction)
                # start the elastic adjustment process
                self.shadow_classifier.train_myself(high_dim_features,
                                                    prediction, weight,
                                                    batch_size, 0.1, 10)
                probability, prediction, low_dim_features = self.shadow_classifier.predict(
                    high_dim_features, batch_size)

            Ytest = np.array(Y_data)
            predicted_tags = np.array(prediction)
            probability = np.array(probability)

            count = len(Y_data)
            accu = float(sum(predicted_tags == Ytest)) / count

            result_str = "{} => accu ={:.4f}, count = {}, mean of prob = {:.6f}".format(
                slice_name, accu, count, np.mean(probability))
            print(result_str)

            result.append(result_str)

            if len(result) > slice_count:
                break

        # finally, print all results at once
        for item in result:
            print(item)
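A sketch of how the per-slice evaluation above might be driven; the classifier instance cnn and the test list filename are assumptions:

# Hypothetical call: at most 100 patches per slice, stop after 10 slices.
cnn.evaluate_model_based_slice(samples_name=("P1113", "T_NC_256_test.txt"),
                               batch_size=64,
                               max_count=100,
                               slice_count=10)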
    def extract_features_save_to_file(self, samples_name, batch_size):
        '''
        Extract feature vectors for the image sample set and save them to disk.
        :param samples_name: file listing of the sample set
        :param batch_size: number of images per batch
        :return: saved file of feature vectors
        '''

        train_list = "{}/{}_train.txt".format(self._params.PATCHS_ROOT_PATH,
                                              samples_name)
        test_list = "{}/{}_test.txt".format(self._params.PATCHS_ROOT_PATH,
                                            samples_name)

        Xtrain, Ytrain = read_csv_file(self._params.PATCHS_ROOT_PATH,
                                       train_list)
        train_data = Image_Dataset(Xtrain, Ytrain)

        Xtest, Ytest = read_csv_file(self._params.PATCHS_ROOT_PATH, test_list)
        test_data = Image_Dataset(Xtest, Ytest)

        train_loader = Data.DataLoader(dataset=train_data,
                                       batch_size=batch_size,
                                       shuffle=False,
                                       num_workers=self.NUM_WORKERS)
        test_loader = Data.DataLoader(dataset=test_data,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      num_workers=self.NUM_WORKERS)

        model = self.load_pretrained_model()
        print(model)

        if self.use_GPU:
            model.cuda()

        model.eval()

        for stage in ["train", "test"]:
            if stage == "train":
                data_loader = train_loader
            else:
                data_loader = test_loader

            data_len = len(data_loader)
            features = []
            for step, (x, y) in enumerate(data_loader):
                if self.use_GPU:
                    b_x = Variable(x).cuda()  # batch x
                else:
                    b_x = Variable(x)  # batch x

                output = model(b_x)
                f = output.cpu().data.numpy()
                avg_f = np.mean(f, axis=(-2, -1))  # global average pooling
                features.extend(avg_f)
                print('extracting features => %d / %d ' % (step + 1, data_len))

            if stage == "train":
                Y = Ytrain
            else:
                Y = Ytest

            save_path = "{}/data/pytorch/{}_{}_{}".format(
                self._params.PROJECT_ROOT, self.model_name, samples_name,
                stage)

            labels = Y[:len(features)]
            np.savez(save_path + "_features", features, labels)
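Since np.savez is called with positional arrays, the archive stores them as arr_0 (features) and arr_1 (labels); a small sketch for loading them back (the model and sample set names below are placeholders):

# Hypothetical reload of the features saved above; c is a loaded Params instance.
save_path = "{}/data/pytorch/{}_{}_{}".format(c.PROJECT_ROOT,
                                              "densenet121", "T_NC_256", "train")
data = np.load(save_path + "_features.npz")
features, labels = data["arr_0"], data["arr_1"]   # positional arrays from np.savez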