def train(self, data_path, class_name):
        """Fit a PaDiM-style Gaussian per patch position for every sub-model.

        Runs each training image of *class_name* through the sub-model picked
        by ``self.model_selector``, captures the layer1-3 feature maps via
        forward hooks, concatenates and channel-subsamples them, and appends
        one ``[mean, inverse_covariance]`` pair per sub-model to
        ``self.train_outputs``.

        Args:
            data_path:  root directory of the MVTec-style dataset.
            class_name: dataset category to train on.
        """
        # Feature maps captured by the forward hooks, in registration order
        # (layer1, layer2, layer3) of whichever model ran.
        outputs = []

        def hook(module, input, output):
            outputs.append(output)

        # Keep the handles so the hooks can be removed when training is done.
        # Previously the handles were discarded, so a second call to train()
        # stacked duplicate hooks and broke the layer/output pairing below.
        hook_handles = []
        for i in range(self.model_count):
            hook_handles.append(self.models[i].layer1[-1].register_forward_hook(hook))
            hook_handles.append(self.models[i].layer2[-1].register_forward_hook(hook))
            hook_handles.append(self.models[i].layer3[-1].register_forward_hook(hook))

        train_dataset = mvtec.MVTecDataset(data_path, class_name=class_name, is_train=True, resize=(IMG_SIZE,IMG_SIZE), cropsize = IMG_SIZE)
        train_dataloader = DataLoader(train_dataset, batch_size=1, pin_memory=True)

        self.train_outputs = []
        # Per-model accumulators: layer name -> list of per-batch tensors.
        train_outputs = []
        for i in range(self.model_count):
            train_outputs.append(OrderedDict([('layer1', []), ('layer2', []), ('layer3', [])]))

        for (x, _, _, file) in tqdm(train_dataloader, '| feature extraction | train | %s |' % class_name):
            model_index = self.model_selector(file)
            model = self.models[model_index]

            # Forward pass only; the hooks collect the intermediate features.
            with torch.no_grad():
                _ = model(x.to(self.device))
            # Hooks fire in registration order, so `outputs` lines up with
            # (layer1, layer2, layer3) of the model that just ran.
            for k, v in zip(train_outputs[model_index].keys(), outputs):
                train_outputs[model_index][k].append(v.cpu().detach())
            # Rebinding resets the list the hook closure appends to.
            outputs = []

        # Feature extraction is finished: detach all hooks again.
        for handle in hook_handles:
            handle.remove()

        for i in range(self.model_count):
            for k, v in train_outputs[i].items():
                train_outputs[i][k] = torch.cat(v, 0)

            # Embedding concat: fuse layer1/2/3 maps into one embedding volume.
            embedding_vectors = train_outputs[i]['layer1']
            for layer_name in ['layer2', 'layer3']:
                embedding_vectors = self.embedding_concat(embedding_vectors, train_outputs[i][layer_name])

            # Randomly select d channel dimensions (indices fixed in self.idx).
            embedding_vectors = torch.index_select(embedding_vectors, 1, self.idx)

            # Fit a multivariate Gaussian per patch position.
            B, C, H, W = embedding_vectors.size()
            embedding_vectors = embedding_vectors.view(B, C, H * W)
            mean = torch.mean(embedding_vectors, dim=0).numpy()
            cov = torch.zeros(C, C, H * W).numpy()

            I = np.identity(C)

            # `pos` used to be `i`, shadowing the outer model index.
            for pos in range(H * W):
                # 0.01 * I regularizes the covariance so it stays invertible.
                cov[:, :, pos] = np.cov(embedding_vectors[:, :, pos].numpy(), rowvar=False) + 0.01 * I

            # Batched inverse: cov.T is (H*W, C, C), inverted in one call.
            conv_inv = np.linalg.inv(cov.T).T
            self.train_outputs.append([mean, conv_inv])
# --- Esempio n. 2 (scraper artifact: example-site separator; left as a comment because bare text is a syntax error) ---
                  resume=cf.vit_resume)
cf.fmap_size = model.fe.fe_h
print('\n--------config----------')
for k in list(vars(cf).keys()):
    print('%s: %s' % (k, vars(cf)[k]))
total_parameters = sum(p.numel() for p in model.parameters()
                       if p.requires_grad)
print('total learnable parameters: %d\n' % total_parameters)

model.to(device)

results = {}
os.makedirs(os.path.join(cf.save_path, cf.experiment_name), exist_ok=True)

for class_name in mvtec.CLASS_NAMES:
    train_dataset = mvtec.MVTecDataset(cf.data_aug_path, class_name, True,
                                       cf.im_size, cf.im_size)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=cf.batchsize,
                                  shuffle=True,
                                  num_workers=cf.n_workers,
                                  pin_memory=True)
    test_dataset = mvtec.MVTecDataset(cf.data_path, class_name, False,
                                      cf.im_resize, cf.im_size)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=cf.batchsize,
                                 shuffle=False,
                                 num_workers=cf.n_workers,
                                 pin_memory=True)

    test_list, test_imgs_list, gt_list, gt_mask_list = walking_once(
        test_dataloader)
    def evaluate(self, data_path, class_name, save_path):
        """Score the MVTec test split of *class_name* via ``self.check``.

        Builds per-image anomaly score maps, reports image- and pixel-level
        ROC-AUC / PR metrics, derives an F1-optimal threshold (then relaxes it
        to 60%), prints the confusion matrix, and writes ROC curves plus
        per-image visualizations under *save_path*.
        """
        test_dataset = mvtec.MVTecDataset(data_path, class_name=class_name, is_train=False, resize=(IMG_SIZE,IMG_SIZE), cropsize = IMG_SIZE)
        test_dataloader = DataLoader(test_dataset, batch_size=1, pin_memory=True)

        gt_list = []        # image-level labels (0 = good, 1 = anomalous)
        gt_mask_list = []   # pixel-level ground-truth masks
        test_imgs = []

        score_map = []
        for (x, y, mask, file) in tqdm(test_dataloader, '| feature extraction | test | %s |' % class_name):
            # self.check() returns (label, per-pixel score map) for one file.
            _, score = self.check(file[0], True)
            score_map.append(score)

            test_imgs.extend(x.cpu().detach().numpy())
            gt_list.extend(y.cpu().detach().numpy())
            gt_mask_list.extend(mask.cpu().detach().numpy())

        score_map = np.array(score_map)  # e.g. (N, H, W)

        # Min/max normalization to [0, 1]; extrema are kept on self so the
        # same scaling can be reused at inference time.
        self.max_score = score_map.max()
        self.min_score = score_map.min()
        # Guard against a constant score map, which previously divided by zero.
        score_range = self.max_score - self.min_score
        scores = (score_map - self.min_score) / (score_range if score_range != 0 else 1.0)

        # calculate image-level ROC AUC score
        img_scores = scores.reshape(scores.shape[0], -1).max(axis=1)
        gt_list = np.asarray(gt_list)
        fpr, tpr, _ = roc_curve(gt_list, img_scores)
        img_roc_auc = roc_auc_score(gt_list, img_scores)

        fig, ax = plt.subplots(1, 2, figsize=(20, 10))
        fig_img_rocauc = ax[0]
        fig_pixel_rocauc = ax[1]

        print('image ROCAUC: %.3f' % (img_roc_auc))
        fig_img_rocauc.plot(fpr, tpr, label='%s img_ROCAUC: %.3f' % (class_name, img_roc_auc))

        # get optimal threshold: maximize F1 over the PR curve
        gt_mask = np.asarray(gt_mask_list)
        precision, recall, thresholds = precision_recall_curve(gt_mask.flatten(), scores.flatten())
        a = 2 * precision * recall
        b = precision + recall
        f1 = np.divide(a, b, out=np.zeros_like(a), where=b != 0)
        self.threshold = thresholds[np.argmax(f1)]

        print('Threshold: ' + str(self.threshold))

        # calculate per-pixel level ROCAUC
        fpr, tpr, _ = roc_curve(gt_mask.flatten(), scores.flatten())
        per_pixel_rocauc = roc_auc_score(gt_mask.flatten(), scores.flatten())
        print('pixel ROCAUC: %.3f' % (per_pixel_rocauc))

        det_auc_score = roc_auc_score(gt_list, img_scores)
        det_pr_score = average_precision_score(gt_list, img_scores)

        # auc score (per pixel level) for segmentation
        seg_auc_score = roc_auc_score(gt_mask.flatten(), scores.flatten())
        seg_pr_score = average_precision_score(gt_mask.flatten(), scores.flatten())

        # metrics over all data
        print(f"Det AUC: {det_auc_score:.4f}, Seg AUC: {seg_auc_score:.4f}")
        print(f"Det PR: {det_pr_score:.4f}, Seg PR: {seg_pr_score:.4f}")

        # Heuristic relaxation of the F1-optimal threshold (favors recall).
        self.threshold = self.threshold * 0.6

        scores_boolean = img_scores > self.threshold

        confm = confusion_matrix(gt_list, scores_boolean)
        print(confm)

        tn, fp, fn, tp = confusion_matrix(gt_list, scores_boolean).ravel()
        print(f"tn: {tn}      fp: {fp}      fn: {fn}      tp: {tp}")

        # AUC/PR of the *binarized* predictions (coarser than img_scores above).
        det_auc_score = roc_auc_score(gt_list, scores_boolean)
        det_pr_score = average_precision_score(gt_list, scores_boolean)

        print(f"Det AUC: {det_auc_score:.4f}")
        print(f"Det PR: {det_pr_score:.4f}")

        fig_pixel_rocauc.plot(fpr, tpr, label='%s ROCAUC: %.3f' % (class_name, per_pixel_rocauc))
        save_dir = save_path + '/' + f'pictures_{self.arch}'
        os.makedirs(save_dir, exist_ok=True)
        self.plot_fig(test_imgs, scores, gt_mask_list, self.threshold, save_dir, class_name)

        fig.tight_layout()
        fig.savefig(os.path.join(save_path, 'roc_curve.png'), dpi=100)
        # Release the figure; it was previously leaked on every call.
        plt.close(fig)
    def evaluate(self, data_path, class_name, save_path):
        """Evaluate the PaDiM model on the MVTec test split of *class_name*.

        Extracts layer1-3 features with forward hooks, computes per-patch
        Mahalanobis distances against the fitted Gaussians in
        ``self.train_outputs``, upsamples and smooths them into anomaly maps,
        then reports image-/pixel-level metrics and writes figures under
        *save_path*.
        """
        test_dataset = mvtec.MVTecDataset(data_path, class_name=class_name, is_train=False, resize=(IMG_SIZE,IMG_SIZE), cropsize = IMG_SIZE)
        test_dataloader = DataLoader(test_dataset, batch_size=32, pin_memory=True)
        test_outputs = OrderedDict([('layer1', []), ('layer2', []), ('layer3', [])])

        gt_list = []        # image-level labels
        gt_mask_list = []   # pixel-level ground-truth masks
        test_imgs = []
        test_imgs_names = []

        outputs = []

        def hook(module, input, output):
            outputs.append(output)

        # Keep the handles so the hooks can be removed afterwards. Previously
        # every call to evaluate() stacked another set of hooks, which would
        # desynchronize the layer/output zip below on repeated calls.
        hook_handles = [
            self.model.layer1[-1].register_forward_hook(hook),
            self.model.layer2[-1].register_forward_hook(hook),
            self.model.layer3[-1].register_forward_hook(hook),
        ]

        # extract test set features
        for (x, y, mask, file) in tqdm(test_dataloader, '| feature extraction | test | %s |' % class_name):
            test_imgs.extend(x.cpu().detach().numpy())
            test_imgs_names.extend(file)
            gt_list.extend(y.cpu().detach().numpy())
            gt_mask_list.extend(mask.cpu().detach().numpy())

            # Forward pass only; the hooks collect the features.
            with torch.no_grad():
                _ = self.model(x.to(self.device))

            # Hooks fire in registration order -> (layer1, layer2, layer3).
            for k, v in zip(test_outputs.keys(), outputs):
                test_outputs[k].append(v.cpu().detach())

            # Rebinding resets the list the hook closure appends to.
            outputs = []

        # Feature extraction is finished: detach the hooks again.
        for handle in hook_handles:
            handle.remove()

        for k, v in test_outputs.items():
            test_outputs[k] = torch.cat(v, 0)

        # Embedding concat: fuse layer1/2/3 maps into one embedding volume.
        embedding_vectors = test_outputs['layer1']
        for layer_name in ['layer2', 'layer3']:
            embedding_vectors = self.embedding_concat(embedding_vectors, test_outputs[layer_name])

        # Randomly select d channel dimensions (same self.idx as training).
        embedding_vectors = torch.index_select(embedding_vectors, 1, self.idx)

        # calculate distance matrix: per-patch Mahalanobis distance
        B, C, H, W = embedding_vectors.size()
        embedding_vectors = embedding_vectors.view(B, C, H * W).numpy()
        dist_list = []

        for i in range(H * W):
            mean = self.train_outputs[0][:, i]
            conv_inv = self.train_outputs[1][:, :, i]
            dist = [self.mahalanobis_squared(sample[:, i], mean, conv_inv) for sample in embedding_vectors]
            dist_list.append(dist)

        dist_list = np.array(dist_list).transpose(1, 0).reshape(B, H, W)

        # Upsample to input resolution. `x` is the last batch; assumes all
        # test images share the same spatial size -- TODO confirm.
        dist_list = torch.tensor(dist_list)
        score_map = F.interpolate(dist_list.unsqueeze(1), size=x.size(2), mode='bilinear', align_corners=False).squeeze().numpy()

        # apply gaussian smoothing on the score map
        for i in range(score_map.shape[0]):
            score_map[i] = gaussian_filter(score_map[i], sigma=4)

        # Min/max normalization to [0, 1]; extrema kept for reuse at inference.
        self.max_score = score_map.max()
        self.min_score = score_map.min()
        scores = (score_map - self.min_score) / (self.max_score - self.min_score)

        # calculate image-level ROC AUC score
        img_scores = scores.reshape(scores.shape[0], -1).max(axis=1)
        gt_list = np.asarray(gt_list)
        fpr, tpr, _ = roc_curve(gt_list, img_scores)
        img_roc_auc = roc_auc_score(gt_list, img_scores)

        fig, ax = plt.subplots(1, 2, figsize=(20, 10))
        fig_img_rocauc = ax[0]
        fig_pixel_rocauc = ax[1]

        print('image ROCAUC: %.3f' % (img_roc_auc))
        fig_img_rocauc.plot(fpr, tpr, label='%s img_ROCAUC: %.3f' % (class_name, img_roc_auc))

        # get optimal threshold: maximize F1 over the PR curve
        gt_mask = np.asarray(gt_mask_list)
        precision, recall, thresholds = precision_recall_curve(gt_mask.flatten(), scores.flatten())
        a = 2 * precision * recall
        b = precision + recall
        f1 = np.divide(a, b, out=np.zeros_like(a), where=b != 0)
        self.threshold = thresholds[np.argmax(f1)]

        print('Threshold: ' + str(self.threshold))

        # calculate per-pixel level ROCAUC
        fpr, tpr, _ = roc_curve(gt_mask.flatten(), scores.flatten())
        per_pixel_rocauc = roc_auc_score(gt_mask.flatten(), scores.flatten())
        print('pixel ROCAUC: %.3f' % (per_pixel_rocauc))

        det_auc_score = roc_auc_score(gt_list, img_scores)
        det_pr_score = average_precision_score(gt_list, img_scores)

        # auc score (per pixel level) for segmentation
        seg_auc_score = roc_auc_score(gt_mask.flatten(), scores.flatten())
        seg_pr_score = average_precision_score(gt_mask.flatten(), scores.flatten())

        # metrics over all data
        print(f"Det AUC: {det_auc_score:.4f}, Seg AUC: {seg_auc_score:.4f}")
        print(f"Det PR: {det_pr_score:.4f}, Seg PR: {seg_pr_score:.4f}")

        # HACK: hard-coded threshold from a previous calibration run; it
        # overrides the F1-optimal value computed above.
        #self.threshold = 0.36940035223960876
        self.threshold = 0.3166288733482361

        scores_boolean = img_scores > self.threshold

        confm = confusion_matrix(gt_list, scores_boolean)
        print(confm)

        tn, fp, fn, tp = confusion_matrix(gt_list, scores_boolean).ravel()
        print(f"tn: {tn}      fp: {fp}      fn: {fn}      tp: {tp}")

        # AUC/PR of the *binarized* predictions (coarser than img_scores).
        det_auc_score = roc_auc_score(gt_list, scores_boolean)
        det_pr_score = average_precision_score(gt_list, scores_boolean)

        print(f"Det AUC: {det_auc_score:.4f}")
        print(f"Det PR: {det_pr_score:.4f}")

        fig_pixel_rocauc.plot(fpr, tpr, label='%s ROCAUC: %.3f' % (class_name, per_pixel_rocauc))
        save_dir = save_path + '/' + f'pictures_{self.arch}'

        os.makedirs(save_dir, exist_ok=True)
        self.plot_fig(test_imgs, test_imgs_names, scores, gt_mask_list, self.threshold, save_dir, class_name)

        fig.tight_layout()
        fig.savefig(os.path.join(save_path, 'roc_curve.png'), dpi=100)
        # Release the figure; it was previously leaked on every call.
        plt.close(fig)
# --- Esempio n. 5 (scraper artifact: example-site separator; left as a comment because bare text is a syntax error) ---
def main():
    """PaDiM anomaly detection for a single MVTec class (ResNet backbone).

    Extracts layer1-3 features from an ImageNet-pretrained ResNet via forward
    hooks, fits one multivariate Gaussian per patch position on the training
    split (cached as a pickle), scores the test split with per-patch
    Mahalanobis distances, and writes ROC curves and per-image figures.
    """
    args = parse_args()

    # Load the backbone; t_d is the concatenated embedding depth and d the
    # size of the random channel subset kept for the Gaussians.
    if args.arch == 'resnet18':
        model = resnet18(pretrained=True, progress=True)
        t_d = 448
        d = 100
    elif args.arch == 'wide_resnet50_2':
        model = wide_resnet50_2(pretrained=True, progress=True)
        t_d = 1792
        d = 550
    else:
        # Previously an unknown arch fell through and crashed later with a
        # NameError on `model`; fail fast with a clear message instead.
        raise ValueError('unsupported architecture: %s' % args.arch)
    model.to(device)
    model.eval()
    random.seed(1024)
    torch.manual_seed(1024)
    if use_cuda:
        torch.cuda.manual_seed_all(1024)

    # Fixed random channel subset used for dimensionality reduction.
    idx = torch.tensor(sample(range(0, t_d), d))

    # set model's intermediate outputs
    outputs = []

    def hook(module, input, output):
        outputs.append(output)

    model.layer1[-1].register_forward_hook(hook)
    model.layer2[-1].register_forward_hook(hook)
    model.layer3[-1].register_forward_hook(hook)

    os.makedirs(os.path.join(args.save_path, 'temp_%s' % args.arch),
                exist_ok=True)
    fig, ax = plt.subplots(1, 2, figsize=(20, 10))
    fig_img_rocauc = ax[0]
    fig_pixel_rocauc = ax[1]

    total_roc_auc = []
    total_pixel_roc_auc = []

    class_name = args.class_name

    train_dataset = mvtec.MVTecDataset(args.data_path,
                                       class_name=class_name,
                                       is_train=True)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=32,
                                  pin_memory=True)
    test_dataset = mvtec.MVTecDataset(args.data_path,
                                      class_name=class_name,
                                      is_train=False)
    test_dataloader = DataLoader(test_dataset, batch_size=32, pin_memory=True)

    train_outputs = OrderedDict([('layer1', []), ('layer2', []),
                                 ('layer3', [])])
    test_outputs = OrderedDict([('layer1', []), ('layer2', []),
                                ('layer3', [])])

    # extract train set features (cached per class as a pickle)
    train_feature_filepath = os.path.join(args.save_path,
                                          'temp_%s' % args.arch,
                                          'train_%s.pkl' % class_name)
    if not os.path.exists(train_feature_filepath):
        for (x, _,
             _) in tqdm(train_dataloader,
                        '| feature extraction | train | %s |' % class_name):
            # Forward pass only; the hooks collect the features.
            with torch.no_grad():
                _ = model(x.to(device))
            # Hooks fire in registration order -> (layer1, layer2, layer3).
            for k, v in zip(train_outputs.keys(), outputs):
                train_outputs[k].append(v.cpu().detach())
            # Rebinding resets the list the hook closure appends to.
            outputs = []
        for k, v in train_outputs.items():
            train_outputs[k] = torch.cat(v, 0)

        # Embedding concat: fuse layer1/2/3 maps into one embedding volume.
        embedding_vectors = train_outputs['layer1']
        for layer_name in ['layer2', 'layer3']:
            embedding_vectors = embedding_concat(embedding_vectors,
                                                 train_outputs[layer_name])

        # randomly select d dimension
        embedding_vectors = torch.index_select(embedding_vectors, 1, idx)
        # Fit a multivariate Gaussian per patch position.
        B, C, H, W = embedding_vectors.size()
        embedding_vectors = embedding_vectors.view(B, C, H * W)
        mean = torch.mean(embedding_vectors, dim=0).numpy()
        cov = torch.zeros(C, C, H * W).numpy()
        I = np.identity(C)
        for i in range(H * W):
            # 0.01 * I regularizes the covariance so it stays invertible.
            cov[:, :, i] = np.cov(embedding_vectors[:, :, i].numpy(),
                                  rowvar=False) + 0.01 * I
        # save learned distribution
        train_outputs = [mean, cov]
        with open(train_feature_filepath, 'wb') as f:
            pickle.dump(train_outputs, f)
    else:
        print('load train set feature from: %s' % train_feature_filepath)
        with open(train_feature_filepath, 'rb') as f:
            train_outputs = pickle.load(f)

    gt_list = []
    gt_mask_list = []
    test_imgs = []

    # extract test set features
    for (x, y,
         mask) in tqdm(test_dataloader,
                       '| feature extraction | test | %s |' % class_name):
        test_imgs.extend(x.cpu().detach().numpy())
        gt_list.extend(y.cpu().detach().numpy())
        gt_mask_list.extend(mask.cpu().detach().numpy())
        # Forward pass only; the hooks collect the features.
        with torch.no_grad():
            _ = model(x.to(device))
        # Hooks fire in registration order -> (layer1, layer2, layer3).
        for k, v in zip(test_outputs.keys(), outputs):
            test_outputs[k].append(v.cpu().detach())
        # Rebinding resets the list the hook closure appends to.
        outputs = []
    for k, v in test_outputs.items():
        test_outputs[k] = torch.cat(v, 0)

    # Embedding concat
    embedding_vectors = test_outputs['layer1']
    for layer_name in ['layer2', 'layer3']:
        embedding_vectors = embedding_concat(embedding_vectors,
                                             test_outputs[layer_name])

    # randomly select d dimension (same idx as training)
    embedding_vectors = torch.index_select(embedding_vectors, 1, idx)

    # calculate distance matrix: per-patch Mahalanobis distance
    B, C, H, W = embedding_vectors.size()
    embedding_vectors = embedding_vectors.view(B, C, H * W).numpy()
    dist_list = []
    for i in range(H * W):
        mean = train_outputs[0][:, i]
        conv_inv = np.linalg.inv(train_outputs[1][:, :, i])
        dist = [
            mahalanobis(sample[:, i], mean, conv_inv)
            for sample in embedding_vectors
        ]
        dist_list.append(dist)

    dist_list = np.array(dist_list).transpose(1, 0).reshape(B, H, W)

    # Upsample to input resolution. `x` is the last test batch; assumes all
    # test images share the same spatial size -- TODO confirm.
    dist_list = torch.tensor(dist_list)
    score_map = F.interpolate(dist_list.unsqueeze(1),
                              size=x.size(2),
                              mode='bilinear',
                              align_corners=False).squeeze().numpy()

    # apply gaussian smoothing on the score map
    for i in range(score_map.shape[0]):
        score_map[i] = gaussian_filter(score_map[i], sigma=4)

    # Min/max normalization to [0, 1]
    max_score = score_map.max()
    min_score = score_map.min()
    scores = (score_map - min_score) / (max_score - min_score)

    # calculate image-level ROC AUC score
    img_scores = scores.reshape(scores.shape[0], -1).max(axis=1)
    gt_list = np.asarray(gt_list)
    fpr, tpr, _ = roc_curve(gt_list, img_scores)
    img_roc_auc = roc_auc_score(gt_list, img_scores)
    total_roc_auc.append(img_roc_auc)
    print('image ROCAUC: %.3f' % (img_roc_auc))
    fig_img_rocauc.plot(fpr,
                        tpr,
                        label='%s img_ROCAUC: %.3f' %
                        (class_name, img_roc_auc))

    # get optimal threshold: maximize F1 over the PR curve
    gt_mask = np.asarray(gt_mask_list)
    precision, recall, thresholds = precision_recall_curve(
        gt_mask.flatten(), scores.flatten())
    a = 2 * precision * recall
    b = precision + recall
    f1 = np.divide(a, b, out=np.zeros_like(a), where=b != 0)
    threshold = thresholds[np.argmax(f1)]

    # calculate per-pixel level ROCAUC
    fpr, tpr, _ = roc_curve(gt_mask.flatten(), scores.flatten())
    per_pixel_rocauc = roc_auc_score(gt_mask.flatten(), scores.flatten())
    total_pixel_roc_auc.append(per_pixel_rocauc)
    print('pixel ROCAUC: %.3f' % (per_pixel_rocauc))

    fig_pixel_rocauc.plot(fpr,
                          tpr,
                          label='%s ROCAUC: %.3f' %
                          (class_name, per_pixel_rocauc))
    save_dir = args.save_path + '/' + f'pictures_{args.arch}'
    os.makedirs(save_dir, exist_ok=True)
    plot_fig(test_imgs, scores, gt_mask_list, threshold, save_dir, class_name)

    print('Average ROCAUC: %.3f' % np.mean(total_roc_auc))
    fig_img_rocauc.title.set_text('Average image ROCAUC: %.3f' %
                                  np.mean(total_roc_auc))
    fig_img_rocauc.legend(loc="lower right")

    print('Average pixel ROCUAC: %.3f' % np.mean(total_pixel_roc_auc))
    fig_pixel_rocauc.title.set_text('Average pixel ROCAUC: %.3f' %
                                    np.mean(total_pixel_roc_auc))
    fig_pixel_rocauc.legend(loc="lower right")

    fig.tight_layout()
    fig.savefig(os.path.join(args.save_path, 'roc_curve.png'), dpi=100)
    # Release the figure before returning.
    plt.close(fig)
def main():
    """PaDiM-style anomaly detection over all MVTec classes (EfficientNet-B5).

    Uses ``extract_endpoints`` (reduction levels 2/4/5) instead of forward
    hooks, fits one multivariate Gaussian per patch position on the training
    split, scores the test split with per-patch Mahalanobis distances, and
    writes ROC curves and per-class figures.
    """
    args = parse_args()

    # loading pretrained EfficientNet-B5
    model = EfficientNet.from_pretrained('efficientnet-b5')
    model.to(device)
    model.eval()
    random.seed(1024)
    torch.manual_seed(1024)
    if use_cuda:
        torch.cuda.manual_seed_all(1024)

    # Accumulator for the endpoint feature maps of each batch.
    outputs = []

    os.makedirs(os.path.join(args.save_path, 'temp_%s' % args.arch),
                exist_ok=True)
    fig, ax = plt.subplots(1, 2, figsize=(20, 10))
    fig_img_rocauc = ax[0]
    fig_pixel_rocauc = ax[1]

    total_roc_auc = []
    total_pixel_roc_auc = []

    for class_name in mvtec.CLASS_NAMES:

        train_dataset = mvtec.MVTecDataset(args.data_path,
                                           class_name=class_name,
                                           is_train=True)
        train_dataloader = DataLoader(train_dataset,
                                      batch_size=32,
                                      pin_memory=True)
        test_dataset = mvtec.MVTecDataset(args.data_path,
                                          class_name=class_name,
                                          is_train=False)
        test_dataloader = DataLoader(test_dataset,
                                     batch_size=32,
                                     pin_memory=True)

        train_outputs = OrderedDict([('layer1', []), ('layer2', []),
                                     ('layer3', [])])
        test_outputs = OrderedDict([('layer1', []), ('layer2', []),
                                    ('layer3', [])])

        # NOTE: feature caching is disabled here (the Gaussian parameters are
        # too heavy to pickle), so the features are always recomputed.  The
        # previous `if not os.path.exists(train_feature_filepath)` gate was
        # removed: with saving disabled its else-branch was `pass`, which left
        # `train_outputs` as empty lists and crashed during scoring whenever a
        # stale pickle happened to exist at this path.
        for (x, _, _) in tqdm(
                train_dataloader,
                '| feature extraction | train | %s |' % class_name):

            # model prediction
            with torch.no_grad():
                endpoints = model.extract_endpoints(x.to(device))

            outputs.append(endpoints['reduction_2']
                           )  # patch embedding vector from level 2
            outputs.append(endpoints['reduction_4']
                           )  # patch embedding vector from level 4
            outputs.append(endpoints['reduction_5']
                           )  # patch embedding vector from level 5

            # Pair up with (layer1, layer2, layer3) in append order.
            for k, v in zip(train_outputs.keys(), outputs):
                train_outputs[k].append(v.cpu().detach())

            # reset the accumulator for the next batch
            outputs = []

        for k, v in train_outputs.items():
            train_outputs[k] = torch.cat(v, 0)

        # Embedding concat
        embedding_vectors = train_outputs[
            'layer1']  # torch.Tensor, (N, 40, 56, 56)

        for layer_name in ['layer2', 'layer3']:
            # layer2 : (N, 176, 14, 14)
            # layer3 : (N, 512, 7, 7)
            embedding_vectors = embedding_concat(embedding_vectors,
                                                 train_outputs[layer_name])

        # Fit a multivariate Gaussian per patch position.
        B, C, H, W = embedding_vectors.size()
        embedding_vectors = embedding_vectors.view(B, C, H *
                                                   W)  # (N, C, 56*56)
        mean = torch.mean(embedding_vectors, dim=0).numpy()  # (C, 56*56)
        cov = torch.zeros(C, C, H * W).numpy()  # (C, C, 56*56)
        I = np.identity(C)
        for i in range(H * W):
            # 0.01 * I regularizes the covariance so it stays invertible.
            cov[:, :, i] = np.cov(embedding_vectors[:, :, i].numpy(),
                                  rowvar=False) + 0.01 * I

        # learned distribution, consumed below during scoring
        train_outputs = [mean, cov]

        gt_list = []
        gt_mask_list = []
        test_imgs = []
        outputs = []

        # extract test set features
        for (x, y,
             mask) in tqdm(test_dataloader,
                           '| feature extraction | test | %s |' % class_name):
            test_imgs.extend(x.cpu().detach().numpy())
            gt_list.extend(y.cpu().detach().numpy())
            gt_mask_list.extend(mask.cpu().detach().numpy())

            # model prediction
            with torch.no_grad():
                endpoints_test = model.extract_endpoints(x.to(device))

            outputs.append(endpoints_test['reduction_2']
                           )  # patch embedding vector from level 2
            outputs.append(endpoints_test['reduction_4']
                           )  # patch embedding vector from level 4
            outputs.append(endpoints_test['reduction_5']
                           )  # patch embedding vector from level 5

            # Pair up with (layer1, layer2, layer3) in append order.
            for k, v in zip(test_outputs.keys(), outputs):
                test_outputs[k].append(v.cpu().detach())

            # reset the accumulator for the next batch
            outputs = []

        for k, v in test_outputs.items():
            test_outputs[k] = torch.cat(v, 0)

        # Embedding concat (this variant keeps all channels: no index_select)
        embedding_vectors = test_outputs['layer1']
        for layer_name in ['layer2', 'layer3']:
            embedding_vectors = embedding_concat(embedding_vectors,
                                                 test_outputs[layer_name])

        # calculate distance matrix: per-patch Mahalanobis distance
        B, C, H, W = embedding_vectors.size()  # (N, C, 56, 56)
        embedding_vectors = embedding_vectors.view(
            B, C, H * W).numpy()  # (N, C, 56*56)
        dist_list = []
        for i in range(H * W):
            mean = train_outputs[0][:, i]
            conv_inv = np.linalg.inv(train_outputs[1][:, :, i])
            dist = [
                mahalanobis(sample[:, i], mean, conv_inv)
                for sample in embedding_vectors
            ]
            dist_list.append(dist)

        #print("dist_list shape: ", np.array(dist_list).shape) # (56*56, N)
        dist_list = np.array(dist_list).transpose(1,
                                                  0).reshape(B, H,
                                                             W)  # (N, 56, 56)

        # Upsample to input resolution. `x` is the last test batch; assumes
        # all test images share the same spatial size -- TODO confirm.
        dist_list = torch.tensor(dist_list)

        score_map = F.interpolate(
            dist_list.unsqueeze(1),
            size=x.size(2),
            mode='bilinear',
            align_corners=False).squeeze().numpy()  # (N, 224, 224)

        # apply gaussian smoothing on the score map
        for i in range(score_map.shape[0]):
            score_map[i] = gaussian_filter(score_map[i], sigma=4)

        # Min/max normalization to [0, 1]
        max_score = score_map.max()
        min_score = score_map.min()
        scores = (score_map - min_score) / (max_score - min_score)

        # calculate image-level ROC AUC score
        img_scores = scores.reshape(scores.shape[0],
                                    -1).max(axis=1)  # per-image max score
        gt_list = np.asarray(gt_list)
        print("gt_list", gt_list.shape)
        print("img_scores: ", img_scores.shape)
        fpr, tpr, _ = roc_curve(gt_list, img_scores)  # fpr, tpr, thres
        img_roc_auc = roc_auc_score(gt_list, img_scores)  # AUROC value
        total_roc_auc.append(img_roc_auc)
        print('image ROCAUC: %.3f' % (img_roc_auc))
        fig_img_rocauc.plot(fpr,
                            tpr,
                            label='%s img_ROCAUC: %.3f' %
                            (class_name, img_roc_auc))

        # get optimal threshold: maximize F1 over the PR curve
        gt_mask = np.asarray(gt_mask_list)
        precision, recall, thresholds = precision_recall_curve(
            gt_mask.flatten(), scores.flatten())
        a = 2 * precision * recall
        b = precision + recall
        f1 = np.divide(a, b, out=np.zeros_like(a), where=b != 0)
        threshold = thresholds[np.argmax(f1)]

        # calculate per-pixel level ROCAUC
        fpr, tpr, _ = roc_curve(gt_mask.flatten(), scores.flatten())
        per_pixel_rocauc = roc_auc_score(gt_mask.flatten(), scores.flatten())
        total_pixel_roc_auc.append(per_pixel_rocauc)

        print('pixel ROCAUC: %.3f' % (per_pixel_rocauc))

        fig_pixel_rocauc.plot(fpr,
                              tpr,
                              label='%s ROCAUC: %.3f' %
                              (class_name, per_pixel_rocauc))
        save_dir = args.save_path + '/' + f'pictures_{args.arch}'  # Folder location
        os.makedirs(save_dir, exist_ok=True)
        save_dir = save_dir + '/' + class_name  # Class location in folder
        os.makedirs(save_dir, exist_ok=True)
        plot_fig(test_imgs, scores, gt_mask_list, threshold, save_dir,
                 class_name)

    print('Average ROCAUC: %.3f' % np.mean(total_roc_auc))
    fig_img_rocauc.title.set_text('Average image ROCAUC: %.3f' %
                                  np.mean(total_roc_auc))
    fig_img_rocauc.legend(loc="lower right")

    print('Average pixel ROCUAC: %.3f' % np.mean(total_pixel_roc_auc))
    fig_pixel_rocauc.title.set_text('Average pixel ROCAUC: %.3f' %
                                    np.mean(total_pixel_roc_auc))
    fig_pixel_rocauc.legend(loc="lower right")

    fig.tight_layout()
    fig.savefig(os.path.join(args.save_path, 'roc_curve.png'), dpi=100)
    # Release the figure before returning.
    plt.close(fig)
# --- Esempio n. 7 (scraper artifact: example-site separator; left as a comment because bare text is a syntax error) ---
def main():
    """Evaluate image-level anomaly detection on MVTec with EfficientNet features.

    For each MVTec class: extract features from every level of a pretrained
    EfficientNet, fit a per-level multivariate Gaussian (mean + Ledoit-Wolf
    covariance) on the defect-free train set (cached to a pickle), score test
    images by the unweighted sum of per-level Mahalanobis distances, and
    report/plot the image-level ROCAUC.
    """
    args = parse_args()
    assert args.model_name.startswith(
        'efficientnet-b'
    ), 'only support efficientnet variants, not %s' % args.model_name

    # device setup
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # load an ImageNet-pretrained backbone; eval mode only (no training here)
    model = EfficientNetModified.from_pretrained(args.model_name)
    model.to(device)
    model.eval()

    os.makedirs(os.path.join(args.save_path, 'temp'), exist_ok=True)

    total_roc_auc = []

    for class_name in mvtec.CLASS_NAMES:

        train_dataset = mvtec.MVTecDataset(class_name=class_name,
                                           is_train=True)
        train_dataloader = DataLoader(train_dataset,
                                      batch_size=32,
                                      pin_memory=True)
        test_dataset = mvtec.MVTecDataset(class_name=class_name,
                                          is_train=False)
        test_dataloader = DataLoader(test_dataset,
                                     batch_size=32,
                                     pin_memory=True)

        # one bucket per feature level (code assumes model.extract_features
        # yields 9 levels for the supported variants — TODO confirm)
        train_outputs = [[] for _ in range(9)]
        test_outputs = [[] for _ in range(9)]

        # extract train set features (cached on disk per class/model)
        train_feat_filepath = os.path.join(
            args.save_path, 'temp',
            'train_%s_%s.pkl' % (class_name, args.model_name))
        if not os.path.exists(train_feat_filepath):
            for (x, y, mask) in tqdm(
                    train_dataloader,
                    '| feature extraction | train | %s |' % class_name):
                # model prediction
                with torch.no_grad():
                    feats = model.extract_features(x.to(device))
                for f_idx, feat in enumerate(feats):
                    train_outputs[f_idx].append(feat)

            # fitting a multivariate gaussian to features extracted from every level of ImageNet pre-trained model
            for t_idx, train_output in enumerate(train_outputs):
                # concatenate the batches once and reuse the result for both
                # the mean and the covariance (it was concatenated twice before)
                level_feats = torch.cat(train_output, 0).squeeze()
                mean = torch.mean(level_feats, dim=0).cpu().detach().numpy()
                # covariance estimation by using the Ledoit. Wolf et al. method
                cov = LedoitWolf().fit(
                    level_feats.cpu().detach().numpy()).covariance_
                train_outputs[t_idx] = [mean, cov]

            # save extracted feature
            with open(train_feat_filepath, 'wb') as f:
                pickle.dump(train_outputs, f)
        else:
            print('load train set feature distribution from: %s' %
                  train_feat_filepath)
            with open(train_feat_filepath, 'rb') as f:
                train_outputs = pickle.load(f)

        gt_list = []

        # extract test set features
        for (x, y,
             mask) in tqdm(test_dataloader,
                           '| feature extraction | test | %s |' % class_name):
            gt_list.extend(y.cpu().detach().numpy())
            # model prediction
            with torch.no_grad():
                feats = model.extract_features(x.to(device))
            for f_idx, feat in enumerate(feats):
                test_outputs[f_idx].append(feat)
        for t_idx, test_output in enumerate(test_outputs):
            test_outputs[t_idx] = torch.cat(
                test_output, 0).squeeze().cpu().detach().numpy()

        # calculate Mahalanobis distance per each level of EfficientNet
        dist_list = []
        for t_idx, test_output in enumerate(test_outputs):
            mean = train_outputs[t_idx][0]
            # invert the level covariance once, outside the per-sample loop
            cov_inv = np.linalg.inv(train_outputs[t_idx][1])
            dist = [
                mahalanobis(sample, mean, cov_inv) for sample in test_output
            ]
            dist_list.append(np.array(dist))

        # Anomaly score is followed by unweighted summation of the Mahalanobis distances
        scores = np.sum(np.array(dist_list), axis=0)

        # calculate image-level ROC AUC score
        fpr, tpr, _ = roc_curve(gt_list, scores)
        roc_auc = roc_auc_score(gt_list, scores)
        total_roc_auc.append(roc_auc)
        print('%s ROCAUC: %.3f' % (class_name, roc_auc))
        plt.plot(fpr, tpr, label='%s ROCAUC: %.3f' % (class_name, roc_auc))

    print('Average ROCAUC: %.3f' % np.mean(total_roc_auc))
    plt.title('Average image ROCAUC: %.3f' % np.mean(total_roc_auc))
    plt.legend(loc='lower right')
    plt.savefig(os.path.join(args.save_path,
                             'roc_curve_%s.png' % args.model_name),
                dpi=200)
# Esempio n. 8 (Example no. 8)
# 0
def main():
    """Run SPADE-style anomaly detection and localization over MVTec classes.

    For each class: cache WideResNet-50-2 intermediate features for the
    train set, score test images by the mean avgpool-feature distance to
    their K nearest train neighbours, build per-pixel anomaly maps from
    nearest-neighbour distances over layer1-3 feature maps, then report and
    plot image- and pixel-level ROCAUC and save localization results.
    """
    args = parse_args()

    # device setup
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # load an ImageNet-pretrained backbone; eval mode only (no training here)
    model = wide_resnet50_2(pretrained=True, progress=True)
    model.to(device)
    model.eval()

    # Capture the model's intermediate outputs via forward hooks: every
    # forward pass appends layer1, layer2, layer3 and avgpool outputs
    # (in that order) to `outputs`.
    outputs = []
    def hook(module, input, output):
        outputs.append(output)
    model.layer1[-1].register_forward_hook(hook)
    model.layer2[-1].register_forward_hook(hook)
    model.layer3[-1].register_forward_hook(hook)
    model.avgpool.register_forward_hook(hook)

    os.makedirs(os.path.join(args.save_path, 'temp'), exist_ok=True)

    fig, ax = plt.subplots(1, 2, figsize=(20, 10))
    fig_img_rocauc = ax[0]
    fig_pixel_rocauc = ax[1]

    total_roc_auc = []
    total_pixel_roc_auc = []

    for class_name in mvtec.CLASS_NAMES:

        train_dataset = mvtec.MVTecDataset(class_name=class_name, is_train=True)
        train_dataloader = DataLoader(train_dataset, batch_size=32, pin_memory=True)
        test_dataset = mvtec.MVTecDataset(class_name=class_name, is_train=False)
        test_dataloader = DataLoader(test_dataset, batch_size=32, pin_memory=True)

        # dict insertion order matches the hook registration order above
        train_outputs = OrderedDict([('layer1', []), ('layer2', []), ('layer3', []), ('avgpool', [])])
        test_outputs = OrderedDict([('layer1', []), ('layer2', []), ('layer3', []), ('avgpool', [])])

        # extract train set features (cached on disk per class)
        train_feature_filepath = os.path.join(args.save_path, 'temp', 'train_%s.pkl' % class_name)
        if not os.path.exists(train_feature_filepath):
            for (x, y, mask) in tqdm(train_dataloader, '| feature extraction | train | %s |' % class_name):
                # model prediction
                with torch.no_grad():
                    pred = model(x.to(device))
                # get intermediate layer outputs
                for k, v in zip(train_outputs.keys(), outputs):
                    train_outputs[k].append(v)
                # reset hook outputs for the next batch (the hook closure
                # follows the rebinding, so it appends to the new list)
                outputs = []
            for k, v in train_outputs.items():
                train_outputs[k] = torch.cat(v, 0)
            # save extracted feature
            with open(train_feature_filepath, 'wb') as f:
                pickle.dump(train_outputs, f)
        else:
            print('load train set feature from: %s' % train_feature_filepath)
            with open(train_feature_filepath, 'rb') as f:
                train_outputs = pickle.load(f)

        gt_list = []
        gt_mask_list = []
        test_imgs = []

        # extract test set features
        for (x, y, mask) in tqdm(test_dataloader, '| feature extraction | test | %s |' % class_name):
            test_imgs.extend(x.cpu().detach().numpy())
            gt_list.extend(y.cpu().detach().numpy())
            gt_mask_list.extend(mask.cpu().detach().numpy())
            # model prediction
            with torch.no_grad():
                pred = model(x.to(device))
            # get intermediate layer outputs
            for k, v in zip(test_outputs.keys(), outputs):
                test_outputs[k].append(v)
            # reset hook outputs for the next batch
            outputs = []
        for k, v in test_outputs.items():
            test_outputs[k] = torch.cat(v, 0)

        # image-level scores: distances between global (avgpool) features
        dist_matrix = calc_dist_matrix(torch.flatten(test_outputs['avgpool'], 1),
                                       torch.flatten(train_outputs['avgpool'], 1))

        # select K nearest neighbor and take average
        topk_values, topk_indexes = torch.topk(dist_matrix, k=args.top_k, dim=1, largest=False)
        scores = torch.mean(topk_values, 1).cpu().detach().numpy()

        # calculate image-level ROC AUC score
        fpr, tpr, _ = roc_curve(gt_list, scores)
        roc_auc = roc_auc_score(gt_list, scores)
        total_roc_auc.append(roc_auc)
        print('%s ROCAUC: %.3f' % (class_name, roc_auc))
        fig_img_rocauc.plot(fpr, tpr, label='%s ROCAUC: %.3f' % (class_name, roc_auc))

        score_map_list = []
        for t_idx in tqdm(range(test_outputs['avgpool'].shape[0]), '| localization | test | %s |' % class_name):
            score_maps = []
            for layer_name in ['layer1', 'layer2', 'layer3']:  # for each layer

                # construct a gallery of features at all pixel locations of the K nearest neighbors
                topk_feat_map = train_outputs[layer_name][topk_indexes[t_idx]]
                test_feat_map = test_outputs[layer_name][t_idx:t_idx + 1]
                feat_gallery = topk_feat_map.transpose(3, 1).flatten(0, 2).unsqueeze(-1).unsqueeze(-1)

                # calculate the distance matrix in chunks of 100 gallery
                # features to bound memory. NOTE: a stepped range keeps the
                # final partial chunk; the previous `range(shape[0] // 100)`
                # silently dropped gallery features when the size was not a
                # multiple of 100.
                dist_matrix_list = []
                for d_start in range(0, feat_gallery.shape[0], 100):
                    dist_matrix = torch.pairwise_distance(feat_gallery[d_start:d_start + 100], test_feat_map)
                    dist_matrix_list.append(dist_matrix)
                dist_matrix = torch.cat(dist_matrix_list, 0)

                # k nearest features from the gallery (k=1)
                score_map = torch.min(dist_matrix, dim=0)[0]
                score_map = F.interpolate(score_map.unsqueeze(0).unsqueeze(0), size=224,
                                          mode='bilinear', align_corners=False)
                score_maps.append(score_map)

            # average distance between the features
            score_map = torch.mean(torch.cat(score_maps, 0), dim=0)

            # apply gaussian smoothing on the score map
            score_map = gaussian_filter(score_map.squeeze().cpu().detach().numpy(), sigma=4)
            score_map_list.append(score_map)

        flatten_gt_mask_list = np.concatenate(gt_mask_list).ravel()
        flatten_score_map_list = np.concatenate(score_map_list).ravel()

        # calculate per-pixel level ROCAUC
        fpr, tpr, _ = roc_curve(flatten_gt_mask_list, flatten_score_map_list)
        per_pixel_rocauc = roc_auc_score(flatten_gt_mask_list, flatten_score_map_list)
        total_pixel_roc_auc.append(per_pixel_rocauc)
        print('%s pixel ROCAUC: %.3f' % (class_name, per_pixel_rocauc))
        fig_pixel_rocauc.plot(fpr, tpr, label='%s ROCAUC: %.3f' % (class_name, per_pixel_rocauc))

        # get optimal threshold: maximize F1 over the precision-recall curve
        precision, recall, thresholds = precision_recall_curve(flatten_gt_mask_list, flatten_score_map_list)
        a = 2 * precision * recall
        b = precision + recall
        f1 = np.divide(a, b, out=np.zeros_like(a), where=b != 0)  # guard 0/0
        threshold = thresholds[np.argmax(f1)]

        # visualize localization result
        visualize_loc_result(test_imgs, gt_mask_list, score_map_list, threshold, args.save_path, class_name, vis_num=5)

    print('Average ROCAUC: %.3f' % np.mean(total_roc_auc))
    fig_img_rocauc.title.set_text('Average image ROCAUC: %.3f' % np.mean(total_roc_auc))
    fig_img_rocauc.legend(loc="lower right")

    # fixed typo in the message below: 'ROCUAC' -> 'ROCAUC'
    print('Average pixel ROCAUC: %.3f' % np.mean(total_pixel_roc_auc))
    fig_pixel_rocauc.title.set_text('Average pixel ROCAUC: %.3f' % np.mean(total_pixel_roc_auc))
    fig_pixel_rocauc.legend(loc="lower right")

    fig.tight_layout()
    fig.savefig(os.path.join(args.save_path, 'roc_curve.png'), dpi=100)