Beispiel #1
0
def main(out_data: str = 'chexpert'):
    """Compute per-layer Mahalanobis-style scores for every completed model.

    Walks every entry of ``wsl_model_dir``, skipping debug runs and runs
    that have no ``configs.json``. For each remaining model it

    1. builds shuffled loaders for the model's own train and valid splits
       and for the valid split of ``out_data``,
    2. estimates, for every hooked layer, the mean and the precision matrix
       of the spatially averaged activations over the first training
       batches, and
    3. for each noise magnitude, perturbs the inputs along the sign of the
       gradient of the mean score and records the per-sample scores of the
       perturbed inputs for both the valid and the ``out_data`` loaders.

    Args:
        out_data: Dataset name passed to ``Loader`` for the samples that are
            reported as the "assumed positive class"; the model's own
            dataset is reported as the "assumed negative class".
    """
    models = wsl_model_dir.glob('*')
    # Resolve the device once so that the CPU fallback used when loading the
    # checkpoint also applies to every tensor moved inside the helpers.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    for idx, path in enumerate(models):
        if 'debug' in str(path):  # Debugging model
            continue
        if not (path / 'configs.json').exists():  # Model not completed
            continue
        with open(path / 'configs.json') as f:
            configs = json.load(f)
        print(f'Model {idx} : {path}')

        # ------------------------------------------------------
        def build_loader(data: str, split: str):
            """Return a shuffled DataLoader over ``split`` of ``data``."""
            dataset = Loader(data=data,
                             split=split,
                             extension=configs['extension'],
                             classes=configs['classes'],
                             column=configs['column'],
                             regression=configs['regression'])
            return DataLoader(  # type: ignore
                dataset,
                batch_size=configs['batchsize'],
                num_workers=4,
                pin_memory=True,
                shuffle=True)

        train_loader = build_loader(configs['data'], 'train')
        valid_loader = build_loader(configs['data'], 'valid')
        out_loader = build_loader(out_data, 'valid')

        checkpoint = torch.load(path / 'best.pt', map_location=device)
        # Unwrap the stored model (presumably saved inside nn.DataParallel
        # - TODO confirm) and switch it to inference behaviour.
        checkpoint['model'] = checkpoint['model'].module
        checkpoint['model'].network = configs['network']
        checkpoint['model'].get_map = False
        checkpoint['model'].eval()
        group_lasso = EmpiricalCovariance(assume_centered=False)
        # Maps hooked layer -> layer name. It starts empty and is read by the
        # hooks below, so register_forward_hooks is expected to fill it.
        layer_names = {}

        # ------------------------------------------------------
        def get_mean_precision(loader):
            """Return per-layer (mean, precision) dicts of pooled features.

            Only the first batches of ``loader`` are used (the loop stops
            once the batch index exceeds 20).
            """
            print('building hook function...')
            features = {}

            def hook(layer, inp, out):
                # Average every channel over its spatial positions and
                # accumulate the (batch, channels) rows per layer.
                name = layer_names[layer]
                pooled = out.detach().data.view(
                    out.size(0), out.size(1), -1).mean(dim=-1)
                if name not in features:
                    features[name] = pooled
                else:
                    features[name] = torch.cat((features[name], pooled),
                                               dim=0)

            handles = checkpoint['model'].register_forward_hooks(
                checkpoint['model'], hook, layer_names)

            start = time.time()
            with torch.set_grad_enabled(False):
                for step, data in enumerate(loader):
                    imgs = data[0].to(device).float()
                    _ = checkpoint['model'](imgs)
                    speed = configs['batchsize'] * step // (time.time() -
                                                            start)
                    print('Iter:',
                          step,
                          'Speed:',
                          int(speed),
                          'img/s',
                          end='\r',
                          flush=True)
                    if step > 20:
                        break
            print('Total time:', time.time() - start, 'secs')

            print('calculating sample mean...')
            mean = {}
            precision = {}
            for key, value in features.items():
                mean[key] = value.mean(dim=0)
                features[key] -= mean[key]
                group_lasso.fit(features[key].cpu().numpy())
                precision[key] = torch.from_numpy(
                    group_lasso.precision_).float().to(device)

            for handle in handles:
                handle.remove()
            return mean, precision

        train_mean, train_precision = get_mean_precision(train_loader)

        # ------------------------------------------------------
        def get_mahalanobis_score(loader: Any, features: Any,
                                  magnitude: float):
            """Return {layer name: scores} for input-perturbed samples.

            ``features`` is accepted for call compatibility but is not used;
            the layers are taken from ``layer_names``.
            """
            scores = {}
            gaussian = {}
            for layer, name in layer_names.items():
                checkpoint['optimizer'].zero_grad()

                def hook(layer, inp, out):
                    # Score of the current batch under the training
                    # statistics of this layer: -0.5 * d^T P d per sample.
                    zero_feat = out.view(out.size(0), out.size(1),
                                         -1).mean(dim=-1) - train_mean[name]
                    gaussian[name] = -0.5 * torch.mm(
                        torch.mm(zero_feat, train_precision[name]),
                        zero_feat.t()).diag()

                handle = layer.register_forward_hook(hook)

                start = time.time()
                for step, data in enumerate(loader):
                    with torch.set_grad_enabled(True):
                        # The image batch is the first element of ``data``,
                        # exactly as in get_mean_precision; the second
                        # element is discarded there and must not be fed to
                        # the model.
                        imgs = data[0].to(device).float()
                        imgs.requires_grad = True
                        _ = checkpoint['model'](imgs)

                        loss = gaussian[name].mean()
                        loss.backward()

                        # Sign of the input gradient as a +/-1 tensor.
                        gradient = torch.ge(imgs.grad.data, 0)
                        gradient = (gradient.float() - 0.5) * 2

                    with torch.set_grad_enabled(False):
                        # imgs - magnitude * sign(grad); the second forward
                        # pass refreshes gaussian[name] through the hook.
                        noisy_imgs = torch.add(imgs.data,
                                               gradient,
                                               alpha=-magnitude)
                        _ = checkpoint['model'](noisy_imgs)
                        batch_scores = gaussian[name].detach().data
                        if name not in scores:
                            scores[name] = batch_scores
                        else:
                            scores[name] = torch.cat(
                                (scores[name], batch_scores), dim=0)
                        print(scores[name].mean())

                    # Drop the parameter gradients created by backward().
                    checkpoint['optimizer'].zero_grad()
                    speed = configs['batchsize'] * step // (time.time() -
                                                            start)
                    print(name,
                          'Iter:',
                          step,
                          'Speed:',
                          int(speed),
                          'img/s',
                          end='\r',
                          flush=True)

                handle.remove()
                print()
            return scores

        print('get mahalanobis scores...')
        magnitudes = [0.0, 0.01, 0.005, 0.002, 0.0014, 0.001, 0.0005]
        maha_valid_scores = {}
        maha_out_scores = {}
        for magnitude in magnitudes:
            print('Noise:', magnitude)
            print('Data - Assumed negative class:', configs['data'])
            maha_valid_scores[magnitude] = get_mahalanobis_score(
                valid_loader, layer_names, magnitude)
            print('Data - Assumed positive class:', out_data)
            maha_out_scores[magnitude] = get_mahalanobis_score(
                out_loader, layer_names, magnitude)
            print()

        print('merge mahalanobis scores...')
Beispiel #2
0
def main(debug: bool,
         data: str,
         column: str,
         extension: str,
         classes: int,
         augmentation: bool,
         network: str,
         depth: int,
         wildcat: bool,
         pretrained: bool,
         optim: str,
         resume: bool,
         name: str,
         lr: float,
         batchsize: int,
         workers: int,
         patience: int,
         balanced: bool,
         maps: int,
         alpha: float,
         variable_type: str,
         error_range: int,
         ID: str):
    """Train a model, or resume training, and write its artefacts to disk.

    Each epoch runs ``engine`` on the train and the valid split, saves
    ``current.pt`` (and ``best.pt`` when the valid loss improves), appends to
    ``summary.txt`` and redraws ``graphs.png``. Training stops when more than
    ``patience`` epochs passed since the best epoch, after 150 epochs, or
    after a single epoch in debug mode. Finally ``configs.json`` is written.

    Args:
        debug: Use the model name ``'debug'`` and stop after one epoch; also
            forwarded to ``Loader``.
        data, column, extension, classes: Forwarded to ``Loader`` and
            recorded in the configs.
        augmentation: Forwarded to the training ``Loader`` only.
        network, depth, wildcat, maps, alpha, pretrained: Forwarded to
            ``Architecture`` and encoded in the model directory name.
        optim: ``'sgd'`` or ``'adam'``.
        resume: Continue from the ``best.pt`` of the single model directory
            matching ``*{name}`` instead of creating a new model.
        name: Suffix used to look up the model directory when resuming.
        lr: Learning rate of the optimizer.
        batchsize: Batch size of both loaders.
        workers: Number of loader worker processes.
        patience: Allowed number of epochs without improvement.
        balanced: For binary targets, weight positives with
            ``train_dataset.pos_weight``.
        variable_type: One of ``'binary'``, ``'categorical'``,
            ``'continous'`` (spelling as used by the project).
        error_range: Forwarded to ``engine``.
        ID: Model name to use; ``'placeholder'`` requests a random word.

    Raises:
        ValueError: If resuming and not exactly one model matches, or for an
            unsupported ``variable_type`` / ``optim``.
    """

    # ------------------------------------------------------
    print('Initializing model...', end='')
    if resume:
        # Path.glob returns a generator, which supports neither len() nor
        # indexing, so materialise it first.
        matching_models = list(wsl_model_dir.glob(f'*{name}'))
        if len(matching_models) != 1:
            raise ValueError(
                f'Expected exactly one model matching *{name}, '
                f'found {len(matching_models)}')
        full_mname = matching_models[0]
        # The training loop below writes into model_dir, so it has to be
        # defined when resuming as well.
        model_dir = full_mname
        mname = str(full_mname).split('_')[-1]
    else:
        if debug:
            mname = 'debug'

        elif ID == 'placeholder':
            try:
                # Get a random word to use as a more readable name. The
                # timeout keeps an unresponsive service from hanging the run.
                response = requests.get(
                    "https://random-word-api.herokuapp.com/word", timeout=10)
                if response.status_code != 200:
                    raise ValueError(
                        f'Unexpected status code {response.status_code}')
                mname = response.json()[0]
            except Exception:
                # Best effort only: as a fallback use the date and time
                mname = datetime.datetime.now().strftime('%d_%m_%H_%M_%S')

        else:
            mname = ID

        full_mname = (data + '_' + column + '_' +
                      f'lr{lr}_bs{batchsize}_{optim}' +
                      ('_pre' if pretrained else '') +
                      ('_bal' if balanced else '') + '_' +
                      f'{network}{depth}' +
                      (f'_wildcat_maps{maps}_alpha{alpha}' if wildcat else '') + '_' +
                      mname)

        model_dir = wsl_model_dir / full_mname
    print('done')
    print('Model Name:', mname)

    # ------------------------------------------------------
    print('Initializing loaders...', end='', flush=True)
    print('train...', end='', flush=True)
    train_dataset = Loader(data,
                           split='train',
                           extension=extension,
                           classes=classes,
                           column=column,
                           variable_type=variable_type,
                           augmentation=augmentation,
                           debug=debug)
    train_loader = DataLoader(  # type: ignore
        train_dataset, batch_size=batchsize, num_workers=workers, shuffle=True
    )

    print('test...', end='', flush=True)
    test_dataset = Loader(data,
                          split='valid',
                          extension=extension,
                          classes=classes,
                          column=column,
                          variable_type=variable_type,
                          debug=debug)
    test_loader = DataLoader(  # type: ignore
        test_dataset, batch_size=batchsize, num_workers=workers, shuffle=True
    )
    print('done')

    # ------------------------------------------------------
    print('Initializing optim/criterion...', end='')
    if resume:
        checkpoint = torch.load(
            model_dir / 'best.pt',
            map_location='cuda:0' if torch.cuda.is_available() else 'cpu')
    else:
        if variable_type == 'continous':
            criterion = nn.MSELoss()
        elif variable_type == 'categorical':
            criterion = nn.CrossEntropyLoss()
        elif variable_type == 'binary':
            if balanced:
                criterion = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor(train_dataset.pos_weight))
            else:
                criterion = nn.BCEWithLogitsLoss()
        else:
            raise ValueError('Variable type should be one of binary/categorical/continous.')
        criterion = criterion.cuda()

        model = Architecture(network, depth, wildcat, classes, maps, alpha, pretrained)
        model = nn.DataParallel(model).cuda()

        if optim == 'sgd':
            optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.1, weight_decay=1e-4)
        elif optim == 'adam':
            optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999))
        else:
            raise ValueError(f'{optim} is not supported')

        # 'loss' starts at 100 so that the first epoch normally counts as an
        # improvement.
        checkpoint = {
            'model': model,
            'optimizer': optimizer,
            'criterion': criterion,
            'epoch': 0,
            'loss': 100,
            'train_loss_all': [],
            'test_loss_all': [],
            'train_rmetric_all': [],
            'test_rmetric_all': []
        }

    best_epoch = checkpoint['epoch']
    best_loss = checkpoint['loss']
    print('done')
    # ------------------------------------------------------

    while (checkpoint['epoch'] - best_epoch <= patience) and checkpoint['epoch'] < 150:
        start = time.time()
        checkpoint['epoch'] += 1
        print('Epoch:', checkpoint['epoch'], '-Training')
        checkpoint['model'].train()
        checkpoint['loss'], rmetric, summary_train = engine(train_loader, checkpoint, batchsize,
                                                            classes, variable_type, error_range, is_train=True)
        checkpoint['train_loss_all'].append(checkpoint['loss'])
        checkpoint['train_rmetric_all'].append(rmetric)

        print('Epoch:', checkpoint['epoch'], '-Testing')
        checkpoint['model'].eval()
        # checkpoint['loss'] now holds the valid loss, which drives the
        # best-model selection below.
        checkpoint['loss'], rmetric, summary_test = engine(test_loader, checkpoint, batchsize,
                                                           classes, variable_type, error_range, is_train=False)
        checkpoint['test_loss_all'].append(checkpoint['loss'])
        checkpoint['test_rmetric_all'].append(rmetric)

        os.makedirs(model_dir, exist_ok=True)
        torch.save(checkpoint, model_dir / 'current.pt')

        if best_loss > checkpoint['loss']:
            print('Best model updated')
            best_loss = checkpoint['loss']
            best_epoch = checkpoint['epoch']
            torch.save(checkpoint, model_dir / 'best.pt')
        else:
            print('Best model unchanged- Epoch:', best_epoch, 'Loss:', best_loss)

        with open(model_dir / 'summary.txt', 'a+') as file:
            epoch = checkpoint['epoch']
            file.write(f'Epoch: {epoch} \n Train:{summary_train} \n Test:{summary_test}')

        plt.figure(figsize=(12, 18))
        plt.subplot(2, 1, 1)
        plt.plot(checkpoint['train_loss_all'], label='Train loss')
        plt.plot(checkpoint['test_loss_all'], label='Valid loss')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.subplot(2, 1, 2)
        plt.plot(checkpoint['train_rmetric_all'], label='Train rmetric')
        plt.plot(checkpoint['test_rmetric_all'], label='Test rmetric')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('rmetric')
        plt.savefig(model_dir / 'graphs.png', dpi=300)
        plt.close()

        print('Time taken:', int(time.time() - start), 'secs')

        if debug:
            print('Breaking early since we are in debug mode')
            print('You can find the trained model at -', model_dir)
            break

    configs = {
        'name': mname,
        'time': datetime.datetime.now().strftime('%d_%m_%H_%M_%S'),
        'data': data,
        'column': column,
        'extension': extension,
        'classes': classes,
        'network': network,
        'depth': depth,
        'wildcat': wildcat,
        'pretrained': pretrained,
        'optim': optim,
        'learning_rate': lr,
        'batchsize': batchsize,
        'balanced': balanced,
        'maps': maps if wildcat else None,
        'alpha': alpha if wildcat else None,
        'variable_type': variable_type,
        'error_range': error_range if variable_type == 'continous' else None,
        'best_epoch': best_epoch,
        'best_loss': best_loss,
        # Epochs are 1-based while the history list is 0-based.
        # NOTE(review): if no epoch ever improved the loss, best_epoch is 0
        # and this picks the last entry - confirm that is acceptable.
        'rmetric': checkpoint['test_rmetric_all'][best_epoch - 1],
    }
    with open(model_dir / 'configs.json', 'w') as fp:
        json.dump(configs, fp)
    return
Beispiel #3
0
def main(debug: bool, data: str, column: str, extension: str, classes: int,
         depth: int, pretrained: bool, optim: str, resume: bool, results: bool,
         name: str, lr: float, batchsize: int, workers: int, patience: int,
         ID: str):
    """Train a RetinaNet model, resume its training, or recompute results.

    Three modes are selected by the flags:

    * ``resume``: continue from the ``best.pt`` of the single model
      directory matching ``*{name}``.
    * ``results`` (and not ``resume``): for every matching model directory
      that has a ``configs.json``, run ``engine_boxes`` on the valid split,
      write ``results.csv``, update ``configs.json`` and return.
    * otherwise: create a new model and train it from scratch.

    Training runs ``engine`` on the train and valid loaders each epoch,
    saves ``current.pt`` / ``best.pt``, appends to ``summary.txt`` and
    redraws ``graphs.png``. It stops when more than ``patience`` epochs
    passed since the best epoch, after 150 epochs, or after one epoch in
    debug mode. Afterwards the best checkpoint is evaluated with
    ``engine_boxes`` and ``configs.json`` is written.

    Args:
        debug: Use the model name ``'debug'`` and stop after one epoch; also
            forwarded to ``Loader``.
        data, column, extension, classes: Forwarded to ``Loader`` and
            recorded in the configs.
        depth: ResNet depth; one of 18, 34, 50, 101, 152.
        pretrained: Forwarded to the architecture constructor.
        optim: ``'sgd'`` or ``'adam'``.
        resume: See modes above.
        results: See modes above.
        name: Pattern fragment used to look up model directories.
        lr: Learning rate of the optimizer.
        batchsize: Batch size of both loaders.
        workers: Number of loader worker processes.
        patience: Allowed number of epochs without improvement.
        ID: Model name to use; ``'placeholder'`` requests a random word.

    Raises:
        AssertionError: If resuming and not exactly one model matches.
        ValueError: For an unsupported ``depth`` or ``optim``.
    """

    # ------------------------------------------------------
    if resume:
        matching_models = list(wsl_model_dir.glob(f'*{name}'))
        assert len(matching_models) == 1
        model_dir = matching_models[0]
        mname = str(model_dir).split('_')[-1]
        print(mname)

    elif results:
        if 'retinanet' in name:
            matching_models = list(
                wsl_model_dir.glob(f'*{name}*/configs.json'))
        else:
            matching_models = list(
                wsl_model_dir.glob(f'*retinanet*{name}*/configs.json'))
        # The glob matched the config files; keep their directories.
        model_dirs = [config_path.parent for config_path in matching_models]

    else:
        print('Initializing model...', end='')
        if debug:
            mname = 'debug'

        elif ID == 'placeholder':
            try:
                # Get a random word to use as a more readable name. The
                # timeout keeps an unresponsive service from hanging the run.
                response = requests.get(
                    "https://random-word-api.herokuapp.com/word", timeout=10)
                if response.status_code != 200:
                    raise ValueError(
                        f'Unexpected status code {response.status_code}')
                mname = response.json()[0]
            except Exception:
                # Best effort only: as a fallback use the date and time
                mname = datetime.datetime.now().strftime('%d_%m_%H_%M_%S')

        else:
            mname = ID

        full_mname = (data + '_' + column + '_' +
                      f'lr{lr}_bs{batchsize}_{optim}' + '_' +
                      ('pre_' if pretrained else '') + f'retinanet{depth}' +
                      '_' + mname)

        model_dir = wsl_model_dir / full_mname
        print('done')
        print('Model Name:', mname)

    # ------------------------------------------------------
    print('Initializing loaders...', end='', flush=True)
    print('train...', end='', flush=True)
    train_dataset = Loader(data,
                           split='train',
                           extension=extension,
                           classes=classes,
                           column=column,
                           debug=debug)
    train_loader = DataLoader(  # type: ignore
        train_dataset,
        batch_size=batchsize,
        num_workers=workers,
        pin_memory=True,
        shuffle=True)

    print('test...', end='', flush=True)
    test_dataset = Loader(data,
                          split='valid',
                          extension=extension,
                          classes=classes,
                          column=column,
                          debug=debug)
    test_loader = DataLoader(  # type: ignore
        test_dataset,
        batch_size=batchsize,
        num_workers=workers,
        pin_memory=True,
        shuffle=True)
    print('done')

    if classes > 1:
        print('Class List: ', train_dataset.class_names)

    # ------------------------------------------------------

    if results:
        for model_dir in model_dirs:
            print('Initializing optim/checkpoint...')
            checkpoint = torch.load(
                model_dir / 'best.pt',
                map_location='cuda:0' if torch.cuda.is_available() else 'cpu')
            with open(model_dir / 'configs.json') as f:
                configs = json.load(f)
            print('Calculating box results...')
            checkpoint['model'].eval()
            df, rmetric, wild_metric, pointwise = engine_boxes(
                test_dataset, checkpoint)
            df.to_csv(model_dir / 'results.csv')

            configs['rmetric'] = rmetric
            configs['wild_metric'] = wild_metric
            # NOTE(review): 'utility' stores the same value as
            # 'wild_metric' - confirm this duplication is intended.
            configs['utility'] = wild_metric
            configs['pointwise'] = pointwise

            with open(model_dir / 'configs.json', 'w') as fp:
                json.dump(configs, fp)
            print('Finished:', rmetric, wild_metric, pointwise)
            print(
                f'You can find the calculated results at - {model_dir}/results.csv'
            )
        return

    # ------------------------------------------------------
    print('Initializing optim/checkpoint...', end='')
    # The results mode returned above, so only resume can reach this branch.
    if resume:
        checkpoint = torch.load(
            model_dir / 'best.pt',
            map_location='cuda:0' if torch.cuda.is_available() else 'cpu')
    else:
        builder_names = {
            18: 'resnet18',
            34: 'resnet34',
            50: 'resnet50',
            101: 'resnet101',
            152: 'resnet152',
        }
        if depth not in builder_names:
            raise ValueError(
                'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
        model = getattr(architecture, builder_names[depth])(classes,
                                                            pretrained)
        model = nn.DataParallel(model).cuda()

        if optim == 'sgd':
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=lr,
                                        momentum=0.1,
                                        weight_decay=1e-4)
        elif optim == 'adam':
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=lr,
                                         betas=(0.9, 0.999))
        else:
            raise ValueError(f'{optim} is not supported')

        # 'loss' starts at 100 so that the first epoch normally counts as an
        # improvement.
        checkpoint = {
            'model': model,
            'optimizer': optimizer,
            'epoch': 0,
            'loss': 100,
            'train_loss_all': [],
            'test_loss_all': [],
            'train_class_loss_all': [],
            'test_class_loss_all': [],
            'train_reg_loss_all': [],
            'test_reg_loss_all': []
        }

    best_epoch = checkpoint['epoch']
    best_loss = checkpoint['loss']
    print('done')

    # ------------------------------------------------------

    while (checkpoint['epoch'] - best_epoch <=
           patience) and checkpoint['epoch'] < 150:
        start = time.time()
        checkpoint['epoch'] += 1
        print('Epoch:', checkpoint['epoch'], '-Training')
        checkpoint['model'].train()
        class_loss, reg_loss, checkpoint['loss'], summary_train = engine(
            train_loader, checkpoint, batchsize, classes, is_train=True)
        checkpoint['train_loss_all'].append(checkpoint['loss'])
        checkpoint['train_class_loss_all'].append(class_loss)
        checkpoint['train_reg_loss_all'].append(reg_loss)

        print('Epoch:', checkpoint['epoch'], '-Testing')
        # checkpoint['model'].eval() - In eval mode model prints boxes
        class_loss, reg_loss, checkpoint['loss'], summary_test = engine(
            test_loader, checkpoint, batchsize, classes, is_train=False)
        checkpoint['test_loss_all'].append(checkpoint['loss'])
        checkpoint['test_class_loss_all'].append(class_loss)
        checkpoint['test_reg_loss_all'].append(reg_loss)

        os.makedirs(model_dir, exist_ok=True)
        torch.save(checkpoint, model_dir / 'current.pt')

        if best_loss > checkpoint['loss']:
            print('Best model updated')
            best_loss = checkpoint['loss']
            best_epoch = checkpoint['epoch']
            torch.save(checkpoint, model_dir / 'best.pt')
        else:
            print('Best model unchanged- Epoch:', best_epoch, 'Loss:',
                  best_loss)

        with open(model_dir / 'summary.txt', 'a+') as file:
            epoch = checkpoint['epoch']
            file.write(
                f'Epoch: {epoch} \n Train:{summary_train} \n Test:{summary_test}'
            )

        plt.figure(figsize=(12, 18))
        plt.subplot(3, 1, 1)
        plt.plot(checkpoint['train_loss_all'], label='Train loss')
        plt.plot(checkpoint['test_loss_all'], label='Valid loss')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.subplot(3, 1, 2)
        plt.plot(checkpoint['train_class_loss_all'], label='Train Class loss')
        plt.plot(checkpoint['test_class_loss_all'], label='Valid Class loss')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Class Loss')
        plt.subplot(3, 1, 3)
        plt.plot(checkpoint['train_reg_loss_all'], label='Train Reg loss')
        plt.plot(checkpoint['test_reg_loss_all'], label='Valid Reg loss')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Reg Loss')
        plt.savefig(model_dir / 'graphs.png', dpi=300)
        plt.close()

        print('Time taken:', int(time.time() - start), 'secs')

        if debug:
            print('Breaking early since we are in debug mode')
            print('You can find the trained model at -', model_dir)
            break

    checkpoint = torch.load(
        model_dir / 'best.pt',
        map_location='cuda:0' if torch.cuda.is_available() else 'cpu')
    # The training loop never switches the model to eval mode, so the saved
    # model carries the training flag. Match the results mode above, which
    # evaluates boxes in eval mode.
    checkpoint['model'].eval()
    df, rmetric, wild_metric, pointwise = engine_boxes(test_dataset,
                                                       checkpoint)
    df.to_csv(model_dir / 'results.csv')
    print('Finished:', rmetric)
    print(f'You can find the calculated results at - {model_dir}/results.csv')

    configs = {
        'name': mname,
        'time': datetime.datetime.now().strftime('%d_%m_%H_%M_%S'),
        'data': data,
        'column': column,
        'extension': extension,
        'classes': classes,
        'network': 'retinanet',
        'depth': depth,
        'pretrained': pretrained,
        'optim': optim,
        'learning_rate': lr,
        'batchsize': batchsize,
        'best_epoch': best_epoch,
        'best_loss': best_loss,
        'rmetric': rmetric,
        'wild_metric': wild_metric,
        'pointwise': pointwise
    }
    with open(model_dir / 'configs.json', 'w') as fp:
        json.dump(configs, fp)
    return
Beispiel #4
0
def main(name: str, start: int, plot: bool):
    """Score several saliency methods against ground-truth maps per model.

    Selects up to 50 model directories (starting at index ``start``) whose
    names match ``rsna*`` / ``name``, skips debug and unfinished runs, and
    for every image of the test split whose label equals 1 compares the
    ground-truth map with the maps produced by each saliency method using
    ``aupr``. The mean score per method is stored in the model's
    ``configs.json``.

    Args:
        name: ``'all'`` for every ``rsna*`` model, otherwise a fragment of
            the model directory name.
        start: Index of the first matching model directory to process.
        plot: Additionally save and show an overlay figure per image.
    """

    if name == 'all':
        pattern = 'rsna*'
    elif 'rsna' in name:
        pattern = f'*{name}*'
    else:
        pattern = f'rsna*{name}*'

    model_dirs = list(wsl_model_dir.glob(pattern))
    num_model_dirs = 50
    print(f'Number of potential model directory matches = {len(model_dirs)}, but doing top {num_model_dirs} models for now.')
    model_dirs = model_dirs[start:start + num_model_dirs]

    if plot:
        ncolors = 256
        color_array = plt.get_cmap('viridis')(range(ncolors))
        # change alpha values
        color_array[:, -1] = np.linspace(1.0, 0.0, ncolors)
        # Build the overlay colormap once and hand the object to imshow.
        # Registering it for every image under the built-in name 'rainbow'
        # mutates global state and is rejected by recent matplotlib.
        overlay_cmap = LinearSegmentedColormap.from_list(name='rainbow', colors=color_array)

    for m_idx, model_dir in enumerate(model_dirs):

        if 'debug' in str(model_dir):  # Debugging model
            print('Debugging model')
            continue

        if not (model_dir / 'configs.json').exists():  # Model not completed
            print('Model not completed')
            continue

        with open(model_dir / 'configs.json') as f:
            configs = json.load(f)
        dataset = Loader(data=configs['data'],
                         split='test',
                         extension=configs['extension'],
                         classes=configs['classes'],
                         column=configs['column'],
                         variable_type=configs['variable_type'])
        print('Number of images -', len(dataset))

        print(f'Model {m_idx} : {model_dir}')

        # Without a known, supported task no ground-truth map can be built.
        # Skip the model rather than fail later or silently reuse the task
        # of the previous model.
        if configs['data'] not in known_tasks:
            print('Unknown task for data', configs['data'], '- skipping model')
            continue
        task = known_tasks[configs['data']]
        if task not in ('detect', 'segment'):
            print('Unsupported task', task, '- skipping model')
            continue

        checkpoint = torch.load(model_dir / 'best.pt', map_location='cuda:0' if torch.cuda.is_available() else 'cpu')
        checkpoint['model'].gradient = None
        checkpoint['model'].eval()

        # currently hardcoded, should ideally be inferred from image
        org_size = (1024, 1024)
        new_size = (224, 224)

        all_scores = defaultdict(list)

        GD = BackProp(checkpoint['model'])
        GBP = BackProp(checkpoint['model'], True)

        start_time = time.time()

        for idx, data in enumerate(dataset):
            checkpoint['model'].zero_grad()
            # Use a dedicated local so the ``name`` argument is not shadowed.
            img_name, img, label = data
            label = label.squeeze().cuda()

            if label != 1:
                continue

            # Make the ground map
            if task == 'detect':
                ground_map = box_to_map(dataset.df[dataset.df.Id == img_name].box.to_list(), np.zeros(org_size))
            else:  # task == 'segment'
                ground_map = np.zeros(org_size)
                eps = dataset.df[dataset.df.Id == img_name].EncodedPixels.to_list()
                for ep in eps:
                    ground_map += rle2mask(ep, np.zeros(org_size)).T

            ground_map = cv2.resize(ground_map, new_size, interpolation=cv2.INTER_NEAREST).clip(0, 1)

            # Make the saliency map
            checkpoint['model'].get_map = True
            if configs['wildcat']:
                _, _, wild, handle = checkpoint['model'](img.unsqueeze(dim=0).cuda().float())
                handle.remove()
                wild = torch.max(wild, dim=1)[0]
                wild = wild.squeeze().cpu().data.numpy()
                # Min-max normalise; a constant map would divide by zero and
                # yield NaNs, so map it to all zeros instead.
                wild_range = wild.max() - wild.min()
                if wild_range > 0:
                    wild = (wild - wild.min()) / wild_range
                else:
                    wild = np.zeros_like(wild)
                wild = cv2.resize(wild, new_size, interpolation=cv2.INTER_NEAREST)
            else:
                wild = np.zeros_like(ground_map)

            # A fresh input tensor is created for every call on purpose: the
            # generators may attach gradients to their input, so sharing one
            # tensor could change their results.
            gcam = GD.generate_cam(img.unsqueeze(dim=0).cuda().float()).squeeze()

            checkpoint['model'].get_map = False
            grad = GD.generate_gradients(img.unsqueeze(dim=0).cuda().float())
            ig = GD.generate_integrated_gradients(img.unsqueeze(dim=0).cuda().float(), 25)

            sg = GD.generate_smooth_grad(img.unsqueeze(dim=0).cuda().float(), 5, 0.1, 0)
            sig = GD.generate_smooth_grad(img.unsqueeze(dim=0).cuda().float(), 5, 0.1, 10)

            gbp = GBP.generate_gradients(img.unsqueeze(dim=0).cuda().float())
            ggcam = np.multiply(gcam, gbp)

            all_scores['WILD'].append(aupr(ground_map.flatten(), wild.flatten()))
            all_scores['GRAD'].append(aupr(ground_map.flatten(), grad.flatten()))
            all_scores['SG'].append(aupr(ground_map.flatten(), sg.flatten()))
            all_scores['IG'].append(aupr(ground_map.flatten(), ig.flatten()))
            all_scores['SIG'].append(aupr(ground_map.flatten(), sig.flatten()))
            all_scores['GBP'].append(aupr(ground_map.flatten(), gbp.flatten()))
            all_scores['GCAM'].append(aupr(ground_map.flatten(), gcam.flatten()))
            all_scores['GGCAM'].append(aupr(ground_map.flatten(), ggcam.flatten()))

            if plot:
                row, col = 2, 5
                map_names = [['XRAY', 'WILD', 'GRAD', 'GCAM', 'GGCAM'], ['MASK', 'GBP', 'SG', 'IG', 'SIG']]
                maps = [[img, wild, grad, gcam, ggcam], [ground_map, gbp, sg, ig, sig]]

                fig, ax = plt.subplots(row, col, figsize=(18, 8))
                for i in range(row):
                    for j in range(col):
                        # Every panel shows the image; all but the first
                        # panel overlay their map on top of it.
                        ax[i, j].imshow(np.transpose(img, (1, 2, 0)))
                        if not (i == 0 and j == 0):
                            ax[i, j].imshow(maps[i][j], alpha=0.8, cmap=overlay_cmap)
                        ax[i, j].text(0, 220, map_names[i][j], fontsize='x-large', color='white', weight='bold', bbox=dict(fill=True, linewidth=0))
                        ax[i, j].axis('off')
                plt.subplots_adjust(wspace=0.05, hspace=0.05)
                plt.savefig(f'{wsl_plot_dir}/saliency_{img_name}.png', dpi=300, bbox_inches='tight')
                plt.show()
                plt.close()

            # Running mean of every score (as a percentage) and time/image.
            print_str = f'{idx}: | '
            for key, value in all_scores.items():
                print_str += f'{key}-{int(np.mean(value) * 100)} | '
            print_str += str(round((time.time() - start_time) / (idx + 1), 2)) + ' s/img'
            print(print_str, end='\r')

        for key in all_scores.keys():
            configs[key] = np.mean(all_scores[key])
            print(key, ' ', configs[key])

        with open(model_dir / 'configs.json', 'w') as fp:
            json.dump(configs, fp)
Beispiel #5
0
def main(name: str, task: str):
    """Score predicted localisation maps with ROC-AUC and store the mean.

    Model directories below ``wsl_model_dir`` are selected through ``name``.
    A directory is skipped when its path contains ``debug``, does not contain
    ``wild``, or has no ``configs.json``.  For every remaining model the
    validation split is iterated, the predicted map of each positive class is
    compared with the ground-truth map, and the mean of all scores is written
    back to ``configs.json`` under the key ``wild_metric``.

    Args:
        name: ``'all'`` to consider every directory, otherwise a substring
            that the directory name has to contain.
        task: Kind of ground truth, ``'detect'`` (boxes) or ``'segment'``
            (run-length encoded masks).  Only a fallback: an entry for the
            model's dataset in ``known_tasks`` takes precedence.
    """
    pattern = '*' if name == 'all' else f'*{name}*'
    models = list(wsl_model_dir.glob(pattern))
    models.reverse()
    print('Number of potential model matches =', len(models))

    # One device for checkpoint and inputs so the CPU fallback really works.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    org_size = (1024, 1024)
    new_size = (224, 224)

    for m, path in enumerate(models):

        if 'debug' in str(path):  # Debugging model
            print('Debugging model')
            continue
        if 'wild' not in str(path):
            continue
        if not (path / 'configs.json').exists():  # Model not completed
            print('Model not completed')
            continue

        with open(path / 'configs.json') as f:
            configs = json.load(f)
        print(configs)
        dataset = Loader(data=configs['data'],
                         split='valid',
                         extension=configs['extension'],
                         classes=configs['classes'],
                         column=configs['column'],
                         regression=configs['regression'])

        print(f'Model {m} : {path}')

        # Resolve the task per model; the caller's value must not be
        # overwritten, otherwise it would leak into the following models.
        if configs['data'] in known_tasks:
            model_task = known_tasks[configs['data']]
        else:
            model_task = task
        if model_task not in ('detect', 'segment'):
            # Without a ground-truth map nothing can be scored.
            print('Ground truth not available.')
            continue

        checkpoint = torch.load(path / 'best.pt', map_location=device)
        # The checkpoint stores a wrapper; the network itself is `.module`.
        model = checkpoint['model'].module
        model.get_map = True
        model.eval()

        all_scores = []

        with torch.set_grad_enabled(False):
            for idx, data in enumerate(dataset):
                img, _ = data
                img_name = dataset.names[idx]
                labels = dataset.labels[idx]

                positives = [i for i, lab in enumerate(labels) if lab != 0]
                if positives:
                    predicted_map = model(
                        img.unsqueeze(dim=0).to(device).float()).squeeze(dim=0)
                    predicted_map = torch.sigmoid(
                        predicted_map.sum(dim=0)).cpu().data.numpy()

                    # The ground truth depends on the image only, so it is
                    # built once and shared by all positive classes.
                    rows = dataset.df[dataset.df.Id == img_name]
                    if model_task == 'detect':
                        ground_map = box_to_map(rows.box.to_list(),
                                                np.zeros(org_size))
                    else:
                        ground_map = np.zeros(org_size)
                        for ep in rows.EncodedPixels.to_list():
                            ground_map += rle2mask(ep, np.zeros(org_size)).T
                    ground_map = cv2.resize(
                        ground_map, new_size,
                        interpolation=cv2.INTER_NEAREST).clip(0, 1)
                    flat_truth = ground_map.flatten()

                    for i in positives:
                        re_pred_map = cv2.resize(predicted_map[i],
                                                 new_size,
                                                 interpolation=cv2.INTER_AREA)
                        all_scores.append(
                            roc_auc_score(flat_truth, re_pred_map.flatten()))

                if (len(all_scores) + 1) % 32 == 0:
                    print('Idx:', idx, 'Mean:', np.mean(all_scores), end='\r')

        if not all_scores:
            # The mean of nothing is NaN; do not write that into the configs.
            print('No positive samples were scored; configs.json unchanged.')
            continue

        configs['wild_metric'] = float(np.mean(all_scores))
        print('Wild Metric:', configs['wild_metric'])
        with open(path / 'configs.json', 'w') as fp:
            json.dump(configs, fp)
Beispiel #6
0
def main(store: bool = False):
    """Score the maps of all wildcat models and summarise them in a table.

    Every directory below ``wsl_model_dir`` whose path contains ``wildcat``
    (and not ``debug``) and that has a ``configs.json`` is evaluated on the
    validation split.  For each positive label the predicted map is compared
    with the box ground truth via ROC-AUC; the mean score is stored in the
    model's config dict under ``wild``.  All config dicts are printed as one
    DataFrame.

    Args:
        store: When true, the DataFrame is additionally written as CSV to
            ``wsl_summary_dir / 'wild_<hour>_<day>_<month>'``.
    """
    # One device for checkpoint and inputs so the CPU fallback really works.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    org_size = (1024, 1024)
    new_size = (224, 224)

    all_configs = []
    for idx, path in enumerate(wsl_model_dir.glob('*')):

        if 'debug' in str(path):  # Debugging model
            continue
        if 'wildcat' not in str(path):  # Model is not wildcat
            continue
        if not (path / 'configs.json').exists():  # Model not completed
            continue

        with open(path / 'configs.json') as f:
            configs = json.load(f)
        # NOTE(review): the other evaluation scripts in this file pass
        # `column=` instead of `col_name=`; confirm against Loader's signature.
        dataset = Loader(data=configs['data'],
                         split='valid',
                         extension=configs['extension'],
                         classes=configs['classes'],
                         col_name=configs['column'],
                         regression=configs['regression'])
        print(configs)

        print(f'Model {idx} : {path}')

        checkpoint = torch.load(path / 'best.pt', map_location=device)
        # The checkpoint stores a wrapper; the network itself is `.module`.
        model = checkpoint['model'].module
        model.get_map = True
        model.eval()

        all_scores = []

        with torch.set_grad_enabled(False):
            for img_idx, data in enumerate(dataset):
                img, _ = data
                img_name = dataset.names[img_idx]
                labels = dataset.labels[img_idx]

                positives = [i for i, lab in enumerate(labels) if lab != 0]
                if positives:
                    predicted_map = model(
                        img.unsqueeze(dim=0).to(device).float()).squeeze(dim=0)
                    predicted_map = torch.sigmoid(
                        predicted_map.sum(dim=0)).cpu().data.numpy()

                    # 'records' is the documented spelling; the former
                    # abbreviation 'row' is rejected by pandas >= 2.0.
                    rows = dataset.df[dataset.df.Id == img_name].to_dict(
                        orient='records')
                    # The ground truth depends on the image only, so it is
                    # built once and shared by all positive classes.
                    ground_map = box_to_map(rows, configs['column'], org_size,
                                            new_size)
                    flat_truth = ground_map.flatten()

                    for i in positives:
                        re_pred_map = cv2.resize(predicted_map[i],
                                                 new_size,
                                                 interpolation=cv2.INTER_AREA)
                        all_scores.append(
                            roc_auc_score(flat_truth, re_pred_map.flatten()))

                if (len(all_scores) + 1) % 32 == 0:
                    print('Mean:', np.mean(all_scores), end='\r')

        # An empty list still yields NaN, just without numpy's warning.
        configs['wild'] = float(np.mean(all_scores)) if all_scores else float('nan')

        all_configs.append(configs)

    df = pd.DataFrame(all_configs)
    print(df)
    # Named `timestamp` so the `time` module name is not shadowed.
    timestamp = datetime.datetime.now().strftime('%H_%d_%m')
    if store:
        df.to_csv(wsl_summary_dir / f'wild_{timestamp}')
Beispiel #7
0
def main(name: str, start: int, plot: bool):
    """Score saliency maps of trained models with ``aupr`` on the test split.

    Model directories below ``wsl_model_dir`` are selected through ``name``;
    the first ``start`` matches are dropped.  Directories whose path contains
    ``debug`` or that lack a ``configs.json`` are skipped.  For every image
    whose label equals 1 a ground-truth map is built and compared with either
    the model's own map (``configs['wildcat']`` is truthy, key ``WILD``) or
    with the maps of the gradient-based methods (keys ``GRAD``, ``SG``,
    ``IG``, ``SIG``, ``GBP``, ``GCAM``, ``GGCAM``).  The mean score per key
    is written back to the model's ``configs.json``.

    Args:
        name: ``'all'`` to consider every directory, otherwise a substring
            that the directory name has to contain.
        start: Number of matching directories to skip from the front.
        plot: When true, a figure per image is saved to ``wsl_plot_dir`` and
            shown.
    """
    pattern = '*' if name == 'all' else f'*{name}*'
    model_dirs = list(wsl_model_dir.glob(pattern))[start:]

    print('Number of potential model directory matches =', len(model_dirs))

    # One device for checkpoint and inputs so the CPU fallback really works.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    org_size = (1024, 1024)
    new_size = (224, 224)

    def as_batch(image):
        # A fresh tensor per call: the saliency generators each receive their
        # own input, exactly one per call.
        return image.unsqueeze(dim=0).to(device).float()

    overlay_cmap = None
    if plot:
        ncolors = 256
        color_array = plt.get_cmap('viridis')(range(ncolors))
        # change alpha values: opaque at the low end, transparent at the top
        color_array[:, -1] = np.linspace(1.0, 0.0, ncolors)
        # Built once and handed to imshow directly.  Registering it globally
        # under the builtin name 'rainbow' for every image is unnecessary and
        # `plt.register_cmap` is no longer available in recent matplotlib.
        overlay_cmap = LinearSegmentedColormap.from_list(name='rainbow',
                                                         colors=color_array)

    for m_idx, model_dir in enumerate(model_dirs):

        if 'debug' in str(model_dir):  # Debugging model
            print('Debugging model')
            continue

        if not (model_dir / 'configs.json').exists():  # Model not completed
            print('Model not completed')
            continue

        with open(model_dir / 'configs.json') as f:
            configs = json.load(f)
        dataset = Loader(data=configs['data'],
                         split='test',
                         extension=configs['extension'],
                         classes=configs['classes'],
                         column=configs['column'],
                         regression=configs['regression'])
        print('Number of images -', len(dataset))

        print(f'Model {m_idx} : {model_dir}')

        # Resolve the task per model so that neither an undefined name nor
        # the value of a previous model is used.
        task = None
        if configs['data'] in known_tasks:
            task = known_tasks[configs['data']]
        if task not in ('detect', 'segment'):
            print('Ground truth not available.')
            continue

        checkpoint = torch.load(model_dir / 'best.pt', map_location=device)
        # The checkpoint stores a wrapper; the network itself is `.module`.
        model = checkpoint['model'].module
        model.gradient = None
        model.eval()

        all_scores = defaultdict(list)

        GD = BackProp(model)
        GBP = BackProp(model, True)

        start_time = time.time()

        for idx, data in enumerate(dataset):
            print(
                f'{idx} speed-{(time.time() - start_time) // (idx + 1)} s/img',
                end='\r')
            model.zero_grad()
            img_name, img, label = data

            # Only images labelled 1 are scored.
            if label.squeeze() != 1:
                continue

            # Make the ground map
            rows = dataset.df[dataset.df.Id == img_name]
            if task == 'detect':
                ground_map = box_to_map(rows.box.to_list(),
                                        np.zeros(org_size))
            else:
                ground_map = np.zeros(org_size)
                for ep in rows.EncodedPixels.to_list():
                    ground_map += rle2mask(ep, np.zeros(org_size)).T

            ground_map = cv2.resize(ground_map,
                                    new_size,
                                    interpolation=cv2.INTER_NEAREST).clip(
                                        0, 1)
            flat_truth = ground_map.flatten()

            # Make the saliency map
            if configs['wildcat']:
                model.get_map = True
                with torch.set_grad_enabled(False):
                    wild, _, handle = model(as_batch(img))
                    handle.remove_hook()
                    wild = wild.squeeze().cpu().data.numpy()
                # Min-max normalisation of the map.
                wild = (wild - wild.min()) / (wild.max() - wild.min())
                wild = cv2.resize(wild,
                                  new_size,
                                  interpolation=cv2.INTER_NEAREST)

                all_scores['WILD'].append(aupr(flat_truth, wild.flatten()))

                if plot:
                    plt.figure(figsize=(4, 12))
                    plt.subplot(1, 3, 1)
                    plt.imshow(np.transpose(img, (1, 2, 0)))
                    plt.subplot(1, 3, 2)
                    plt.imshow(ground_map, alpha=0.8, cmap=overlay_cmap)
                    # Third panel; it used to be drawn on top of the second.
                    plt.subplot(1, 3, 3)
                    plt.imshow(wild, alpha=0.8, cmap=overlay_cmap)
                    plt.savefig(f'{wsl_plot_dir}/wild_{img_name}.png',
                                dpi=300,
                                bbox_inches='tight')
                    plt.show()
                    plt.close()

            else:
                model.get_map = False
                grad = GD.generate_gradients(as_batch(img))
                ig = GD.generate_integrated_gradients(as_batch(img), 100)

                sg = GD.generate_smooth_grad(as_batch(img), 5, 0.3, 0)
                # NOTE(review): identical call to `sg` above, so SIG always
                # equals SG - confirm which argument should differ.
                sig = GD.generate_smooth_grad(as_batch(img), 5, 0.3, 0)

                gbp = GBP.generate_gradients(as_batch(img))

                model.get_map = True
                gcam = GD.generate_cam(as_batch(img)).squeeze()
                ggcam = np.multiply(gcam, gbp)

                scored = (('GRAD', grad), ('SG', sg), ('IG', ig),
                          ('SIG', sig), ('GBP', gbp), ('GCAM', gcam),
                          ('GGCAM', ggcam))
                for key, saliency in scored:
                    all_scores[key].append(
                        aupr(flat_truth, saliency.flatten()))

                if plot:
                    map_names = [['MASK', 'GRAD', 'SG', 'IG'],
                                 ['SIG', 'GCAM', 'GBP', 'GGCAM']]
                    maps = [[ground_map, grad, sg, ig],
                            [sig, gcam, gbp, ggcam]]

                    fig, ax = plt.subplots(2, 4, figsize=(18, 8))
                    for i in range(2):
                        for j in range(4):
                            # Image first, semi-transparent map on top.
                            ax[i, j].imshow(np.transpose(img, (1, 2, 0)))
                            ax[i, j].imshow(maps[i][j],
                                            alpha=0.8,
                                            cmap=overlay_cmap)
                            ax[i, j].text(0,
                                          220,
                                          map_names[i][j],
                                          fontsize='x-large',
                                          color='white',
                                          weight='bold',
                                          bbox=dict(fill=True, linewidth=0))
                            ax[i, j].axis('off')
                    plt.subplots_adjust(wspace=0.05, hspace=0.05)
                    plt.savefig(f'{wsl_plot_dir}/saliency_{img_name}.png',
                                dpi=300,
                                bbox_inches='tight')
                    plt.show()
                    plt.close()

        for key, values in all_scores.items():
            configs[key] = float(np.mean(values))
            print(key, ' ', configs[key])

        # Was `path`, a name that does not exist in this function.
        with open(model_dir / 'configs.json', 'w') as fp:
            json.dump(configs, fp)
Beispiel #8
0
def main(name: str,
         task: str,
         store: bool):
    """Score gradient-based saliency maps of vgg19 models with ``aupr``.

    Model directories below ``wsl_model_dir`` are selected through ``name``.
    A directory is skipped when its path contains ``debug`` or ``wildcat``,
    does not contain ``vgg19``, has no ``configs.json``, or when its
    ``best.pt`` cannot be loaded.  Images of the test split are scored only
    when none of their labels is 0.  The raw scores per method are saved as
    ``.npy`` files, the mean per method is stored in the model's config dict,
    and all config dicts are printed as one DataFrame.

    Args:
        name: ``'all'`` to consider every directory, otherwise a substring
            that the directory name has to contain.
        task: Kind of ground truth, ``'detect'`` (boxes) or ``'segment'``
            (run-length encoded masks).
        store: When true, the DataFrame is additionally written as CSV to
            ``wsl_summary_dir / 'wild_<hour>_<day>_<month>'``.
    """
    if task not in ('detect', 'segment'):
        # Nothing can be scored without a ground-truth map.
        print('Ground truth not available.')
        return

    pattern = '*' if name == 'all' else f'*{name}*'
    models = list(wsl_model_dir.glob(pattern))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    org_size = (1024, 1024)
    new_size = (224, 224)

    def as_batch(image):
        # A fresh tensor per call: every saliency generator receives its own
        # input, exactly one per call.
        return image.unsqueeze(dim=0).to(device).float()

    print('Number of potential model matches =', len(models))
    all_configs = []
    for m, path in enumerate(models):

        if 'debug' in str(path):  # Debugging models
            print('Debugging model')
            continue
        if 'vgg19' not in str(path):  # Model is not vgg19
            print('Model is not vgg19')
            continue
        if 'wildcat' in str(path):  # Model is wildcat
            print('Model is wildcat')
            continue
        if not (path / 'configs.json').exists():  # Model not completed
            print('Model not completed')
            continue

        with open(path / 'configs.json') as f:
            configs = json.load(f)
        print(configs)
        dataset = Loader(data=configs['data'],
                         split='test',
                         extension=configs['extension'],
                         classes=configs['classes'],
                         column=configs['column'],
                         regression=configs['regression'])

        try:
            checkpoint = torch.load(path / 'best.pt', map_location=device)
        except Exception as err:
            # Still best-effort, but say why the model is left out.
            print(f'Could not load checkpoint of {path}: {err}')
            continue

        # The checkpoint stores a wrapper; the network itself is `.module`.
        model = checkpoint['model'].module
        model.get_map = True
        model.eval()

        all_scores = defaultdict(list)

        VBP = vanilla_backprop.VanillaBackprop(model)
        IG = integrated_gradients.IntegratedGradients(model)
        GBP = guided_backprop.GuidedBackprop(model)
        GCAM = gradcam.GradCam(model, target_layer=34)

        print(len(dataset))
        for idx, data in tqdm(enumerate(dataset)):
            img, _ = data
            img_name = dataset.names[idx]
            labels = dataset.labels[idx]

            # Images with any label equal to 0 are not scored.
            if any(lab == 0 for lab in labels):
                continue
            saliency_label = torch.tensor(1).to(device)

            vanilla_grads = VBP.generate_gradients(as_batch(img), saliency_label)
            grayscale_vanilla_grads = vanilla_backprop.convert_to_grayscale(vanilla_grads)

            integrated_grads = IG.generate_integrated_gradients(as_batch(img), saliency_label, 100)
            grayscale_integrated_grads = integrated_gradients.convert_to_grayscale(integrated_grads)

            guided_grads = GBP.generate_gradients(as_batch(img), saliency_label)
            grayscale_guided_grads = guided_backprop.convert_to_grayscale(guided_grads)

            smooth_grad_mask = smooth_grad.generate_smooth_grad(VBP, as_batch(img), saliency_label, 5, 0.3)
            grayscale_smooth_grad = smooth_grad.convert_to_grayscale(smooth_grad_mask)

            smooth_ig_mask = smooth_grad.generate_smooth_grad(IG, as_batch(img), saliency_label, 5, 0.3)
            grayscale_smooth_ig = smooth_grad.convert_to_grayscale(smooth_ig_mask)

            cam = GCAM.generate_cam(as_batch(img), saliency_label)

            cam_gb = guided_gradcam.guided_grad_cam(cam, guided_grads)
            grayscale_cam_gb = guided_gradcam.convert_to_grayscale(cam_gb)

            # Ground truth at the resolution of the saliency maps.
            rows = dataset.df[dataset.df.Id == img_name]
            if task == 'detect':
                ground_map = box_to_map(rows.box.to_list(), np.zeros(org_size))
            else:
                ground_map = np.zeros(org_size)
                for ep in rows.EncodedPixels.to_list():
                    ground_map += rle2mask(ep, np.zeros(org_size)).T
            ground_map = cv2.resize(ground_map, new_size, interpolation=cv2.INTER_NEAREST).clip(0, 1)

            saliency_maps = {
                'GRAD': grayscale_vanilla_grads,
                'SG': grayscale_smooth_grad,
                'IG': grayscale_integrated_grads,
                'SIG': grayscale_smooth_ig,
                'GBP': grayscale_guided_grads,
                'GCAM': cam,
                'GGCAM': grayscale_cam_gb,
            }
            for key, saliency in saliency_maps.items():
                score = aupr(saliency, ground_map)
                # One entry per label, as before: every label of a scored
                # image is non-zero and contributes the same score.
                all_scores[key].extend([score] * len(labels))

        for key, values in all_scores.items():
            # Mean per method; `np.mean` on the whole dict raised TypeError.
            configs[key] = float(np.mean(values))
            print(key, ' ', configs[key])
            np.save('/data/2015P002510/nishanth/WSL/wsl/wsl/AUPRC_scores/{}_{}.npy'.format(key,'vgg_test'), values)

        all_configs.append(configs)

    df = pd.DataFrame(all_configs)
    print(df)
    # Named `timestamp` so the `time` module name is not shadowed.
    timestamp = datetime.datetime.now().strftime('%H_%d_%m')
    if store:
        df.to_csv(wsl_summary_dir / f'wild_{timestamp}')