def main(out_data: str = 'chexpert'):
    """Compute Mahalanobis scores for every completed model directory.

    For each directory under ``wsl_model_dir`` that has a ``configs.json`` and
    is not a debug run, the per-layer mean and precision matrix of spatially
    averaged features are estimated on the training loader.  Scores are then
    computed, for several perturbation magnitudes, on the model's own
    validation split and on the validation split of ``out_data``.

    Args:
        out_data: Name of the data set passed to ``Loader`` for the
            "assumed positive class" loader.
    """
    for model_idx, path in enumerate(wsl_model_dir.glob('*')):
        if 'debug' in str(path):
            # Debugging model
            continue
        if not (path / 'configs.json').exists():
            # Model not completed
            continue
        with open(path / 'configs.json') as f:
            configs = json.load(f)

        print(f'Model {model_idx} : {path}')

        # ------------------------------------------------------
        def make_loader(source, split):
            """Build a shuffled DataLoader for one data set / split."""
            dataset = Loader(data=source,
                             split=split,
                             extension=configs['extension'],
                             classes=configs['classes'],
                             column=configs['column'],
                             regression=configs['regression'])
            return DataLoader(  # type: ignore
                dataset,
                batch_size=configs['batchsize'],
                num_workers=4,
                pin_memory=True,
                shuffle=True)

        train_loader = make_loader(configs['data'], 'train')
        valid_loader = make_loader(configs['data'], 'valid')
        out_loader = make_loader(out_data, 'valid')

        device_spec = 'cuda:0' if torch.cuda.is_available() else 'cpu'
        checkpoint = torch.load(path / 'best.pt', map_location=device_spec)
        # Unwrap the stored wrapper and keep the bare module in the checkpoint.
        model = checkpoint['model'].module
        checkpoint['model'] = model
        model.network = configs['network']
        model.get_map = False
        model.eval()

        group_lasso = EmpiricalCovariance(assume_centered=False)
        # Filled by model.register_forward_hooks (layer -> name), presumably.
        layer_names = {}

        # ------------------------------------------------------
        def get_mean_precision(loader):
            """Return per-layer feature means and precision matrices."""
            print('building hook function...')
            features = {}

            def pooled(out):
                # Average every channel over all remaining dimensions.
                flat = out.detach().data.view(out.size(0), out.size(1), -1)
                return flat.mean(dim=-1)

            def hook(module, inp, out):
                key = layer_names[module]
                batch_feat = pooled(out)
                if key in features:
                    features[key] = torch.cat((features[key], batch_feat),
                                              dim=0)
                else:
                    features[key] = batch_feat

            handles = model.register_forward_hooks(model, hook, layer_names)

            began = time.time()
            with torch.set_grad_enabled(False):
                for batch_idx, batch in enumerate(loader):
                    # NOTE(review): images are read from batch[0] here but
                    # from batch[1] in get_mahalanobis_score - confirm which
                    # index Loader actually uses for the image tensor.
                    imgs = batch[0].cuda().float()
                    _ = batch[1]
                    _ = model(imgs)
                    elapsed = time.time() - began
                    speed = configs['batchsize'] * batch_idx // elapsed
                    print('Iter:', batch_idx, 'Speed:', int(speed), 'img/s',
                          end='\r', flush=True)
                    # Only the first batches are used for the estimate.
                    if batch_idx > 20:
                        break
            print('Total time:', time.time() - began, 'secs')

            print('calculating sample mean...')
            mean, precision = {}, {}
            for key in features:
                mean[key] = features[key].mean(dim=0)
                features[key] -= mean[key]
                group_lasso.fit(features[key].cpu().numpy())
                precision[key] = torch.from_numpy(
                    group_lasso.precision_).float().cuda()

            for hook_handle in handles:
                hook_handle.remove()
            return mean, precision

        train_mean, train_precision = get_mean_precision(train_loader)

        # ------------------------------------------------------
        def get_mahalanobis_score(loader: Any, features: Any,
                                  magnitude: float):
            """Return per-layer scores of sign-gradient perturbed inputs.

            ``features`` is accepted for call compatibility but not read.
            """
            scores = {}
            gaussian = {}
            for layer, layer_name in layer_names.items():
                checkpoint['optimizer'].zero_grad()

                def hook(module, inp, out):
                    centred = out.view(out.size(0), out.size(1),
                                       -1).mean(dim=-1)
                    centred = centred - train_mean[layer_name]
                    projected = torch.mm(centred, train_precision[layer_name])
                    gaussian[layer_name] = -0.5 * torch.mm(
                        projected, centred.t()).diag()

                hook_handle = layer.register_forward_hook(hook)

                began = time.time()
                for batch_idx, batch in enumerate(loader):
                    with torch.set_grad_enabled(True):
                        imgs = batch[1].cuda().float()
                        imgs.requires_grad = True
                        _ = model(imgs)
                        loss = gaussian[layer_name].mean()
                        loss.backward()
                        # Map the gradient sign to {-1, +1}.
                        direction = torch.ge(imgs.grad.data, 0)
                        direction = (direction.float() - 0.5) * 2

                    with torch.set_grad_enabled(False):
                        noisy_imgs = torch.add(imgs.data, direction,
                                               alpha=-magnitude)
                        # The hook refreshes gaussian[layer_name] here.
                        _ = model(noisy_imgs)
                        batch_scores = gaussian[layer_name].detach().data
                        if layer_name in scores:
                            scores[layer_name] = torch.cat(
                                (scores[layer_name], batch_scores), dim=0)
                        else:
                            scores[layer_name] = batch_scores

                    print(scores[layer_name].mean())
                    checkpoint['optimizer'].zero_grad()
                    elapsed = time.time() - began
                    speed = configs['batchsize'] * batch_idx // elapsed
                    print(layer_name, 'Iter:', batch_idx, 'Speed:',
                          int(speed), 'img/s', end='\r', flush=True)

                hook_handle.remove()
                print()
            return scores

        print('get mahalanobis scores...')
        magnitudes = [0.0, 0.01, 0.005, 0.002, 0.0014, 0.001, 0.0005]
        maha_valid_scores = {}
        maha_out_scores = {}
        for magnitude in magnitudes:
            print('Noise:', magnitude)
            print('Data - Assumed negative class:', configs['data'])
            maha_valid_scores[magnitude] = get_mahalanobis_score(
                valid_loader, layer_names, magnitude)
            print('Data - Assumed positive class:', out_data)
            maha_out_scores[magnitude] = get_mahalanobis_score(
                out_loader, layer_names, magnitude)
            print()

        print('merge mahalanobis scores...')
def main(debug: bool,
         data: str,
         column: str,
         extension: str,
         classes: int,
         augmentation: bool,
         network: str,
         depth: int,
         wildcat: bool,
         pretrained: bool,
         optim: str,
         resume: bool,
         name: str,
         lr: float,
         batchsize: int,
         workers: int,
         patience: int,
         balanced: bool,
         maps: int,
         alpha: float,
         variable_type: str,
         error_range: int,
         ID: str):
    """Train a model, or resume training from a stored checkpoint.

    Builds the train/valid loaders, creates (or restores) the checkpoint
    dictionary, and trains until the validation loss has not improved for
    more than ``patience`` epochs or 150 epochs are reached.  ``current.pt``,
    ``best.pt``, ``summary.txt`` and ``graphs.png`` are written to the model
    directory every epoch and ``configs.json`` once at the end.

    Args:
        debug: Name the run ``debug``, pass ``debug`` to the loaders and stop
            after a single epoch.
        data, column, extension, classes: Forwarded to ``Loader``.
        augmentation: Forwarded to the training ``Loader`` only.
        network, depth, wildcat, maps, alpha, pretrained: Forwarded to
            ``Architecture``; ``maps``/``alpha`` are only recorded in the
            configs when ``wildcat`` is set.
        optim: ``'sgd'`` or ``'adam'``.
        resume: Restore ``best.pt`` from the unique directory matching
            ``*{name}`` instead of building a new model.
        name: Suffix identifying the model directory to resume.
        lr, batchsize, workers: Optimiser / loader settings.
        patience: Tolerated number of epochs without improvement.
        balanced: Use ``train_dataset.pos_weight`` for the binary loss.
        variable_type: ``'continous'``, ``'categorical'`` or ``'binary'``.
        error_range: Forwarded to ``engine``.
        ID: Run name; ``'placeholder'`` requests a random word instead.

    Raises:
        ValueError: On an unsupported ``variable_type`` or ``optim``.
    """
    # ------------------------------------------------------
    print('Initializing model...', end='')
    if resume:
        # Path.glob returns a generator: materialise it before len()/[0].
        matching_models = list(wsl_model_dir.glob(f'*{name}'))
        assert len(matching_models) == 1, (
            f'expected exactly one model matching *{name}, '
            f'found {len(matching_models)}')
        # The resumed run keeps writing into the directory it came from.
        model_dir = matching_models[0]
        mname = str(model_dir).split('_')[-1]
    else:
        if debug:
            mname = 'debug'
        elif ID == 'placeholder':
            try:
                # Get a random word to use as a more readable name.  The
                # timeout keeps start-up from hanging on a dead service.
                response = requests.get(
                    "https://random-word-api.herokuapp.com/word", timeout=10)
                if response.status_code != 200:
                    raise RuntimeError('random word service unavailable')
                mname = response.json()[0]
            except Exception:
                # As a fallback use the date and time
                mname = datetime.datetime.now().strftime('%d_%m_%H_%M_%S')
        else:
            mname = ID

        full_mname = (data + '_' + column + '_' +
                      f'lr{lr}_bs{batchsize}_{optim}' +
                      ('_pre' if pretrained else '') +
                      ('_bal' if balanced else '') + '_' +
                      f'{network}{depth}' +
                      (f'_wildcat_maps{maps}_alpha{alpha}' if wildcat else '')
                      + '_' + mname)
        model_dir = wsl_model_dir / full_mname
    print('done')
    print('Model Name:', mname)

    # ------------------------------------------------------
    print('Initializing loaders...', end='', flush=True)
    print('train...', end='', flush=True)
    train_dataset = Loader(data,
                           split='train',
                           extension=extension,
                           classes=classes,
                           column=column,
                           variable_type=variable_type,
                           augmentation=augmentation,
                           debug=debug)
    train_loader = DataLoader(  # type: ignore
        train_dataset, batch_size=batchsize, num_workers=workers,
        shuffle=True)

    print('test...', end='', flush=True)
    test_dataset = Loader(data,
                          split='valid',
                          extension=extension,
                          classes=classes,
                          column=column,
                          variable_type=variable_type,
                          debug=debug)
    test_loader = DataLoader(  # type: ignore
        test_dataset, batch_size=batchsize, num_workers=workers,
        shuffle=True)
    print('done')

    # ------------------------------------------------------
    print('Initializing optim/criterion...', end='')
    if resume:
        checkpoint = torch.load(
            model_dir / 'best.pt',
            map_location='cuda:0' if torch.cuda.is_available() else 'cpu')
    else:
        if variable_type == 'continous':
            criterion = nn.MSELoss()
        elif variable_type == 'categorical':
            criterion = nn.CrossEntropyLoss()
        elif variable_type == 'binary':
            if balanced:
                criterion = nn.BCEWithLogitsLoss(
                    pos_weight=torch.Tensor(train_dataset.pos_weight))
            else:
                criterion = nn.BCEWithLogitsLoss()
        else:
            raise ValueError(
                'Variable type should be one of binary/categorical/continous.')
        criterion = criterion.cuda()

        model = Architecture(network, depth, wildcat, classes, maps, alpha,
                             pretrained)
        model = nn.DataParallel(model).cuda()

        if optim == 'sgd':
            optimizer = torch.optim.SGD(model.parameters(), lr=lr,
                                        momentum=0.1, weight_decay=1e-4)
        elif optim == 'adam':
            optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                         betas=(0.9, 0.999))
        else:
            raise ValueError(f'{optim} is not supported')

        checkpoint = {
            'model': model,
            'optimizer': optimizer,
            'criterion': criterion,
            'epoch': 0,
            'loss': 100,
            'train_loss_all': [],
            'test_loss_all': [],
            'train_rmetric_all': [],
            'test_rmetric_all': []
        }

    best_epoch = checkpoint['epoch']
    best_loss = checkpoint['loss']
    print('done')

    # ------------------------------------------------------
    while (checkpoint['epoch'] - best_epoch <= patience) \
            and checkpoint['epoch'] < 150:
        start = time.time()
        checkpoint['epoch'] += 1

        print('Epoch:', checkpoint['epoch'], '-Training')
        checkpoint['model'].train()
        checkpoint['loss'], rmetric, summary_train = engine(
            train_loader, checkpoint, batchsize, classes, variable_type,
            error_range, is_train=True)
        checkpoint['train_loss_all'].append(checkpoint['loss'])
        checkpoint['train_rmetric_all'].append(rmetric)

        print('Epoch:', checkpoint['epoch'], '-Testing')
        checkpoint['model'].eval()
        # checkpoint['loss'] now holds the validation loss; it drives the
        # best-model bookkeeping below.
        checkpoint['loss'], rmetric, summary_test = engine(
            test_loader, checkpoint, batchsize, classes, variable_type,
            error_range, is_train=False)
        checkpoint['test_loss_all'].append(checkpoint['loss'])
        checkpoint['test_rmetric_all'].append(rmetric)

        os.makedirs(model_dir, exist_ok=True)
        torch.save(checkpoint, model_dir / 'current.pt')

        if best_loss > checkpoint['loss']:
            print('Best model updated')
            best_loss = checkpoint['loss']
            best_epoch = checkpoint['epoch']
            torch.save(checkpoint, model_dir / 'best.pt')
        else:
            print('Best model unchanged- Epoch:', best_epoch, 'Loss:',
                  best_loss)

        with open(model_dir / 'summary.txt', 'a+') as file:
            epoch = checkpoint['epoch']
            file.write(
                f'Epoch: {epoch} \n Train:{summary_train} \n Test:{summary_test}')

        plt.figure(figsize=(12, 18))
        plt.subplot(2, 1, 1)
        plt.plot(checkpoint['train_loss_all'], label='Train loss')
        plt.plot(checkpoint['test_loss_all'], label='Valid loss')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.subplot(2, 1, 2)
        plt.plot(checkpoint['train_rmetric_all'], label='Train rmetric')
        plt.plot(checkpoint['test_rmetric_all'], label='Test rmetric')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('rmetric')
        plt.savefig(model_dir / 'graphs.png', dpi=300)
        plt.close()

        print('Time taken:', int(time.time() - start), 'secs')

        if debug:
            print('Breaking early since we are in debug mode')
            print('You can find the trained model at -', model_dir)
            break

    configs = {
        'name': mname,
        'time': datetime.datetime.now().strftime('%d_%m_%H_%M_%S'),
        'data': data,
        'column': column,
        'extension': extension,
        'classes': classes,
        'network': network,
        'depth': depth,
        'wildcat': wildcat,
        'pretrained': pretrained,
        'optim': optim,
        'learning_rate': lr,
        'batchsize': batchsize,
        'balanced': balanced,
        'maps': maps if wildcat else None,
        'alpha': alpha if wildcat else None,
        'variable_type': variable_type,
        'error_range': error_range if variable_type == 'continous' else None,
        'best_epoch': best_epoch,
        'best_loss': best_loss,
        # Epochs are 1-based, the metric history is 0-based.
        'rmetric': checkpoint['test_rmetric_all'][best_epoch - 1],
    }
    with open(model_dir / 'configs.json', 'w') as fp:
        json.dump(configs, fp)
    return
def main(debug: bool,
         data: str,
         column: str,
         extension: str,
         classes: int,
         depth: int,
         pretrained: bool,
         optim: str,
         resume: bool,
         results: bool,
         name: str,
         lr: float,
         batchsize: int,
         workers: int,
         patience: int,
         ID: str):
    """Train a retinanet model, resume one, or recompute box results.

    Three modes, selected in this order:

    * ``resume``: restore ``best.pt`` from the unique directory matching
      ``*{name}`` and continue training.
    * ``results``: for every matching finished model, run ``engine_boxes`` on
      the validation data set, write ``results.csv``, update ``configs.json``
      and return without training.
    * otherwise: build a fresh model and train it.

    Training stops when the validation loss has not improved for more than
    ``patience`` epochs or after 150 epochs.  Afterwards ``best.pt`` is
    reloaded, evaluated with ``engine_boxes`` and ``configs.json`` is written.

    Args:
        debug: Name the run ``debug``, pass ``debug`` to the loaders and stop
            after one epoch.
        data, column, extension, classes: Forwarded to ``Loader``.
        depth: Backbone depth, one of 18, 34, 50, 101, 152.
        pretrained: Forwarded to the ``architecture.resnet*`` constructor.
        optim: ``'sgd'`` or ``'adam'``.
        resume, results: Mode switches described above.
        name: Pattern fragment used to find existing model directories.
        lr, batchsize, workers, patience: Training settings.
        ID: Run name; ``'placeholder'`` requests a random word instead.

    Raises:
        ValueError: On an unsupported ``depth`` or ``optim``.
    """
    # ------------------------------------------------------
    if resume:
        matching_models = list(wsl_model_dir.glob(f'*{name}'))
        assert len(matching_models) == 1
        model_dir = matching_models[0]
        mname = str(model_dir).split('_')[-1]
        print(mname)
    elif results:
        if 'retinanet' in name:
            pattern = f'*{name}*/configs.json'
        else:
            pattern = f'*retinanet*{name}*/configs.json'
        # Only finished models have a configs.json; keep their directories.
        model_dirs = [
            config_path.parent for config_path in wsl_model_dir.glob(pattern)
        ]
    else:
        print('Initializing model...', end='')
        if debug:
            mname = 'debug'
        elif ID == 'placeholder':
            try:
                # Get a random word to use as a more readable name.  The
                # timeout keeps start-up from hanging on a dead service.
                response = requests.get(
                    "https://random-word-api.herokuapp.com/word", timeout=10)
                if response.status_code != 200:
                    raise RuntimeError('random word service unavailable')
                mname = response.json()[0]
            except Exception:
                # As a fallback use the date and time
                mname = datetime.datetime.now().strftime('%d_%m_%H_%M_%S')
        else:
            mname = ID

        full_mname = (data + '_' + column + '_' +
                      f'lr{lr}_bs{batchsize}_{optim}' + '_' +
                      ('pre_' if pretrained else '') + f'retinanet{depth}' +
                      '_' + mname)
        model_dir = wsl_model_dir / full_mname
        print('done')
        print('Model Name:', mname)

    # ------------------------------------------------------
    print('Initializing loaders...', end='', flush=True)
    print('train...', end='', flush=True)
    train_dataset = Loader(data,
                           split='train',
                           extension=extension,
                           classes=classes,
                           column=column,
                           debug=debug)
    train_loader = DataLoader(  # type: ignore
        train_dataset,
        batch_size=batchsize,
        num_workers=workers,
        pin_memory=True,
        shuffle=True)

    print('test...', end='', flush=True)
    test_dataset = Loader(data,
                          split='valid',
                          extension=extension,
                          classes=classes,
                          column=column,
                          debug=debug)
    test_loader = DataLoader(  # type: ignore
        test_dataset,
        batch_size=batchsize,
        num_workers=workers,
        pin_memory=True,
        shuffle=True)
    print('done')

    if classes > 1:
        print('Class List: ', train_dataset.class_names)

    # ------------------------------------------------------
    if results:
        for model_dir in model_dirs:
            print('Initializing optim/checkpoint...')
            checkpoint = torch.load(
                model_dir / 'best.pt',
                map_location='cuda:0' if torch.cuda.is_available() else 'cpu')
            with open(model_dir / 'configs.json') as f:
                configs = json.load(f)

            print('Calculating box results...')
            checkpoint['model'].eval()
            df, rmetric, wild_metric, pointwise = engine_boxes(
                test_dataset, checkpoint)
            df.to_csv(model_dir / 'results.csv')

            configs['rmetric'] = rmetric
            configs['wild_metric'] = wild_metric
            # NOTE(review): 'utility' receives the same value as
            # 'wild_metric' - confirm this is intended.
            configs['utility'] = wild_metric
            configs['pointwise'] = pointwise
            with open(model_dir / 'configs.json', 'w') as fp:
                json.dump(configs, fp)

            print('Finished:', rmetric, wild_metric, pointwise)
            print(
                f'You can find the calculated results at - {model_dir}/results.csv'
            )
        return

    # ------------------------------------------------------
    print('Initializing optim/checkpoint...', end='')
    # The results mode returned above, so only resume needs a restore here.
    if resume:
        checkpoint = torch.load(
            model_dir / 'best.pt',
            map_location='cuda:0' if torch.cuda.is_available() else 'cpu')
    else:
        if depth == 18:
            model = architecture.resnet18(classes, pretrained)
        elif depth == 34:
            model = architecture.resnet34(classes, pretrained)
        elif depth == 50:
            model = architecture.resnet50(classes, pretrained)
        elif depth == 101:
            model = architecture.resnet101(classes, pretrained)
        elif depth == 152:
            model = architecture.resnet152(classes, pretrained)
        else:
            raise ValueError(
                'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
        model = nn.DataParallel(model).cuda()

        if optim == 'sgd':
            optimizer = torch.optim.SGD(model.parameters(), lr=lr,
                                        momentum=0.1, weight_decay=1e-4)
        elif optim == 'adam':
            optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                         betas=(0.9, 0.999))
        else:
            raise ValueError(f'{optim} is not supported')

        checkpoint = {
            'model': model,
            'optimizer': optimizer,
            'epoch': 0,
            'loss': 100,
            'train_loss_all': [],
            'test_loss_all': [],
            'train_class_loss_all': [],
            'test_class_loss_all': [],
            'train_reg_loss_all': [],
            'test_reg_loss_all': []
        }

    best_epoch = checkpoint['epoch']
    best_loss = checkpoint['loss']
    print('done')

    # ------------------------------------------------------
    while (checkpoint['epoch'] - best_epoch <= patience) \
            and checkpoint['epoch'] < 150:
        start = time.time()
        checkpoint['epoch'] += 1

        print('Epoch:', checkpoint['epoch'], '-Training')
        checkpoint['model'].train()
        class_loss, reg_loss, checkpoint['loss'], summary_train = engine(
            train_loader, checkpoint, batchsize, classes, is_train=True)
        checkpoint['train_loss_all'].append(checkpoint['loss'])
        checkpoint['train_class_loss_all'].append(class_loss)
        checkpoint['train_reg_loss_all'].append(reg_loss)

        print('Epoch:', checkpoint['epoch'], '-Testing')
        # checkpoint['model'].eval() - In eval mode model prints boxes
        class_loss, reg_loss, checkpoint['loss'], summary_test = engine(
            test_loader, checkpoint, batchsize, classes, is_train=False)
        checkpoint['test_loss_all'].append(checkpoint['loss'])
        checkpoint['test_class_loss_all'].append(class_loss)
        checkpoint['test_reg_loss_all'].append(reg_loss)

        os.makedirs(model_dir, exist_ok=True)
        torch.save(checkpoint, model_dir / 'current.pt')

        if best_loss > checkpoint['loss']:
            print('Best model updated')
            best_loss = checkpoint['loss']
            best_epoch = checkpoint['epoch']
            torch.save(checkpoint, model_dir / 'best.pt')
        else:
            print('Best model unchanged- Epoch:', best_epoch, 'Loss:',
                  best_loss)

        with open(model_dir / 'summary.txt', 'a+') as file:
            epoch = checkpoint['epoch']
            file.write(
                f'Epoch: {epoch} \n Train:{summary_train} \n Test:{summary_test}'
            )

        plt.figure(figsize=(12, 18))
        plt.subplot(3, 1, 1)
        plt.plot(checkpoint['train_loss_all'], label='Train loss')
        plt.plot(checkpoint['test_loss_all'], label='Valid loss')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.subplot(3, 1, 2)
        plt.plot(checkpoint['train_class_loss_all'], label='Train Class loss')
        plt.plot(checkpoint['test_class_loss_all'], label='Valid Class loss')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Class Loss')
        plt.subplot(3, 1, 3)
        plt.plot(checkpoint['train_reg_loss_all'], label='Train Reg loss')
        plt.plot(checkpoint['test_reg_loss_all'], label='Valid Reg loss')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Reg Loss')
        plt.savefig(model_dir / 'graphs.png', dpi=300)
        plt.close()

        print('Time taken:', int(time.time() - start), 'secs')

        if debug:
            print('Breaking early since we are in debug mode')
            print('You can find the trained model at -', model_dir)
            break

    # Evaluate the best (not the last) checkpoint.
    checkpoint = torch.load(
        model_dir / 'best.pt',
        map_location='cuda:0' if torch.cuda.is_available() else 'cpu')
    df, rmetric, wild_metric, pointwise = engine_boxes(test_dataset,
                                                       checkpoint)
    df.to_csv(model_dir / 'results.csv')
    print('Finished:', rmetric)
    print(f'You can find the calculated results at - {model_dir}/results.csv')

    configs = {
        'name': mname,
        'time': datetime.datetime.now().strftime('%d_%m_%H_%M_%S'),
        'data': data,
        'column': column,
        'extension': extension,
        'classes': classes,
        'network': 'retinanet',
        'depth': depth,
        'pretrained': pretrained,
        'optim': optim,
        'learning_rate': lr,
        'batchsize': batchsize,
        'best_epoch': best_epoch,
        'best_loss': best_loss,
        'rmetric': rmetric,
        'wild_metric': wild_metric,
        'pointwise': pointwise
    }
    with open(model_dir / 'configs.json', 'w') as fp:
        json.dump(configs, fp)
    return
def main(name: str, start: int, plot: bool):
    """Score saliency methods against ground-truth maps for rsna models.

    For up to 50 matching model directories (starting at index ``start``),
    every positive test image is run through the wildcat map (when the model
    has one), Grad-CAM, gradients, integrated gradients, SmoothGrad variants,
    guided backprop and guided Grad-CAM.  Each map is scored with ``aupr``
    against the ground-truth map and the per-method means are written back to
    the model's ``configs.json``.

    Args:
        name: ``'all'`` or a fragment of the model directory name.
        start: Index of the first matching directory to process.
        plot: Save and show a 2x5 overview figure per image.
    """
    if name == 'all':
        pattern = 'rsna*'
    elif 'rsna' in name:
        pattern = f'*{name}*'
    else:
        pattern = f'rsna*{name}*'
    model_dirs = list(wsl_model_dir.glob(pattern))

    num_model_dirs = 50
    print(f'Number of potential model directory matches = {len(model_dirs)}, but doing top {num_model_dirs} models for now.')
    model_dirs = model_dirs[start:start + num_model_dirs]

    overlay_cmap = None
    if plot:
        ncolors = 256
        color_array = plt.get_cmap('viridis')(range(ncolors))
        # change alpha values
        color_array[:, -1] = np.linspace(1.0, 0.0, ncolors)
        # Build the overlay colormap once and hand the object to imshow.
        # Registering it under the built-in name 'rainbow' for every image is
        # rejected by current matplotlib releases.
        overlay_cmap = LinearSegmentedColormap.from_list(name='rainbow',
                                                         colors=color_array)

    for m_idx, model_dir in enumerate(model_dirs):
        if 'debug' in str(model_dir):
            # Debugging model
            print('Debugging model')
            continue
        if not (model_dir / 'configs.json').exists():
            # Model not completed
            print('Model not completed')
            continue
        with open(model_dir / 'configs.json') as f:
            configs = json.load(f)

        dataset = Loader(data=configs['data'],
                         split='test',
                         extension=configs['extension'],
                         classes=configs['classes'],
                         column=configs['column'],
                         variable_type=configs['variable_type'])
        print('Number of images -', len(dataset))
        print(f'Model {m_idx} : {model_dir}')

        # Without a known task there is no way to build the ground-truth
        # map; skip instead of failing or reusing the previous model's task.
        if configs['data'] not in known_tasks:
            print('Ground truth not available.')
            continue
        task = known_tasks[configs['data']]
        if task not in ('detect', 'segment'):
            print('Ground truth not available.')
            continue

        checkpoint = torch.load(
            model_dir / 'best.pt',
            map_location='cuda:0' if torch.cuda.is_available() else 'cpu')
        model = checkpoint['model']
        model.gradient = None
        model.eval()

        # currently hardcoded, should ideally be inferred from image
        org_size = (1024, 1024)
        new_size = (224, 224)
        all_scores = defaultdict(list)

        GD = BackProp(model)
        GBP = BackProp(model, True)

        start_time = time.time()
        for idx, data in enumerate(dataset):
            model.zero_grad()
            # Use a distinct local so the ``name`` argument is not shadowed.
            img_name, img, label = data
            label = label.squeeze().cuda()

            # Only positive images are scored.
            if label != 1:
                continue

            # Make the ground map
            rows = dataset.df[dataset.df.Id == img_name]
            if task == 'detect':
                ground_map = box_to_map(rows.box.to_list(),
                                        np.zeros(org_size))
            else:  # task == 'segment'
                ground_map = np.zeros(org_size)
                for ep in rows.EncodedPixels.to_list():
                    ground_map += rle2mask(ep, np.zeros(org_size)).T
            ground_map = cv2.resize(
                ground_map, new_size,
                interpolation=cv2.INTER_NEAREST).clip(0, 1)

            # Make the saliency map.  A fresh input tensor is created for
            # every method, exactly as before.
            model.get_map = True
            if configs['wildcat']:
                _, _, wild, handle = model(
                    img.unsqueeze(dim=0).cuda().float())
                handle.remove()
                wild = torch.max(wild, dim=1)[0]
                wild = wild.squeeze().cpu().data.numpy()
                # NOTE(review): a constant map divides by zero here.
                wild = (wild - wild.min()) / (wild.max() - wild.min())
                wild = cv2.resize(wild, new_size,
                                  interpolation=cv2.INTER_NEAREST)
            else:
                wild = np.zeros_like(ground_map)

            gcam = GD.generate_cam(
                img.unsqueeze(dim=0).cuda().float()).squeeze()

            model.get_map = False
            grad = GD.generate_gradients(img.unsqueeze(dim=0).cuda().float())
            ig = GD.generate_integrated_gradients(
                img.unsqueeze(dim=0).cuda().float(), 25)
            sg = GD.generate_smooth_grad(
                img.unsqueeze(dim=0).cuda().float(), 5, 0.1, 0)
            sig = GD.generate_smooth_grad(
                img.unsqueeze(dim=0).cuda().float(), 5, 0.1, 10)
            gbp = GBP.generate_gradients(img.unsqueeze(dim=0).cuda().float())
            ggcam = np.multiply(gcam, gbp)

            flat_truth = ground_map.flatten()
            all_scores['WILD'].append(aupr(flat_truth, wild.flatten()))
            all_scores['GRAD'].append(aupr(flat_truth, grad.flatten()))
            all_scores['SG'].append(aupr(flat_truth, sg.flatten()))
            all_scores['IG'].append(aupr(flat_truth, ig.flatten()))
            all_scores['SIG'].append(aupr(flat_truth, sig.flatten()))
            all_scores['GBP'].append(aupr(flat_truth, gbp.flatten()))
            all_scores['GCAM'].append(aupr(flat_truth, gcam.flatten()))
            all_scores['GGCAM'].append(aupr(flat_truth, ggcam.flatten()))

            if plot:
                row, col = 2, 5
                map_names = [['XRAY', 'WILD', 'GRAD', 'GCAM', 'GGCAM'],
                             ['MASK', 'GBP', 'SG', 'IG', 'SIG']]
                maps = [[img, wild, grad, gcam, ggcam],
                        [ground_map, gbp, sg, ig, sig]]
                fig, ax = plt.subplots(row, col, figsize=(18, 8))
                for i in range(row):
                    for j in range(col):
                        ax[i, j].imshow(np.transpose(img, (1, 2, 0)))
                        # The first panel shows the bare image.
                        if not (i == 0 and j == 0):
                            ax[i, j].imshow(maps[i][j], alpha=0.8,
                                            cmap=overlay_cmap)
                        ax[i, j].text(0, 220, map_names[i][j],
                                      fontsize='x-large', color='white',
                                      weight='bold',
                                      bbox=dict(fill=True, linewidth=0))
                        ax[i, j].axis('off')
                plt.subplots_adjust(wspace=0.05, hspace=0.05)
                plt.savefig(f'{wsl_plot_dir}/saliency_{img_name}.png',
                            dpi=300, bbox_inches='tight')
                plt.show()
                plt.close()

            print_str = f'{idx}: | '
            for key, value in all_scores.items():
                print_str += f'{key}-{int(np.mean(value) * 100)} | '
            print_str += str(
                round((time.time() - start_time) / (idx + 1), 2)) + ' s/img'
            print(print_str, end='\r')

        for key in all_scores:
            configs[key] = np.mean(all_scores[key])
            print(key, ' ', configs[key])

        with open(model_dir / 'configs.json', 'w') as fp:
            json.dump(configs, fp)
def main(name: str, task: str):
    """Compute the ROC-AUC "wild metric" of wildcat maps per model.

    For every finished, non-debug model whose path contains ``wild``, the
    predicted map of each positive label is resized to 224x224 and compared
    with the ground-truth map via ``roc_auc_score``.  The mean over all scored
    maps is stored as ``wild_metric`` in the model's ``configs.json``.

    Args:
        name: ``'all'`` or a fragment of the model directory name.
        task: Ground-truth type (``'detect'`` or ``'segment'``) used for a
            model whose data set is not listed in ``known_tasks``.
    """
    pattern = '*' if name == 'all' else f'*{name}*'
    models = list(wsl_model_dir.glob(pattern))
    models.reverse()
    print('Number of potential model matches =', len(models))

    for m, path in enumerate(models):
        if 'debug' in str(path):
            # Debugging model
            print('Debugging model')
            continue
        if 'wild' not in str(path):
            continue
        if not (path / 'configs.json').exists():
            # Model not completed
            print('Model not completed')
            continue
        with open(path / 'configs.json') as f:
            configs = json.load(f)
        print(configs)

        dataset = Loader(data=configs['data'],
                         split='valid',
                         extension=configs['extension'],
                         classes=configs['classes'],
                         column=configs['column'],
                         regression=configs['regression'])
        print(f'Model {m} : {path}')

        # Resolve the task per model so one model's override does not leak
        # into the models processed after it.
        if configs['data'] in known_tasks:
            model_task = known_tasks[configs['data']]
        else:
            model_task = task
        if model_task not in ('detect', 'segment'):
            # No way to build a ground-truth map: skip this model rather than
            # scoring against an undefined or stale map.
            print('Ground truth not available.')
            continue

        checkpoint = torch.load(
            path / 'best.pt',
            map_location='cuda:0' if torch.cuda.is_available() else 'cpu')
        model = checkpoint['model'].module
        checkpoint['model'] = model
        model.get_map = True
        model.eval()

        org_size = (1024, 1024)
        new_size = (224, 224)
        sigmoid = torch.nn.Sigmoid().cuda()
        all_scores = []

        with torch.set_grad_enabled(False):
            for idx, data in enumerate(dataset):
                img, _ = data
                img_name = dataset.names[idx]
                labels = dataset.labels[idx]

                predicted_map = model(
                    img.unsqueeze(dim=0).cuda().float()).squeeze(dim=0)
                predicted_map = sigmoid(
                    predicted_map.sum(dim=0)).cpu().data.numpy()

                score = []
                for i, class_label in enumerate(labels):
                    # Only classes present in the image are scored.
                    if class_label == 0:
                        continue

                    rows = dataset.df[dataset.df.Id == img_name]
                    if model_task == 'detect':
                        ground_map = box_to_map(rows.box.to_list(),
                                                np.zeros(org_size))
                    else:  # model_task == 'segment'
                        ground_map = np.zeros(org_size)
                        for ep in rows.EncodedPixels.to_list():
                            ground_map += rle2mask(ep, np.zeros(org_size)).T

                    ground_map = cv2.resize(
                        ground_map, new_size,
                        interpolation=cv2.INTER_NEAREST).clip(0, 1)
                    re_pred_map = cv2.resize(predicted_map[i], new_size,
                                             interpolation=cv2.INTER_AREA)
                    score.append(
                        roc_auc_score(ground_map.flatten(),
                                      re_pred_map.flatten()))

                all_scores += score
                if (len(all_scores) + 1) % 32 == 0:
                    print('Idx:', idx, 'Mean:', np.mean(all_scores),
                          end='\r')

        configs['wild_metric'] = np.mean(all_scores)
        print('Wild Metric:', configs['wild_metric'])
        with open(path / 'configs.json', 'w') as fp:
            json.dump(configs, fp)
def main(store: bool = False):
    """Summarise the wildcat map ROC-AUC of every finished wildcat model.

    For each non-debug model directory whose path contains ``wildcat`` and
    that has a ``configs.json``, the predicted map of every positive label is
    compared with the box ground truth via ``roc_auc_score``.  The mean is
    added to the model's configs under ``'wild'`` and all configs are printed
    as one data frame.

    Args:
        store: Also write the data frame to ``wsl_summary_dir``.
    """
    all_configs = []
    for model_idx, path in enumerate(wsl_model_dir.glob('*')):
        if 'debug' in str(path):
            # Debugging model
            continue
        if 'wildcat' not in str(path):
            # Model is not wildcat
            continue
        if not (path / 'configs.json').exists():
            # Model not completed
            continue
        with open(path / 'configs.json') as f:
            configs = json.load(f)

        dataset = Loader(data=configs['data'],
                         split='valid',
                         extension=configs['extension'],
                         classes=configs['classes'],
                         col_name=configs['column'],
                         regression=configs['regression'])
        print(configs)
        print(f'Model {model_idx} : {path}')

        checkpoint = torch.load(
            path / 'best.pt',
            map_location='cuda:0' if torch.cuda.is_available() else 'cpu')
        model = checkpoint['model'].module
        checkpoint['model'] = model
        model.get_map = True
        model.eval()

        org_size = (1024, 1024)
        new_size = (224, 224)
        sigmoid = torch.nn.Sigmoid().cuda()
        all_scores = []

        with torch.set_grad_enabled(False):
            for img_idx, data in enumerate(dataset):
                img, _ = data
                name = dataset.names[img_idx]
                labels = dataset.labels[img_idx]

                predicted_map = model(
                    img.unsqueeze(dim=0).cuda().float()).squeeze(dim=0)
                predicted_map = sigmoid(
                    predicted_map.sum(dim=0)).cpu().data.numpy()

                score = []
                for i, class_label in enumerate(labels):
                    # Only classes present in the image are scored.
                    if class_label == 0:
                        continue
                    # 'records' is the full spelling of the former 'row'
                    # abbreviation, which pandas >= 2.0 no longer accepts.
                    ground_map = box_to_map(
                        dataset.df[dataset.df.Id == name].to_dict(
                            orient='records'),
                        configs['column'], org_size, new_size)
                    re_pred_map = cv2.resize(predicted_map[i], new_size,
                                             interpolation=cv2.INTER_AREA)
                    score.append(
                        roc_auc_score(ground_map.flatten(),
                                      re_pred_map.flatten()))

                all_scores += score
                if (len(all_scores) + 1) % 32 == 0:
                    print('Mean:', np.mean(all_scores), end='\r')

        configs['wild'] = np.mean(all_scores)
        all_configs.append(configs)

    df = pd.DataFrame(all_configs)
    print(df)

    # Named ``timestamp`` so the ``time`` module name is not shadowed.
    timestamp = datetime.datetime.now().strftime('%H_%d_%m')
    if store:
        df.to_csv(wsl_summary_dir / f'wild_{timestamp}')
def main(name: str, start: int, plot: bool):
    """Score saliency methods against ground-truth maps for stored models.

    For every matching model directory from index ``start`` on, each positive
    test image is scored with ``aupr`` against its ground-truth map: wildcat
    models use the map returned by the network, all other models use
    gradients, SmoothGrad, integrated gradients, guided backprop, Grad-CAM
    and guided Grad-CAM.  The per-method means are written back to the
    model's ``configs.json``.

    Args:
        name: ``'all'`` or a fragment of the model directory name.
        start: Index of the first matching directory to process.
        plot: Save and show an overview figure per image.
    """
    pattern = '*' if name == 'all' else f'*{name}*'
    model_dirs = list(wsl_model_dir.glob(pattern))[start:]
    print('Number of potential model directory matches =', len(model_dirs))

    overlay_cmap = None
    if plot:
        ncolors = 256
        color_array = plt.get_cmap('viridis')(range(ncolors))
        # change alpha values
        color_array[:, -1] = np.linspace(1.0, 0.0, ncolors)
        # Build the overlay colormap once and hand the object to imshow.
        # Registering it under the built-in name 'rainbow' for every image is
        # rejected by current matplotlib releases.
        overlay_cmap = LinearSegmentedColormap.from_list(name='rainbow',
                                                         colors=color_array)

    for m_idx, model_dir in enumerate(model_dirs):
        if 'debug' in str(model_dir):
            # Debugging model
            print('Debugging model')
            continue
        if not (model_dir / 'configs.json').exists():
            # Model not completed
            print('Model not completed')
            continue
        with open(model_dir / 'configs.json') as f:
            configs = json.load(f)

        dataset = Loader(data=configs['data'],
                         split='test',
                         extension=configs['extension'],
                         classes=configs['classes'],
                         column=configs['column'],
                         regression=configs['regression'])
        print('Number of images -', len(dataset))
        print(f'Model {m_idx} : {model_dir}')

        # Without a known task there is no way to build the ground-truth
        # map; skip instead of failing or reusing the previous model's task.
        if configs['data'] not in known_tasks:
            print('Ground truth not available.')
            continue
        task = known_tasks[configs['data']]
        if task not in ('detect', 'segment'):
            print('Ground truth not available.')
            continue

        checkpoint = torch.load(
            model_dir / 'best.pt',
            map_location='cuda:0' if torch.cuda.is_available() else 'cpu')
        model = checkpoint['model'].module
        checkpoint['model'] = model
        model.gradient = None
        model.eval()

        org_size = (1024, 1024)
        new_size = (224, 224)
        all_scores = defaultdict(list)

        GD = BackProp(model)
        GBP = BackProp(model, True)

        start_time = time.time()
        for idx, data in enumerate(dataset):
            print(
                f'{idx} speed-{(time.time() - start_time) // (idx + 1)} s/img',
                end='\r')
            model.zero_grad()
            # Use a distinct local so the ``name`` argument is not shadowed.
            img_name, img, label = data
            label = label.squeeze().cuda()

            # Only positive images are scored.
            if label != 1:
                continue

            # Make the ground map
            rows = dataset.df[dataset.df.Id == img_name]
            if task == 'detect':
                ground_map = box_to_map(rows.box.to_list(),
                                        np.zeros(org_size))
            else:  # task == 'segment'
                ground_map = np.zeros(org_size)
                for ep in rows.EncodedPixels.to_list():
                    ground_map += rle2mask(ep, np.zeros(org_size)).T
            ground_map = cv2.resize(
                ground_map, new_size,
                interpolation=cv2.INTER_NEAREST).clip(0, 1)
            flat_truth = ground_map.flatten()

            # Make the saliency map
            if configs['wildcat']:
                model.get_map = True
                with torch.set_grad_enabled(False):
                    wild, _, handle = model(
                        img.unsqueeze(dim=0).cuda().float())
                    # torch hook handles expose remove(), not remove_hook().
                    handle.remove()
                wild = wild.squeeze().cpu().data.numpy()
                # NOTE(review): a constant map divides by zero here.
                wild = (wild - wild.min()) / (wild.max() - wild.min())
                wild = cv2.resize(wild, new_size,
                                  interpolation=cv2.INTER_NEAREST)
                all_scores['WILD'].append(aupr(flat_truth, wild.flatten()))

                if plot:
                    plt.figure(figsize=(4, 12))
                    plt.subplot(1, 3, 1)
                    plt.imshow(np.transpose(img, (1, 2, 0)))
                    plt.subplot(1, 3, 2)
                    plt.imshow(ground_map, alpha=0.8, cmap=overlay_cmap)
                    # Third panel; index 2 was used twice, which drew the
                    # prediction on top of the ground truth.
                    plt.subplot(1, 3, 3)
                    plt.imshow(wild, alpha=0.8, cmap=overlay_cmap)
                    plt.savefig(f'{wsl_plot_dir}/wild_{img_name}.png',
                                dpi=300, bbox_inches='tight')
                    plt.show()
                    plt.close()
            else:
                # A fresh input tensor is created for every method, exactly
                # as before.
                model.get_map = False
                grad = GD.generate_gradients(
                    img.unsqueeze(dim=0).cuda().float())
                ig = GD.generate_integrated_gradients(
                    img.unsqueeze(dim=0).cuda().float(), 100)
                sg = GD.generate_smooth_grad(
                    img.unsqueeze(dim=0).cuda().float(), 5, 0.3, 0)
                # NOTE(review): called with the same arguments as ``sg``; the
                # last argument probably has to be non-zero for 'SIG' to
                # differ from 'SG' - confirm against BackProp.
                sig = GD.generate_smooth_grad(
                    img.unsqueeze(dim=0).cuda().float(), 5, 0.3, 0)
                gbp = GBP.generate_gradients(
                    img.unsqueeze(dim=0).cuda().float())

                model.get_map = True
                gcam = GD.generate_cam(
                    img.unsqueeze(dim=0).cuda().float()).squeeze()
                ggcam = np.multiply(gcam, gbp)

                all_scores['GRAD'].append(aupr(flat_truth, grad.flatten()))
                all_scores['SG'].append(aupr(flat_truth, sg.flatten()))
                all_scores['IG'].append(aupr(flat_truth, ig.flatten()))
                all_scores['SIG'].append(aupr(flat_truth, sig.flatten()))
                all_scores['GBP'].append(aupr(flat_truth, gbp.flatten()))
                all_scores['GCAM'].append(aupr(flat_truth, gcam.flatten()))
                all_scores['GGCAM'].append(
                    aupr(flat_truth, ggcam.flatten()))

                if plot:
                    map_names = [['MASK', 'GRAD', 'SG', 'IG'],
                                 ['SIG', 'GCAM', 'GBP', 'GGCAM']]
                    maps = [[ground_map, grad, sg, ig],
                            [sig, gcam, gbp, ggcam]]
                    fig, ax = plt.subplots(2, 4, figsize=(18, 8))
                    for i in range(2):
                        for j in range(4):
                            ax[i, j].imshow(np.transpose(img, (1, 2, 0)))
                            ax[i, j].imshow(maps[i][j], alpha=0.8,
                                            cmap=overlay_cmap)
                            ax[i, j].text(0, 220, map_names[i][j],
                                          fontsize='x-large', color='white',
                                          weight='bold',
                                          bbox=dict(fill=True, linewidth=0))
                            ax[i, j].axis('off')
                    plt.subplots_adjust(wspace=0.05, hspace=0.05)
                    plt.savefig(f'{wsl_plot_dir}/saliency_{img_name}.png',
                                dpi=300, bbox_inches='tight')
                    plt.show()
                    plt.close()
            del data

        for key in all_scores:
            configs[key] = np.mean(all_scores[key])
            print(key, ' ', configs[key])

        # ``path`` was never defined in this function; the configs belong to
        # the model directory being processed.
        with open(model_dir / 'configs.json', 'w') as fp:
            json.dump(configs, fp)
def _model_skip_reason(path):
    """Return a message explaining why ``path`` is skipped, or None to keep it."""
    path_str = str(path)
    if 'debug' in path_str:
        return 'Debugging model'
    if 'vgg19' not in path_str:
        return 'Model is not vgg19'
    if 'wildcat' in path_str:
        return 'Model is wildcat'
    if not (path / 'configs.json').exists():
        return 'Model not completed'
    return None


def main(name: str, task: str, store: bool):
    """Score gradient-based saliency maps of trained vgg19 models with AUPR.

    Every directory under ``wsl_model_dir`` matching ``name`` is considered.
    Debug runs, non-vgg19 runs, wildcat runs and runs without a
    ``configs.json`` are skipped. For each remaining model the test split is
    loaded, seven saliency maps (GRAD, SG, IG, SIG, GBP, GCAM, GGCAM) are
    generated for every image whose labels are all non-zero, and each map is
    scored against the ground-truth map with ``aupr``.

    Args:
        name: Substring a model directory name must contain, or ``'all'`` to
            consider every directory.
        task: ``'detect'`` builds the ground truth from bounding boxes and
            records scores. ``'segment'`` builds the ground truth from
            run-length encoded masks but records no score. Any other value
            only prints a notice.
        store: When true, write the summary table to ``wsl_summary_dir``.

    Side effects:
        Prints progress and results, saves one ``.npy`` file of raw scores
        per saliency method, and optionally writes the summary CSV.
    """
    pattern = '*' if name == 'all' else f'*{name}*'
    models = list(wsl_model_dir.glob(pattern))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('Number of potential model matches =', len(models))

    org_size = (1024, 1024)
    new_size = (224, 224)
    all_configs = []

    for path in models:
        reason = _model_skip_reason(path)
        if reason is not None:
            print(reason)
            continue

        with open(path / 'configs.json') as f:
            configs = json.load(f)
        print(configs)

        dataset = Loader(data=configs['data'],
                         split='test',
                         extension=configs['extension'],
                         classes=configs['classes'],
                         column=configs['column'],
                         regression=configs['regression'])

        # Best effort: a missing or unreadable checkpoint skips this model,
        # but Ctrl-C / SystemExit are no longer swallowed and the reason is
        # reported instead of being silently dropped.
        try:
            checkpoint = torch.load(path / 'best.pt', map_location=device)
        except Exception as err:
            print(f'Could not load checkpoint for {path}: {err}')
            continue

        # The stored model is unwrapped through its ``.module`` attribute.
        model = checkpoint['model'].module
        checkpoint['model'] = model
        model.get_map = True
        model.eval()

        all_scores = defaultdict(list)

        VBP = vanilla_backprop.VanillaBackprop(model)
        IG = integrated_gradients.IntegratedGradients(model)
        GBP = guided_backprop.GuidedBackprop(model)
        GCAM = gradcam.GradCam(model, target_layer=34)

        print(len(dataset))
        for idx, data in tqdm(enumerate(dataset), total=len(dataset)):
            img, _ = data
            image_name = dataset.names[idx]
            labels = dataset.labels[idx]

            # Only images whose labels are all non-zero are evaluated.
            if any(label == 0 for label in labels):
                continue

            saliency_label = torch.tensor(1).to(device)

            def as_batch():
                # A fresh single-image batch per call, as in the original,
                # so no generator can see state left on the input by another.
                return img.unsqueeze(dim=0).to(device).float()

            vanilla_grads = VBP.generate_gradients(as_batch(), saliency_label)
            grayscale_vanilla_grads = vanilla_backprop.convert_to_grayscale(
                vanilla_grads)

            integrated_grads = IG.generate_integrated_gradients(
                as_batch(), saliency_label, 100)
            grayscale_integrated_grads = integrated_gradients.convert_to_grayscale(
                integrated_grads)

            guided_grads = GBP.generate_gradients(as_batch(), saliency_label)
            grayscale_guided_grads = guided_backprop.convert_to_grayscale(
                guided_grads)

            smooth_grad_mask = smooth_grad.generate_smooth_grad(
                VBP, as_batch(), saliency_label, 5, 0.3)
            grayscale_smooth_grad = smooth_grad.convert_to_grayscale(
                smooth_grad_mask)

            smooth_grad_mask = smooth_grad.generate_smooth_grad(
                IG, as_batch(), saliency_label, 5, 0.3)
            grayscale_smooth_ig = smooth_grad.convert_to_grayscale(
                smooth_grad_mask)

            cam = GCAM.generate_cam(as_batch(), saliency_label)
            cam_gb = guided_gradcam.guided_grad_cam(cam, guided_grads)
            grayscale_cam_gb = guided_gradcam.convert_to_grayscale(cam_gb)

            # Insertion order fixes the order in which methods are reported.
            method_maps = {
                'GRAD': grayscale_vanilla_grads,
                'SG': grayscale_smooth_grad,
                'IG': grayscale_integrated_grads,
                'SIG': grayscale_smooth_ig,
                'GBP': grayscale_guided_grads,
                'GCAM': cam,
                'GGCAM': grayscale_cam_gb,
            }

            # One pass per label, as before. Every label is non-zero here, so
            # the former per-label zero check was unreachable and is dropped.
            for _ in labels:
                if task == 'detect':
                    ground_map = box_to_map(
                        dataset.df[dataset.df.Id == image_name].box.to_list(),
                        np.zeros(org_size))
                    ground_map = cv2.resize(
                        ground_map, new_size,
                        interpolation=cv2.INTER_NEAREST).clip(0, 1)
                    # NOTE(review): aupr receives (saliency, ground truth)
                    # here; confirm this matches aupr's expected argument
                    # order.
                    for key, saliency_map in method_maps.items():
                        all_scores[key].append(aupr(saliency_map, ground_map))
                elif task == 'segment':
                    ground_map = np.zeros(org_size)
                    eps = dataset.df[
                        dataset.df.Id == image_name].EncodedPixels.to_list()
                    for ep in eps:
                        ground_map += rle2mask(ep, np.zeros(org_size)).T
                    # NOTE(review): the mask is built but never scored, so
                    # the 'segment' task currently records nothing.
                else:
                    print('Ground truth not available.')

        for key, values in all_scores.items():
            mean_score = np.mean(values)
            print(key, ' ', mean_score)
            np.save('/data/2015P002510/nishanth/WSL/wsl/wsl/AUPRC_scores/{}_{}.npy'.format(key, 'vgg_test'), values)
            # Keep the per-method mean in the summary row. The previous
            # np.mean(all_scores) was applied to the dict itself and raised
            # a TypeError.
            configs[key] = float(mean_score)
        all_configs.append(configs)

    df = pd.DataFrame.from_dict(all_configs)
    print(df)
    timestamp = datetime.datetime.now().strftime('%H_%d_%m')
    if store:
        df.to_csv(wsl_summary_dir / f'wild_{timestamp}')