def create_data_bunch(bs, img_sz, train_sz=None, valid_sz=None, use_cache=False):
    """Assemble an ``ImageDataBunch`` over QuickDraw training and validation subsets.

    Args:
        bs: Batch size forwarded to ``ImageDataBunch.create``.
        img_sz: Forwarded to ``ImageDataBunch.create`` as ``size``.
        train_sz: ``subset_size`` passed to the training ``QuickDraw`` dataset.
        valid_sz: ``subset_size`` passed to the validation ``QuickDraw`` dataset.
        use_cache: Forwarded unchanged to both ``QuickDraw`` datasets.

    Returns:
        The data bunch, after ``normalize(imagenet_stats)`` has been called on it.
    """
    # Both splits share every argument except the train flag and subset size.
    split_specs = ((True, train_sz), (False, valid_sz))
    train_ds, valid_ds = [
        QuickDraw(
            PREPARED,
            train=is_train,
            take_subset=True,
            use_cache=use_cache,
            subset_size=subset_size,
        )
        for is_train, subset_size in split_specs
    ]

    bunch = ImageDataBunch.create(
        train_ds,
        valid_ds,
        bs=bs,
        size=img_sz,
        ds_tfms=get_transforms(),
    )
    # The return value of normalize() is ignored; the same bunch object is returned.
    bunch.normalize(imagenet_stats)
    return bunch
def load_data():
    """Load the 'cifar_10' dataset and wrap it in an ``ImageDataBunch``.

    ``val_size`` is not a parameter: it is looked up in the enclosing/module
    scope when the function runs.

    Returns:
        A tuple ``(data, x_train, y_train, x_val, y_val)`` where ``data`` is the
        data bunch (batch size 256) and the arrays are the rescaled, reshaped
        images together with their labels.
    """
    cifar = build_dataset('cifar_10', val_size=val_size)

    def _load_split(split_name):
        # Shift and scale by 128, then reshape to (N, 3, 32, 32).
        images, labels = cifar.dataset(split_name)
        images = (images - 128) / 128
        return images.reshape((-1, 3, 32, 32)), labels

    x_train, y_train = _load_split('train')
    x_val, y_val = _load_split('val')

    # Only the training split gets the padding and horizontal-flip transforms.
    augmentations = list(rand_pad(4, 32)) + [flip_lr(p=0.5)]
    data = ImageDataBunch.create(
        ImageArrayDS(x_train, y_train, augmentations),
        ImageArrayDS(x_val, y_val),
        bs=256,
    )
    return data, x_train, y_train, x_val, y_val
def train_classifier(model, config, x_train, y_train, x_val, y_val, train_tfms=None):
    """Fit ``model`` on the given arrays with cross-entropy loss and early stopping.

    Args:
        model: Model handed to the fastai ``Learner``.
        config: Mapping read for the keys ``'batch_size'``, ``'patience'``,
            ``'epochs'``, ``'start_lr'`` and ``'weight_decay'``.
        x_train, y_train: Training inputs and labels.
        x_val, y_val: Validation inputs and labels.
        train_tfms: Optional transforms for the training dataset only;
            ``None`` means no transforms.

    Returns:
        The ``Learner`` after ``fit`` has completed.
    """
    augmentations = [] if train_tfms is None else train_tfms

    data = ImageDataBunch.create(
        ImageArrayDS(x_train, y_train, augmentations),
        ImageArrayDS(x_val, y_val),
        bs=config['batch_size'],
    )

    early_stopping = partial(
        EarlyStoppingCallback,
        min_delta=1e-3,
        patience=config['patience'],
    )
    learner = Learner(
        data,
        model,
        metrics=accuracy,
        loss_func=torch.nn.CrossEntropyLoss(),
        callback_fns=[early_stopping],
    )
    learner.fit(config['epochs'], config['start_lr'], wd=config['weight_decay'])
    return learner
BensProcessing(), transforms.ToTensor(), # transforms.Normalize((0.7, 0.54, 0.50), (0.17, 0.17, 0.19)) ]) train_dataset = MoleDataset(training_image_paths, training_labels, transform=transform_train) test_dataset = MoleDataset(testing_image_paths, testing_labels, transform=transform_test) if not os.path.exists('models/pytorch_model.pt') or train: print('Training Model') # ---------------------Use FastAi for easier training with less boilerplate code--------------------- data = ImageDataBunch.create(train_dataset, test_dataset) learner = cnn_learner(data, models.resnet50, metrics=accuracy) # Either train the cnn layers or not if unfreeze_cnn_layers: print('Unfreezing Model') learner.unfreeze() total_params = sum(p.numel() for p in learner.model.parameters()) trainable_params = sum(p.numel() for p in learner.model.parameters() if p.requires_grad) print( f'Total Paramaters: {total_params}, Trainable Parameters: {trainable_params}' ) # We can plot some images to take a look at them
# Load data
# NOTE: `val_size` is not defined in this chunk; it must already exist when this runs.
dataset = build_dataset('cifar_10', val_size=val_size)
x_set, y_set = dataset.dataset('train')
x_val, y_val = dataset.dataset('val')

# Shift and scale by 128, then reshape to (N, 3, 32, 32).
shape = (-1, 3, 32, 32)
x_set = (x_set - 128) / 128
x_set = x_set.reshape(shape)
x_val = (x_val - 128) / 128
x_val = x_val.reshape(shape)

# The full training split is used; the pool/train split is currently disabled.
# x_pool, x_train, y_pool, y_train = train_test_split(x_set, y_set, test_size=start_size, stratify=y_set)
x_train, y_train = x_set, y_set

# Padding and horizontal-flip transforms are applied to the training dataset only.
train_tfms = list(rand_pad(4, 32)) + [flip_lr(p=0.5)]
train_ds = ImageArrayDS(x_train, y_train, train_tfms)
val_ds = ImageArrayDS(x_val, y_val)
data = ImageDataBunch.create(train_ds, val_ds, bs=256)

loss_func = torch.nn.CrossEntropyLoss()
# Print NumPy arrays in full and without scientific notation.
np.set_printoptions(threshold=sys.maxsize, suppress=True)

model = AnotherConv()
# model = resnet_masked(pretrained=True)
# model = resnet_linear(pretrained=True, dropout_rate=0.5, freeze=False)

# learner = Learner(data, model, metrics=accuracy, loss_func=loss_func)
#
# model_path = "experiments/data/model.pt"
# if reload and os.path.exists(model_path):
#     model.load_state_dict(torch.load(model_path))
# else:
c = len(classes) loss_func = torch.nn.CrossEntropyLoss() def __init__(self, file_path): super(H5Dataset, self).__init__() h5_file = h5py.File(file_path) self.data = h5_file.get('sen2') self.target = h5_file.get('labels') def __getitem__(self, index): x = self.data[index, :, :, 0:3].astype(np.float32) x = x[..., ::-1] #from BGR to RGB x = x.transpose(2, 0, 1) # y = self.target[index].astype(np.float32) y = np.argmax(self.target[index]) return (torch.from_numpy(np.copy(x)).float(), torch.tensor(y)) def __len__(self): return self.data.shape[0] dataset_train = H5Dataset('../data/subset_training.hdf5') dataset_val = H5Dataset('../data/subset_validation.hdf5') data = ImageDataBunch.create(dataset_train, dataset_val) learn = create_cnn(data, models.resnet18, metrics=accuracy) learn.fit_one_cycle(150, 1e-2) import IPython IPython.embed()
def benchmark_uncertainty(config):
    """Repeatedly train a classifier and score uncertainty estimators by ROC-AUC.

    For each of ``config['repeats']`` runs a fresh model is trained, the
    validation set is classified, and every estimator in
    ``config['estimators']`` is scored on how well its uncertainty separates
    wrong predictions from right ones. ROC curves of the last run, a box plot
    of all scores and a CSV of the scores are written to
    ``ROOT_DIR/experiments/data/ood``.

    Args:
        config: Mapping read for the keys ``'repeats'``, ``'prepare_dataset'``
            (a callable taking ``config`` and returning
            ``(x_set, y_set, x_val, y_val, train_tfms)``), ``'train_size'``,
            ``'batch_size'``, ``'model_type'``, ``'patience'``, ``'epochs'``,
            ``'start_lr'``, ``'weight_decay'``, ``'estimators'``,
            ``'nn_runs'`` and ``'name'``.

    Returns:
        None. Results are printed and saved to disk.

    Notes:
        * ``label`` (used in the output file names) is not defined here; it is
          looked up in the enclosing/module scope. NOTE(review): confirm it is
          set before this function is called.
        * Inference uses ``.cuda()``, so a CUDA device is required.
    """
    results = []
    plt.figure(figsize=(10, 8))
    last_repeat = config['repeats'] - 1

    for i in range(config['repeats']):
        x_set, y_set, x_val, y_val, train_tfms = config['prepare_dataset'](config)

        # Subsample a stratified training set only when more data is available
        # than requested; the "test" half of the split is the one that is kept.
        if len(x_set) > config['train_size']:
            _, x_train, _, y_train = train_test_split(
                x_set, y_set, test_size=config['train_size'], stratify=y_set)
        else:
            x_train, y_train = x_set, y_set

        train_ds = ImageArrayDS(x_train, y_train, train_tfms)
        val_ds = ImageArrayDS(x_val, y_val)
        data = ImageDataBunch.create(train_ds, val_ds, bs=config['batch_size'])

        loss_func = torch.nn.CrossEntropyLoss()
        np.set_printoptions(threshold=sys.maxsize, suppress=True)

        model = build_model(config['model_type'])
        callbacks = [
            partial(EarlyStoppingCallback, min_delta=1e-3, patience=config['patience'])
        ]
        learner = Learner(
            data, model, metrics=accuracy, loss_func=loss_func, callback_fns=callbacks)
        learner.fit(config['epochs'], config['start_lr'], wd=config['weight_decay'])

        images = torch.FloatTensor(x_val).cuda()
        probabilities = F.softmax(model(images), dim=1).detach().cpu().numpy()
        predictions = np.argmax(probabilities, axis=-1)

        # 1 where the prediction is wrong, 0 where it is right. It does not
        # depend on the estimator, so it is computed once per run. The builtin
        # ``int`` replaces ``np.int``, which was removed in NumPy 1.24.
        mistake = 1 - (predictions == y_val).astype(int)

        for name in config['estimators']:
            ue = calc_ue(model, images, probabilities, name, config['nn_runs'])
            roc_auc = roc_auc_score(mistake, ue)
            print(name, roc_auc)
            results.append((name, roc_auc))

            # Only the final run contributes curves to the ROC figure.
            if i == last_repeat:
                fpr, tpr, _ = roc_curve(mistake, ue, pos_label=1)
                plt.plot(fpr, tpr, label=name, alpha=0.8)

    plt.xlabel('FPR')
    plt.ylabel('TPR')
    out_dir = Path(ROOT_DIR) / 'experiments' / 'data' / 'ood'
    # Make sure the target directory exists before anything is saved into it.
    out_dir.mkdir(parents=True, exist_ok=True)
    plt.title(f"{config['name']} uncertainty ROC")
    plt.legend()
    roc_name = f"var_{label}_roc_{config['name']}_{config['train_size']}_{config['nn_runs']}"
    plt.savefig(out_dir / roc_name)
    # plt.show()

    df = pd.DataFrame(results, columns=['Estimator type', 'ROC-AUC score'])
    df = df.replace('mc_dropout', 'MC dropout')
    df = df.replace('decorrelating_sc', 'decorrelation')
    df = df[df['Estimator type'] != 'k_dpp_noisereg']
    print(df)

    fig, ax = plt.subplots(figsize=(8, 6))
    plt.subplots_adjust(left=0.2)
    with sns.axes_style('whitegrid'):
        sns.boxplot(data=df, x='ROC-AUC score', y='Estimator type', orient='h', ax=ax)
    ax.yaxis.grid(True)
    ax.xaxis.grid(True)
    plt.title(f'{config["name"]} wrong prediction ROC-AUC')
    boxplot_name = f"var_{label}_boxplot_{config['name']}_{config['train_size']}_{config['nn_runs']}"
    plt.savefig(out_dir / boxplot_name)
    # Append the suffix to the file name first: ``path / name + '.csv'`` would
    # evaluate as ``(path / name) + '.csv'`` and raise TypeError.
    df.to_csv(out_dir / (boxplot_name + '.csv'))