# Script header for a Prototypical-Network training run: imports, environment
# setup, and CLI argument definitions.
from torch.optim import Adam
from torch.utils.data import DataLoader
import argparse
import torch
from few_shot.datasets import OmniglotDataset, MiniImageNet
from few_shot.models import get_few_shot_encoder
from few_shot.core import NShotTaskSampler, EvaluateFewShot, prepare_nshot_task
from few_shot.proto import proto_net_episode
from few_shot.train import fit
# NOTE(review): wildcard import pulls every callback name into module scope;
# consider importing the needed callbacks explicitly.
from few_shot.callbacks import *
from few_shot.utils import setup_dirs
from config import PATH

# Create output directories (models/, logs/, ...) before anything else runs.
setup_dirs()
# NOTE(review): `assert` is stripped under `python -O`; an explicit
# RuntimeError would be a more robust guard for the CUDA requirement.
assert torch.cuda.is_available()
device = torch.device('cuda')
# Enable cuDNN autotuner — input sizes are fixed per episode, so this helps.
torch.backends.cudnn.benchmark = True

##############
# Parameters #
##############
parser = argparse.ArgumentParser()
parser.add_argument('--dataset')
parser.add_argument('--distance', default='l2')
# n = support shots per class, k = number of classes per episode.
parser.add_argument('--n-train', default=1, type=int)
parser.add_argument('--n-test', default=1, type=int)
parser.add_argument('--k-train', default=60, type=int)
parser.add_argument('--k-test', default=5, type=int)
def test(self):
    """Interactively visualize deconvnet reconstructions of a trained
    few-shot classifier's activation maps for one Omniglot image.

    Loads a fixed sample image, runs it through a pretrained
    ``FewShotClassifier`` to populate ``model.feature_outputs``, then loops:
    the user picks a layer, clicks an activation map, and the corresponding
    transposed-convolution reconstruction is displayed alongside the map
    grid. Exits only via the ``-1`` layer input (``sys.exit``).
    """
    img_filename = os.path.join(DATA_PATH, 'Omniglot', 'images_background',
                                'Latin.0', 'character16', '0698_01.137602.png')
    img = io.imread(img_filename)
    # Add batch and channel axes, then min-max normalize to [0, 1].
    img = img[np.newaxis, np.newaxis, :, :]
    img = (img - img.min()) / (img.max() - img.min())
    img = torch.from_numpy(img)
    print(img.size())

    k = 300  # number of classes the pretrained classifier was trained on
    setup_dirs()
    # NOTE(review): CUDA is asserted but the model is then placed on CPU —
    # looks intentional (deconv runs on CPU) but worth confirming.
    assert torch.cuda.is_available()
    device = torch.device('cpu')
    torch.backends.cudnn.benchmark = True

    model = FewShotClassifier(1, k).to(device, dtype=torch.double)
    model.load_state_dict(torch.load(os.path.join(
        "models", "semantic_classifier", "test_k=300_few_shot_classifier.pth")))
    # Forward pass for its side effect: fills model.feature_outputs and
    # model.pool_indices used below.
    model(img)
    deconv_model = FewShotDeconv(model).to(device, dtype=torch.double)
    conv_layer_indices = model.get_conv_layer_indices()

    plt.ion()  # remove blocking
    plt.figure(figsize=(10, 5))
    done = False
    i = 0
    while not done:
        layer = input('Layer to view (0-12, -1 to exit): ')
        try:
            layer = int(layer)
        except ValueError:
            continue
        if layer < 0:
            sys.exit(0)

        activ_map = model.feature_outputs[layer].data.numpy()
        activ_map = activ_map.transpose(1, 2, 3, 0)
        activ_map_grid = vis_grid(activ_map)
        vis_layer(activ_map_grid)

        # only transpose convolve from Conv2d or ReLU layers
        conv_layer = layer
        if conv_layer not in conv_layer_indices:
            conv_layer -= 1
        if conv_layer not in conv_layer_indices:
            continue

        n_maps = activ_map.shape[0]
        marker = None
        while True:
            choose_map = True  # input('Select map? (y/[n]): ') == 'y'
            # FIX: compare to None with `is not`, never `!=`.
            if marker is not None:
                marker.pop(0).remove()
            if not choose_map:
                break
            _, map_x_dim, map_y_dim, _ = activ_map.shape
            map_img_x_dim, map_img_y_dim, _ = activ_map_grid.shape
            x_step = map_img_x_dim // (map_x_dim + 1)
            print('Click on an activation map to continue')
            x_pos, y_pos = plt.ginput(1)[0]
            # Translate click coordinates into a map index within the grid.
            x_index = x_pos // (map_x_dim + 1)
            y_index = y_pos // (map_y_dim + 1)
            map_idx = int(x_step * y_index + x_index)
            if map_idx >= n_maps:
                print('Invalid map selected')
                continue
            decon = deconv_model(
                model.feature_outputs[layer][0][map_idx][None, None, :, :],
                conv_layer, map_idx, model.pool_indices)
            img = decon_img(decon)
            img = img.reshape((28, 28))
            print(img.shape)
            plt.subplot(121)
            vis_layer(activ_map_grid)
            marker = plt.plot(x_pos, y_pos, marker='+', color='red')
            plt.subplot(122)
            plt.imshow(img)
            i += 1
def few_shot_training(datadir=DATA_PATH,
                      dataset='fashion',
                      num_input_channels=3,
                      drop_lr_every=20,
                      validation_episodes=200,
                      evaluation_episodes=1000,
                      episodes_per_epoch=100,
                      n_epochs=80,
                      small_dataset=False,
                      n_train=1,
                      n_test=1,
                      k_train=30,
                      k_test=5,
                      q_train=5,
                      q_test=1,
                      distance='l2',
                      pretrained=False,
                      monitor_validation=False,
                      n_val_classes=10,
                      architecture='resnet18',
                      gpu=None):
    """Train a Prototypical Network on the fashion-product dataset.

    Builds episodic n-shot/k-way/q-query task loaders for training,
    (optionally) validation, and final test evaluation, instantiates either
    the few-shot encoder or a pretrained torchvision backbone, and runs
    `fit` with proto-net episodes.

    Raises:
        ValueError: if `dataset` is not 'fashion'.
    """
    setup_dirs()

    if dataset == 'fashion':
        dataset_class = FashionProductImagesSmall if small_dataset \
            else FashionProductImages
    else:
        # FIX: original code did `raise (ValueError, 'Unsupported dataset')`,
        # which raises a TypeError ("exceptions must derive from
        # BaseException") instead of the intended ValueError.
        raise ValueError('Unsupported dataset')

    param_str = f'{dataset}_nt={n_train}_kt={k_train}_qt={q_train}_' \
                f'nv={n_test}_kv={k_test}_qv={q_test}_small={small_dataset}_' \
                f'pretrained={pretrained}_validate={monitor_validation}'
    print(param_str)

    ###################
    # Create datasets #
    ###################
    # ADAPTED: data transforms including augmentation
    resize = (80, 60) if small_dataset else (400, 300)

    background_transform = transforms.Compose([
        transforms.RandomResizedCrop(resize, scale=(0.8, 1.0)),
        transforms.RandomPerspective(),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])

    evaluation_transform = transforms.Compose([
        transforms.Resize(resize),
        transforms.ToTensor(),
    ])

    if monitor_validation:
        if not n_val_classes >= k_test:
            # Need at least k_test classes to sample a k_test-way task.
            n_val_classes = k_test
            print("Warning: `n_val_classes` < `k_test`. Take a larger number"
                  " of validation classes next time. Increased to `k_test`"
                  " classes")

        # class structure for background (training), validation (validation),
        # evaluation (test): take a random subset of background classes
        validation_classes = list(
            np.random.choice(dataset_class.background_classes, n_val_classes))
        background_classes = list(
            set(dataset_class.background_classes).difference(
                set(validation_classes)))

        # use keyword for evaluation classes
        evaluation_classes = 'evaluation'

        # Meta-validation set
        validation = dataset_class(datadir, split='all',
                                   classes=validation_classes,
                                   transform=evaluation_transform)
        # ADAPTED: in the original code, `episodes_per_epoch` was provided to
        # `NShotTaskSampler` instead of `validation_episodes`.
        validation_sampler = NShotTaskSampler(validation, validation_episodes,
                                              n_test, k_test, q_test)
        validation_taskloader = DataLoader(validation,
                                           batch_sampler=validation_sampler,
                                           num_workers=4)
    else:
        # use keyword for both background and evaluation classes
        background_classes = 'background'
        evaluation_classes = 'evaluation'

    # Meta-training set
    background = dataset_class(datadir, split='all',
                               classes=background_classes,
                               transform=background_transform)
    background_sampler = NShotTaskSampler(background, episodes_per_epoch,
                                          n_train, k_train, q_train)
    background_taskloader = DataLoader(background,
                                       batch_sampler=background_sampler,
                                       num_workers=4)

    # Meta-test set
    evaluation = dataset_class(datadir, split='all',
                               classes=evaluation_classes,
                               transform=evaluation_transform)
    # ADAPTED: in the original code, `episodes_per_epoch` was provided to
    # `NShotTaskSampler` instead of `evaluation_episodes`.
    evaluation_sampler = NShotTaskSampler(evaluation, evaluation_episodes,
                                          n_test, k_test, q_test)
    evaluation_taskloader = DataLoader(evaluation,
                                       batch_sampler=evaluation_sampler,
                                       num_workers=4)

    #########
    # Model #
    #########
    if torch.cuda.is_available():
        if gpu is not None:
            device = torch.device('cuda', gpu)
        else:
            device = torch.device('cuda')
        torch.backends.cudnn.benchmark = True
    else:
        device = torch.device('cpu')

    if not pretrained:
        model = get_few_shot_encoder(num_input_channels)
        # ADAPTED
        model.to(device)
        # BEFORE
        # model.to(device, dtype=torch.double)
    else:
        assert torch.cuda.is_available()
        model = models.__dict__[architecture](pretrained=True)
        # Strip the classification head; embeddings come from the backbone.
        model.fc = Identity()
        if gpu is not None:
            model = model.cuda(gpu)
        else:
            model = model.cuda()
        # TODO this is too risky: I'm not sure that this can work, since in
        # the few-shot github repo the batch axis is actually split into
        # support and query samples
        # model = torch.nn.DataParallel(model).cuda()

    def lr_schedule(epoch, lr):
        # Drop lr every 2000 episodes
        if epoch % drop_lr_every == 0:
            return lr / 2
        else:
            return lr

    ############
    # Training #
    ############
    print(f'Training Prototypical network on {dataset}...')
    optimiser = Adam(model.parameters(), lr=1e-3)
    loss_fn = torch.nn.NLLLoss().to(device)

    callbacks = [
        # ADAPTED: this is the test monitoring now - and is only done at the
        # end of training.
        EvaluateFewShot(
            eval_fn=proto_net_episode,
            num_tasks=evaluation_episodes,  # THIS IS NOT USED
            n_shot=n_test,
            k_way=k_test,
            q_queries=q_test,
            taskloader=evaluation_taskloader,
            prepare_batch=prepare_nshot_task(n_test, k_test, q_test,
                                             device=device),
            distance=distance,
            on_epoch_end=False,
            on_train_end=True,
            prefix='test_')
    ]

    if monitor_validation:
        callbacks.append(
            # ADAPTED: this is the validation monitoring now - computed
            # after every epoch.
            EvaluateFewShot(
                eval_fn=proto_net_episode,
                num_tasks=evaluation_episodes,  # THIS IS NOT USED
                n_shot=n_test,
                k_way=k_test,
                q_queries=q_test,
                # BEFORE taskloader=evaluation_taskloader,
                taskloader=validation_taskloader,  # ADAPTED
                prepare_batch=prepare_nshot_task(n_test, k_test, q_test,
                                                 device=device),
                distance=distance,
                on_epoch_end=True,  # ADAPTED
                on_train_end=False,  # ADAPTED
                prefix='val_'))

    callbacks.extend([
        ModelCheckpoint(
            filepath=PATH + f'/models/proto_nets/{param_str}.pth',
            monitor=f'val_{n_test}-shot_{k_test}-way_acc',
            verbose=1,  # ADAPTED
            save_best_only=monitor_validation  # ADAPTED
        ),
        LearningRateScheduler(schedule=lr_schedule),
        CSVLogger(PATH + f'/logs/proto_nets/{param_str}.csv'),
    ])

    fit(
        model,
        optimiser,
        loss_fn,
        epochs=n_epochs,
        dataloader=background_taskloader,
        prepare_batch=prepare_nshot_task(n_train, k_train, q_train,
                                         device=device),
        callbacks=callbacks,
        metrics=['categorical_accuracy'],
        fit_function=proto_net_episode,
        fit_function_kwargs={
            'n_shot': n_train,
            'k_way': k_train,
            'q_queries': q_train,
            'train': True,
            'distance': distance
        },
    )
def test(self):
    """Fine-tune/evaluate a classifier head on top of a frozen pretrained
    SemanticBinaryEncoder using n-shot Omniglot evaluation classes.

    Samples `k` evaluation classes, shuffles samples within each class,
    builds train/eval dataloaders via `BasicSampler`, freezes the encoder
    and fits `TestSemanticBinaryClassifier` with standard gradient steps.
    """
    k = 200  # number of classes
    n = 5    # shots per class
    epochs = 20
    size_binary_layer = 10
    stochastic = True
    n_conv_layers = 4
    lr = 0.01
    model_name = 'Omniglot__n=5_k=20_epochs=1000__lr=__size_binary_layer=10__size_continue_layer=10__stochastic__simplified_encoder'
    validation_split = .2

    setup_dirs()
    assert torch.cuda.is_available()
    device = torch.device('cuda')
    torch.backends.cudnn.benchmark = True

    model = SemanticBinaryEncoder(1, 10, 10, stochastic=True)
    model.load_state_dict(torch.load(
        os.path.join("models", "semantic_gan", model_name + ".pth")))

    evaluation = OmniglotDataset('evaluation')
    classes = np.random.choice(evaluation.df['class_id'].unique(), size=k)
    for i in classes:
        # FIX: assigning `df[mask].sample(frac=1)` back to `df[mask]`
        # re-aligns rows on their index, silently undoing the shuffle.
        # Assign the raw `.values` so the rows are actually permuted.
        mask = evaluation.df['class_id'] == i
        evaluation.df.loc[mask] = evaluation.df.loc[mask].sample(frac=1).values

    train_dataloader = DataLoader(
        evaluation,
        batch_sampler=BasicSampler(evaluation, validation_split, True,
                                   classes, n=n),
        num_workers=8
    )
    eval_dataloader = DataLoader(
        evaluation,
        batch_sampler=BasicSampler(evaluation, validation_split, False,
                                   classes, n=n),
        num_workers=8
    )

    test_model = TestSemanticBinaryClassifier(
        k, model, size_binary_layer=size_binary_layer
    ).to(device, dtype=torch.double)
    loss_fn = nn.CrossEntropyLoss().to(device)
    optimiser = torch.optim.Adam(test_model.parameters(), lr=lr)

    def prepare_batch(n, k):
        # Returns a closure that moves inputs to GPU and replaces labels
        # with the canonical 0..k-1 n-shot task labels.
        def prepare_batch_(batch):
            x, y = batch
            x = x.double().cuda()
            # Create dummy 0-(num_classes - 1) label
            y = create_nshot_task_label(k, n).cuda()
            return x, y
        return prepare_batch_

    evalmetrics = EvaluateMetrics(eval_dataloader)
    evalmetrics.set_params({'metrics': ['categorical_accuracy'],
                            'prepare_batch': prepare_batch(n, k),
                            'loss_fn': loss_fn})

    callbacks = [
        evalmetrics,
        ModelCheckpoint(
            filepath=os.path.join(PATH, 'models', 'semantic_classifier',
                                  model_name + 'test_other_class.pth'),
            monitor='val_' + str(n) + '-shot_' + str(k) + '-way_acc'
        ),
        ReduceLROnPlateau(patience=10, factor=0.5, monitor='val_loss'),
        CSVLogger(os.path.join(PATH, 'logs', 'semantic_classifier',
                               model_name + 'test_other_class.csv'))
    ]

    # Freeze the pretrained encoder; only the new head is trained.
    for param in model.parameters():
        param.requires_grad = False

    fit(
        test_model,
        optimiser,
        loss_fn,
        epochs=100,
        dataloader=train_dataloader,
        prepare_batch=prepare_batch(n, k),
        callbacks=callbacks,
        metrics=['categorical_accuracy'],
        fit_function=gradient_step,
        fit_function_kwargs={'n_shot': n, 'k_way': k, 'device': device},
    )
def train_sweep():
    """Run one wandb-sweep trial of Prototypical Network training.

    Builds episodic task loaders for the selected text dataset, wraps an
    XLNet embedding model, reads optimiser/lr from `wandb.config`, and
    trains with proto-net episodes, logging GPU memory along the way.

    Raises:
        ValueError: if the `--dataset` argument is unsupported.
    """
    from torch.optim import Adam
    from torch.utils.data import DataLoader
    import argparse
    from few_shot.datasets import OmniglotDataset, MiniImageNet, ClinicDataset, SNIPSDataset, CustomDataset
    from few_shot.models import XLNetForEmbedding
    from few_shot.core import NShotTaskSampler, EvaluateFewShot, prepare_nshot_task
    from few_shot.proto import proto_net_episode
    from few_shot.train_with_prints import fit
    from few_shot.callbacks import CallbackList, Callback, DefaultCallback, ProgressBarLogger, CSVLogger, EvaluateMetrics, ReduceLROnPlateau, ModelCheckpoint, LearningRateScheduler
    from few_shot.utils import setup_dirs
    from few_shot.utils import get_gpu_info
    from config import PATH
    import wandb
    from transformers import AdamW
    import torch

    gpu_dict = get_gpu_info()
    print('Total GPU Mem: {} , Used GPU Mem: {}, Used Percent: {}'.format(
        gpu_dict['mem_total'], gpu_dict['mem_used'],
        gpu_dict['mem_used_percent']))

    setup_dirs()
    assert torch.cuda.is_available()
    device = torch.device('cuda')
    torch.backends.cudnn.benchmark = True

    ##############
    # Parameters #
    ##############
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', default='Custom')
    parser.add_argument('--distance', default='l2')
    parser.add_argument('--n-train', default=2, type=int)
    parser.add_argument('--n-test', default=2, type=int)
    parser.add_argument('--k-train', default=2, type=int)
    parser.add_argument('--k-test', default=2, type=int)
    parser.add_argument('--q-train', default=2, type=int)
    parser.add_argument('--q-test', default=2, type=int)
    args = parser.parse_args()

    evaluation_episodes = 100
    episodes_per_epoch = 10

    if args.dataset == 'omniglot':
        n_epochs = 40
        dataset_class = OmniglotDataset
        num_input_channels = 1
        drop_lr_every = 20
    elif args.dataset == 'miniImageNet':
        n_epochs = 80
        dataset_class = MiniImageNet
        num_input_channels = 3
        drop_lr_every = 40
    elif args.dataset == 'clinic150':
        n_epochs = 5
        dataset_class = ClinicDataset
        num_input_channels = 150
        drop_lr_every = 2
    elif args.dataset == 'SNIPS':
        n_epochs = 5
        dataset_class = SNIPSDataset
        num_input_channels = 150
        drop_lr_every = 2
    elif args.dataset == 'Custom':
        n_epochs = 20
        dataset_class = CustomDataset
        num_input_channels = 150
        drop_lr_every = 5
    else:
        # FIX: original code did `raise (ValueError, 'Unsupported dataset')`,
        # which raises a TypeError instead of the intended ValueError.
        raise ValueError('Unsupported dataset')

    param_str = f'{args.dataset}_nt={args.n_train}_kt={args.k_train}_qt={args.q_train}_' \
                f'nv={args.n_test}_kv={args.k_test}_qv={args.q_test}'
    print(param_str)

    ###################
    # Create datasets #
    ###################
    train_df = dataset_class('train')
    train_taskloader = DataLoader(train_df,
                                  batch_sampler=NShotTaskSampler(
                                      train_df, episodes_per_epoch,
                                      args.n_train, args.k_train,
                                      args.q_train))
    val_df = dataset_class('val')
    evaluation_taskloader = DataLoader(
        val_df,
        batch_sampler=NShotTaskSampler(val_df, episodes_per_epoch,
                                       args.n_test, args.k_test, args.q_test))

    #########
    # Wandb #
    #########
    config_defaults = {
        'lr': 0.00001,
        'optimiser': 'adam',
        'batch_size': 16,
    }
    wandb.init(config=config_defaults)

    #########
    # Model #
    #########
    torch.cuda.empty_cache()
    # GPU-memory logging is best-effort only; never let it kill the sweep.
    # FIX: bare `except:` narrowed to `except Exception` so that
    # KeyboardInterrupt/SystemExit still propagate.
    try:
        print('Before Model Move')
        gpu_dict = get_gpu_info()
        print('Total GPU Mem: {} , Used GPU Mem: {}, Used Percent: {}'.format(
            gpu_dict['mem_total'], gpu_dict['mem_used'],
            gpu_dict['mem_used_percent']))
    except Exception:
        pass

    try:
        del model
    except Exception:
        print("Cannot delete model. No model with name 'model' exists")

    model = XLNetForEmbedding(num_input_channels)
    model.to(device, dtype=torch.double)

    try:
        print('After Model Move')
        gpu_dict = get_gpu_info()
        print('Total GPU Mem: {} , Used GPU Mem: {}, Used Percent: {}'.format(
            gpu_dict['mem_total'], gpu_dict['mem_used'],
            gpu_dict['mem_used_percent']))
    except Exception:
        pass

    wandb.watch(model)

    ############
    # Training #
    ############
    print(f'Training Prototypical network on {args.dataset}...')
    if wandb.config.optimiser == 'adam':
        optimiser = Adam(model.parameters(), lr=wandb.config.lr)
    else:
        optimiser = AdamW(model.parameters(), lr=wandb.config.lr)
    loss_fn = torch.nn.NLLLoss()

    def lr_schedule(epoch, lr):
        # Drop lr every 2000 episodes
        if epoch % drop_lr_every == 0:
            return lr / 2
        else:
            return lr

    callbacks = [
        EvaluateFewShot(eval_fn=proto_net_episode,
                        num_tasks=evaluation_episodes,
                        n_shot=args.n_test,
                        k_way=args.k_test,
                        q_queries=args.q_test,
                        taskloader=evaluation_taskloader,
                        prepare_batch=prepare_nshot_task(
                            args.n_test, args.k_test, args.q_test),
                        distance=args.distance),
        ModelCheckpoint(
            filepath=PATH + f'/models/proto_nets/{param_str}.pth',
            monitor=f'val_{args.n_test}-shot_{args.k_test}-way_acc'),
        LearningRateScheduler(schedule=lr_schedule),
        CSVLogger(PATH + f'/logs/proto_nets/{param_str}.csv'),
    ]

    try:
        print('Before Fit')
        print('optimiser :', optimiser)
        print('Learning Rate: ', wandb.config.lr)
        gpu_dict = get_gpu_info()
        print('Total GPU Mem: {} , Used GPU Mem: {}, Used Percent: {}'.format(
            gpu_dict['mem_total'], gpu_dict['mem_used'],
            gpu_dict['mem_used_percent']))
    except Exception:
        pass

    fit(
        model,
        optimiser,
        loss_fn,
        epochs=n_epochs,
        dataloader=train_taskloader,
        prepare_batch=prepare_nshot_task(args.n_train, args.k_train,
                                         args.q_train),
        callbacks=callbacks,
        metrics=['categorical_accuracy'],
        fit_function=proto_net_episode,
        fit_function_kwargs={
            'n_shot': args.n_train,
            'k_way': args.k_train,
            'q_queries': args.q_train,
            'train': True,
            'distance': args.distance
        },
    )