def main():
    test_seen_loader = torch.utils.data.DataLoader(
        AttributeDataset(args.data_dir, args.dataset, features_path=args.gan_path,
                         mode='test_seen', generalized=True,
                         normalize=args.normalize, sentences=args.sentences),
        batch_size=args.batch_size, shuffle=False)
    test_unseen_loader = torch.utils.data.DataLoader(
        AttributeDataset(args.data_dir, args.dataset, features_path=args.gan_path,
                         mode='test_unseen', generalized=True,
                         normalize=args.normalize, sentences=args.sentences),
        batch_size=args.batch_size, shuffle=False)

    # instantiate the models
    if args.mlp:
        mlp = MLP(args.dim_input, [args.nhidden * 2], args.nhidden)
    else:
        mlp = LinearProjection(args.dim_input, args.nhidden)
    embed = LinearProjection(args.nhidden, args.dim_embed)

    cam_key = 'sentences' if args.sentences else 'emb'
    if args.gan_path is not None:
        cam_key = 'full_' + cam_key
    cam = torch.from_numpy(test_seen_loader.dataset.data[cam_key].T)

    proxies = ProxyNet(args.n_classes, args.dim_embed, proxies=cam)
    model = Base(mlp, embed, proxies)
    criterion = ProxyLoss(temperature=args.temp)

    if args.cuda:
        mlp.cuda()
        embed.cuda()
        model.cuda()
        proxies.cuda()

    # load checkpoint
    checkpoint = torch.load(args.model_path)
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(args.model_path,
                                                        checkpoint['epoch']))

    compute_scores(test_seen_loader, test_unseen_loader, model, criterion)
optimizer = optim.Adam(itertools.chain(model.parameters(), mlp.parameters()),
                       lr=args.lr, weight_decay=args.weight_decay)
criterion = torch.nn.TripletMarginLoss(margin=args.margin, p=2)
CE = torch.nn.CrossEntropyLoss()

loss_list = RunningAvg(window_size=200)
loss_list_CE = RunningAvg(window_size=200)
acc_list = RunningAvg(window_size=200)
loss_by_iter = []

if args.cuda:
    device = torch.device(args.cuda_device)
    model.cuda(device)
    mlp.cuda(device)


def train_step(epoch, loss_save):
    for _, data_train_group in tqdm(enumerate(dataloader_train), desc='Training',
                                    total=len(dataloader_train)):
        # pass three times first, then back-propagate the loss
        model.train()
        mlp.train()
        for data_train in data_train_group:
            vector3 = []
            regularization = 0
            for data_train_item in data_train:
                # and/or children: [[1, 2], [3, 4]]
                adj, features, labels, idx_train, idx_val, idx_test = cuda_input(
optimizer = optim.Adam(itertools.chain(model.parameters(), mlp.parameters()),
                       lr=args.lr, weight_decay=args.weight_decay)
criterion = torch.nn.TripletMarginLoss(margin=args.margin, p=2)
CE = torch.nn.CrossEntropyLoss()

loss_list = deque(maxlen=100)
loss_list_CE = deque(maxlen=100)
acc_list = deque(maxlen=100)
loss_by_iter = []

if args.cuda:
    model.cuda()
    mlp.cuda()


def train(epoch, loss_save):
    for _, data_train_group in tqdm(enumerate(dataloader_train), desc='Training',
                                    total=len(dataloader_train)):
        # pass three times first, then back-propagate the loss
        model.train()
        mlp.train()
        for data_train in data_train_group:
            vector3 = []
            regularization = 0
            for data_train_item in data_train:
                # and/or children: [[1, 2], [3, 4]]
                adj, features, labels = cuda_input(*data_train_item[:-2])
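

# NOTE: the `RunningAvg` tracker used in the first variant above is not defined in
# these snippets (the second variant simply uses collections.deque). A minimal
# sketch of what such a windowed running-average helper could look like; the name
# and constructor argument follow the call site above, the rest is an assumption:
from collections import deque


class RunningAvg:
    """Keep the mean of the most recent `window_size` values."""

    def __init__(self, window_size=200):
        self.values = deque(maxlen=window_size)

    def append(self, value):
        self.values.append(float(value))

    def mean(self):
        return sum(self.values) / len(self.values) if self.values else 0.0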
shuffle=True)
test_dataset = datasets.MNIST(root='../data/', train=False, download=True,
                              transform=transforms.ToTensor())
loader_test = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=param['test_batch_size'],
                                          shuffle=True)

# Load the pretrained model
net = MLP()
net.load_state_dict(torch.load('models/mlp_pretrained.pkl'))
if torch.cuda.is_available():
    print('CUDA enabled.')
    net.cuda()
print("--- Pretrained network loaded ---")
test(net, loader_test)

# Prune the weights
masks = weight_prune(net, param['pruning_perc'])
net.set_masks(masks)
print("--- {}% parameters pruned ---".format(param['pruning_perc']))
test(net, loader_test)

# Retraining
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(net.parameters(), lr=param['learning_rate'],
                                weight_decay=param['weight_decay'])
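

# NOTE: `weight_prune` and `MLP.set_masks` are assumed helpers that are not shown
# above. A minimal sketch of global magnitude pruning that returns one binary mask
# per weight matrix; the real implementation may differ:
import numpy as np


def weight_prune(model, pruning_perc):
    """Return masks that zero out the `pruning_perc`% smallest-magnitude weights."""
    all_weights = np.concatenate([p.data.abs().cpu().numpy().flatten()
                                  for p in model.parameters() if p.dim() > 1])
    threshold = float(np.percentile(all_weights, pruning_perc))
    return [(p.data.abs() > threshold).float()
            for p in model.parameters() if p.dim() > 1]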
word_ae.load_state_dict(word_ae_params)
classifier = MLP(word_args.nhidden)
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(list(word_ae.parameters()) + list(classifier.parameters()),
                             lr=3e-4)

one = torch.FloatTensor([1])
mone = one * (-1)
lamda = torch.FloatTensor([10])

if torch.cuda.is_available():
    logger.info("Running on GPU")
    word_ae = word_ae.cuda()
    classifier = classifier.cuda()
    one = one.cuda()
    # D = D.cuda()
    # G = G.cuda()
else:
    logger.info("Running on CPU")

###############################################################################
# Training code
###############################################################################


def train_GAN(batch, train_mode=True):
    if train_mode:
        word_ae.train()
        classifier.train()
    else:
def train_MLP(train_X, train_Y, test_X, test_Y, batch_size=20, epochs=100):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    model = MLP(10, 20, 2)
    model.cuda()
    model.train()

    learn_rate = 1e-3
    grad_clip = 2.0
    dispFreq = 50
    validFreq = 200
    early_stop = 20

    weight = torch.FloatTensor([2.0, 1.0])
    loss_function = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=learn_rate)
    # keep a list (not a lazy filter object) so it can be reused every iteration
    params = [p for p in model.parameters() if p.requires_grad]

    dev_tensor = torch.FloatTensor(test_X).cuda()

    curr = 0
    uidx = 0
    best_step = 0  # for early stopping

    for iepx in range(1, epochs + 1):
        for ibx in range(0, len(train_X), batch_size):
            end = min(ibx + batch_size, len(train_X))
            batch = torch.FloatTensor(train_X[ibx:end]).cuda()
            target = torch.LongTensor(train_Y[ibx:end]).cuda()

            uidx += 1
            pred = model(batch)
            loss = loss_function(pred, target)
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(params, grad_clip)
            optimizer.step()

            if uidx % dispFreq == 0:
                print('Epoch', iepx, '\tUpdate', uidx, '\tCost', loss.item())

            if uidx % validFreq == 0:
                # compute dev score
                model.eval()
                out = model(dev_tensor)
                model.train()
                # score = nn.NLLLoss(weight=weight)(out, vs_tensor).item()
                pred = categoryFromOutput(out)
                F1 = f1_score(test_Y, pred)
                curr_step = uidx // validFreq
                currscore = F1
                print('F1 on dev', F1)
                if currscore > curr:
                    curr = currscore
                    best_step = curr_step
                    # Save model
                    print('Saving model...')
                    # torch.save(model.state_dict(), '%s_model_%s.pkl' % (saveto, run))
                    print('Done')
                if curr_step - best_step > early_stop:
                    print('Early stopping ...')
                    print(best_step)
                    print(curr)
                    return
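

# NOTE: `categoryFromOutput` is not defined above. Given a log-softmax output, it is
# presumably the arg-max class per row, returned in a form that
# sklearn.metrics.f1_score accepts. A minimal sketch under that assumption:
def categoryFromOutput(output):
    """Map a (batch, n_classes) score tensor to a list of predicted class indices."""
    return output.data.max(dim=1)[1].cpu().numpy().tolist()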
adj, features, labels, idx_train, idx_val, idx_test = load_data(args.dataset)
adj = reduce_noise(adj, labels, noise_rate=args.noise_rate)
graph = adj_to_graph(adj)

# Model and optimizer
model = MLP(nfeat=features.shape[1], nhid=args.hidden,
            nclass=labels.max().item() + 1, dropout=args.dropout)
optimizer = optim.Adam(model.parameters(), lr=args.lr,
                       weight_decay=args.weight_decay)

if args.cuda:
    model.cuda()
    features = features.cuda()
    adj = adj.cuda()
    labels = labels.cuda()


def train(iteration):
    t = time.time()
    x_batch, y_batch = graph_sample(idx_train, args.batch_size, graph,
                                    args.trdep, features, labels)
    model.train()
    optimizer.zero_grad()
    y_pre_batch = model(x_batch)
    loss_train = F.nll_loss(y_pre_batch, y_batch)
    acc_train = accuracy(y_pre_batch, y_batch)
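

# NOTE: `accuracy` is an assumed helper, not shown above. A minimal sketch that
# matches how it is called on class scores and integer labels:
def accuracy(output, labels):
    """Fraction of rows whose arg-max prediction equals the label."""
    preds = output.max(dim=1)[1].type_as(labels)
    return preds.eq(labels).double().mean().item()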
def train_permutedmnist(num_tasks, batch_size, hidden_size, lr, num_epochs, num_points,
                        select_method='lambda_descend', use_cuda=True, tau=0.5):
    # Log console output to 'pmlog.txt'
    logging.basicConfig(filename='pmlog.txt')
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Data generator
    datagen = PermutedMnistGenerator(max_iter=num_tasks)

    # Model
    num_classes = 10
    layer_size = [784, hidden_size, hidden_size, num_classes]
    model = MLP(layer_size, act='relu')

    criterion = nn.CrossEntropyLoss()
    if use_cuda:
        criterion.cuda()
        model.cuda()

    # Optimiser
    opt = opt_fromp(model, lr=lr, prior_prec=1e-5, grad_clip_norm=0.01, tau=tau)

    # Train on tasks
    memorable_points = []
    testloaders = []
    acc_list = []
    for tid in range(num_tasks):
        # If not the first task, calculate and store regularisation-term-related quantities
        if tid > 0:
            def closure(task_id):
                memorable_points_t = memorable_points[task_id][0]
                if use_cuda:
                    memorable_points_t = memorable_points_t.cuda()
                opt.zero_grad()
                logits = model.forward(memorable_points_t)
                return logits
            opt.init_task(closure, tid, eps=1e-5)

        # Data generator for this task
        itrain, itest = datagen.next_task()
        itrainloader = DataLoader(dataset=itrain, batch_size=batch_size,
                                  shuffle=True, num_workers=3)
        itestloader = DataLoader(dataset=itest, batch_size=batch_size,
                                 shuffle=False, num_workers=3)
        memorableloader = DataLoader(dataset=itrain, batch_size=batch_size,
                                     shuffle=False, num_workers=3)
        testloaders.append(itestloader)
        iloaders = [itrainloader, testloaders]

        # Train and test
        acc = train(model, iloaders, memorable_points, criterion, opt,
                    task_id=tid, num_epochs=num_epochs, use_cuda=use_cuda)

        # Select memorable past datapoints
        if select_method == 'random':
            i_memorable_points = random_memorable_points(
                itrain, num_points=num_points, num_classes=num_classes)
        elif select_method == 'lambda_descend':
            i_memorable_points = select_memorable_points(
                memorableloader, model, num_points=num_points, num_classes=num_classes,
                use_cuda=use_cuda, descending=True)
        elif select_method == 'lambda_ascend':
            i_memorable_points = select_memorable_points(
                memorableloader, model, num_points=num_points, num_classes=num_classes,
                use_cuda=use_cuda, descending=False)
        else:
            raise Exception('Invalid memorable points selection method.')
        memorable_points.append(i_memorable_points)

        # Update covariance (\Sigma)
        update_fisher(memorableloader, model, opt, use_cuda=use_cuda)

        print(acc)
        print('Mean accuracy after task %d: %f' % (tid + 1, sum(acc) / len(acc)))
        logger.info('After learn task: %d' % (tid + 1))
        logger.info(acc)
        acc_list.append(acc)

    return acc_list
def train_model(args, use_cuda=False):
    start_time = time.time()

    # Read values from args
    num_tasks = args.num_tasks
    batch_size = args.batch_size
    hidden_size = args.hidden_size
    lr = args.lr
    num_epochs = args.num_epochs
    num_points = args.num_points
    coreset_select_method = args.select_method

    # Some parameters
    dataset_generation_test = False
    dataset_num_samples = 2000

    # Colours for plotting
    color = ['C0', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9']

    # Load / generate toy data
    datagen = ToydataGenerator(max_iter=num_tasks, num_samples=dataset_num_samples)
    plt.figure()
    datagen.reset()

    total_loaders = []
    criterion_cl = nn.CrossEntropyLoss()

    # Create model
    layer_size = [2, hidden_size, hidden_size, 2]
    model = MLP(layer_size, act='sigmoid')
    if use_cuda:
        model = model.cuda()

    # Optimiser
    opt = opt_fromp(model, lr=lr, prior_prec=1e-4, grad_clip_norm=None, tau=args.tau)

    memorable_points = None
    inducing_targets = None
    for tid in range(num_tasks):
        # If not the first task, calculate and store regularisation-term-related quantities
        if tid > 0:
            def closure(task_id):
                memorable_points_t = memorable_points[task_id]
                if use_cuda:
                    memorable_points_t = memorable_points_t.cuda()
                opt.zero_grad()
                logits = model.forward(memorable_points_t)
                return logits
            opt.init_task(closure, tid, eps=1e-3)

        # Data generator for this task
        itrain, itest = datagen.next_task()
        itrainloader = DataLoader(dataset=itrain, batch_size=batch_size,
                                  shuffle=True, num_workers=8)
        itestloader = DataLoader(dataset=itest, batch_size=batch_size,
                                 shuffle=False, num_workers=8)
        inducingloader = DataLoader(dataset=itrain, batch_size=batch_size,
                                    shuffle=False, num_workers=8)
        iloaders = [itrainloader, itestloader]
        if tid == 0:
            total_loaders = [itrainloader]
        else:
            total_loaders.append(itrainloader)

        # Train and test
        cl_outputs = train(model, iloaders, memorable_points, criterion_cl, opt,
                           task_id=tid, num_epochs=num_epochs, use_cuda=use_cuda)

        # Select memorable past datapoints
        if coreset_select_method == 'random':
            i_memorable_points, i_inducing_targets = random_memorable_points(
                itrain, num_points=num_points, num_classes=2)
        else:
            i_memorable_points, i_inducing_targets = select_memorable_points(
                inducingloader, model, num_points=num_points, use_cuda=use_cuda)

        # Add memory points to the set
        if tid > 0:
            memorable_points.append(i_memorable_points)
            inducing_targets.append(i_inducing_targets)
        else:
            memorable_points = [i_memorable_points]
            inducing_targets = [i_inducing_targets]

        # Update covariance (\Sigma)
        update_fisher(inducingloader, model, opt, use_cuda=use_cuda)

        # Plot visualisation (2D figure)
        cl_outputs, _ = torch.max(cl_outputs, dim=-1)
        cl_show = 2 * cl_outputs - 1
        cl_show = cl_show.detach()
        if use_cuda:
            cl_show = cl_show.cpu()
        cl_show = cl_show.numpy()
        cl_show = cl_show.reshape(datagen.test_shape)

        plt.figure()
        axs = plt.subplot(111)
        axs.title.set_text('FROMP')
        if not dataset_generation_test:
            plt.imshow(cl_show, cmap='gray',
                       extent=(datagen.x_min, datagen.x_max, datagen.y_min, datagen.y_max),
                       origin='lower')
        for l in range(tid + 1):
            idx = np.where(datagen.y == l)
            plt.scatter(datagen.X[idx][:, 0], datagen.X[idx][:, 1], c=color[l], s=0.03)
            idx = np.where(datagen.y == l + datagen.offset)
            plt.scatter(datagen.X[idx][:, 0], datagen.X[idx][:, 1],
                        c=color[l + datagen.offset], s=0.03)
            if not dataset_generation_test:
                plt.scatter(memorable_points[l][:, 0], memorable_points[l][:, 1],
                            c='m', s=0.4, marker='x')
        plt.show()

        # Calculate and print train accuracy and negative log likelihood
        with torch.no_grad():
            if not dataset_generation_test:
                model.eval()
                N = len(itrain)
                metric_task_id = 0
                nll_loss_avg = 0
                accuracy_avg = 0
                for metric_loader in total_loaders:
                    nll_loss = 0
                    correct = 0
                    for inputs, labels in metric_loader:
                        if use_cuda:
                            inputs, labels = inputs.cuda(), labels.cuda()
                        logits = model.forward(inputs)
                        nll_loss += nn.functional.cross_entropy(
                            torch.squeeze(logits, dim=-1), labels) * float(inputs.shape[0])
                        # Calculate predicted classes
                        pred = logits.data.max(1, keepdim=True)[1]
                        # Count correctly predicted datapoints
                        correct += pred.eq(labels.data.view_as(pred)).sum()
                    nll_loss /= N
                    accuracy = float(correct) / float(N) * 100.
                    print('Task {}, Train accuracy: {:.2f}%, Train Loglik: {:.4f}'.format(
                        metric_task_id, accuracy, nll_loss))
                    metric_task_id += 1
                    nll_loss_avg += nll_loss
                    accuracy_avg += accuracy
                print('Avg train accuracy: {:.2f}%, Avg train Loglik: {:.4f}'.format(
                    accuracy_avg / metric_task_id, nll_loss_avg / metric_task_id))

    print('Time taken: ', time.time() - start_time)
                    'number of shifted samples used for loss estimation (for grad methods).')
parser.add_argument('--max_var', type=float, default=10.0,
                    help='maximum variance shift')
parser.add_argument('--model_path', type=str, default='',
                    help='path to saved model if loading.')
opt = parser.parse_args()

# instantiate model:
net = MLP(input_size=784, width=opt.netWidth)
if opt.cuda:
    net = net.cuda()
if opt.model_path != '':
    net.load_state_dict(torch.load(opt.model_path), strict=False)

# instantiate optimizer:
optimizer = get_optimizer(net=net, lr=opt.lr, opt_str=opt.optim)

# get data loaders:
train_loader, test_loader = get_data_loaders(BS=opt.batchSize)

# train model:
if opt.model_path == '':
    net, stats = train(net, opt.epochs, opt.cuda, optimizer, train_loader, test_loader)
    # net, stats = train(torch.nn.Sequential(AddNoise(mean=0, std=np.sqrt(0.25)), net),
    #                    opt.epochs, opt.cuda, optimizer, train_loader, test_loader)
def main():
    both = args.gan_path is not None

    if args.validation:
        train_loader = torch.utils.data.DataLoader(
            AttributeDataset(args.data_dir, args.dataset, features_path=args.gan_path,
                             mode='train', both=both, normalize=args.normalize,
                             sentences=args.sentences),
            batch_size=args.batch_size, shuffle=True)
        val_seen_loader = torch.utils.data.DataLoader(
            AttributeDataset(args.data_dir, args.dataset, features_path=args.gan_path,
                             mode='val_seen', generalized=True, normalize=args.normalize,
                             sentences=args.sentences),
            batch_size=args.batch_size, shuffle=False)
        val_unseen_loader = torch.utils.data.DataLoader(
            AttributeDataset(args.data_dir, args.dataset, features_path=args.gan_path,
                             mode='val_unseen', generalized=True, normalize=args.normalize,
                             sentences=args.sentences),
            batch_size=args.batch_size, shuffle=False)
    else:
        trainval_loader = torch.utils.data.DataLoader(
            AttributeDataset(args.data_dir, args.dataset, features_path=args.gan_path,
                             mode='trainval', both=both, normalize=args.normalize,
                             sentences=args.sentences),
            batch_size=args.batch_size, shuffle=True)
    test_seen_loader = torch.utils.data.DataLoader(
        AttributeDataset(args.data_dir, args.dataset, features_path=args.gan_path,
                         mode='test_seen', generalized=True, normalize=args.normalize,
                         sentences=args.sentences),
        batch_size=args.batch_size, shuffle=False)
    test_unseen_loader = torch.utils.data.DataLoader(
        AttributeDataset(args.data_dir, args.dataset, features_path=args.gan_path,
                         mode='test_unseen', generalized=True, normalize=args.normalize,
                         sentences=args.sentences),
        batch_size=args.batch_size, shuffle=False)

    # instantiate the models
    if args.mlp:
        mlp = MLP(args.dim_input, [args.nhidden * 2], args.nhidden)
    else:
        mlp = LinearProjection(args.dim_input, args.nhidden)
    embed = LinearProjection(args.nhidden, args.dim_embed)

    cam_key = 'sentences' if args.sentences else 'emb'
    if args.validation:
        cam = torch.from_numpy(train_loader.dataset.data[cam_key].T)
    else:
        cam = torch.from_numpy(trainval_loader.dataset.data[cam_key].T)

    proxies = ProxyNet(args.n_classes, args.dim_embed, proxies=cam)
    model = Base(mlp, embed, proxies)
    criterion = ProxyLoss(temperature=args.temp)

    if args.cuda:
        mlp.cuda()
        embed.cuda()
        model.cuda()
        proxies.cuda()

    parameters_set = []
    layers = []
    for c in mlp.children():
        if isinstance(c, (nn.Linear, nn.ModuleList)):
            layers.extend(list(c.parameters()))
    for c in embed.children():
        if isinstance(c, nn.Linear):
            layers.extend(list(c.parameters()))
    parameters_set.append({'params': layers, 'lr': args.lr})

    optimizer = optim.SGD(parameters_set, lr=args.lr, momentum=0.9,
                          nesterov=True, weight_decay=5e-5)

    n_parameters = sum(p.data.nelement() for p in model.parameters())
    print(' + Number of params: {}'.format(n_parameters))

    scheduler = CosineAnnealingLR(optimizer, args.epochs)

    best_acc = 0
    print('Random results:')
    if args.validation:
        validate(val_seen_loader, val_unseen_loader, model, criterion)
    else:
        validate(test_seen_loader, test_unseen_loader, model, criterion)

    for epoch in range(args.start_epoch, args.epochs + 1):
        # update learning rate
        if args.lr_decay:
            scheduler.step()

        # train for one epoch
        if args.validation:
            train(train_loader, model, criterion, optimizer, epoch)
            validate(val_seen_loader, val_unseen_loader, model, criterion)
        else:
            train(trainval_loader, model, criterion, optimizer, epoch)
            validate(test_seen_loader, test_unseen_loader, model, criterion)

        # saving
        save_checkpoint({'epoch': epoch, 'state_dict': model.state_dict()})

    print('\nFinal evaluation on last epoch model:')
    validate(test_seen_loader, test_unseen_loader, model, criterion)
def train_mlp(**kwargs):
    name, directory = set_directory(name=kwargs['name'], type_net=kwargs['type_net'],
                                    dof=kwargs['dof'])
    writer = SummaryWriter(directory) if kwargs['tensorboard'] else None

    train_loader, val_loader, iter_per_epoch = load_mnist(batch_size=kwargs['batch_size'])

    model = MLP(input_dim=784, num_classes=10, layer_dims=kwargs['ldims'],
                type_net=kwargs['type_net'], N=60000, dof=kwargs['dof'],
                beta_ema=kwargs['beta_ema'])
    num_parameters = sum(p.data.nelement() for p in model.parameters())
    print(f'Number of model parameters: {num_parameters}')

    if torch.cuda.is_available():
        torch.cuda.set_device(kwargs['device'])

    # For training on multiple GPUs, use CUDA_VISIBLE_DEVICES=0,1 to specify which GPUs to use.
    if kwargs['multi_gpu']:
        model = torch.nn.DataParallel(model).cuda()
    else:
        if torch.cuda.is_available():
            model = model.cuda()

    optimizer = construct_optimizer(optimizer=kwargs['optim'], model=model, lr=kwargs['lr'])

    if kwargs['resume'] != '':
        kwargs['start_epoch'], best_prec1, total_steps, model, optimizer = resume_from_checkpoint(
            resume_path=kwargs['resume'], model=model, optimizer=optimizer)
    else:
        total_steps = 0
        best_prec1 = 0.
    cudnn.benchmark = True

    if kwargs['type_net'] == 'kerneldense':
        loss_function = torch.nn.CrossEntropyLoss().cuda()
    else:
        loss_function = CrossEntropyLossWithAnnealing(
            iter_per_epoch=iter_per_epoch, total_steps=total_steps,
            anneal_type=kwargs['anneal_type'], anneal_kl=kwargs['anneal_kl'],
            epzero=kwargs['epzero'], epmax=kwargs['epmax'],
            anneal_maxval=kwargs['anneal_maxval'], writer=writer)
        # loss_function = CrossEntropyLossWithMMD(num_samples=2)

    for epoch in range(kwargs['start_epoch'], kwargs['epochs']):
        total_steps = train_single_epoch(train_loader=train_loader, model=model,
                                         criterion=loss_function, optimizer=optimizer,
                                         epoch=epoch, clip_var=kwargs['clip_var'],
                                         total_steps=total_steps,
                                         print_freq=kwargs['print_freq'], writer=writer,
                                         thres_stds=kwargs['thres_std'], shape=[-1, 784])
        prec1 = validate(val_loader=val_loader, model=model, criterion=loss_function,
                         epoch=epoch, print_freq=kwargs['print_freq'],
                         shape=[-1, 784], writer=writer)

        if kwargs['restart'] and epoch % kwargs['restart_interval'] == 0:
            print('Restarting optimizer...')
            optimizer = construct_optimizer(optimizer=kwargs['restart_optim'],
                                            model=model, lr=kwargs['restart_lr'])

        is_best = prec1 > best_prec1
        if is_best:
            best_prec1 = prec1

        if isinstance(model, torch.nn.DataParallel):
            state = {'epoch': epoch + 1, 'state_dict': model.state_dict(),
                     'best_prec1': max(prec1, best_prec1),
                     'beta_ema': model.module.beta_ema,
                     'optimizer': optimizer.state_dict(), 'total_steps': total_steps}
            if model.module.beta_ema > 0:
                state['avg_params'] = model.module.avg_param
                state['steps_ema'] = model.module.steps_ema
        else:
            state = {'epoch': epoch + 1, 'state_dict': model.state_dict(),
                     'best_prec1': max(prec1, best_prec1),
                     'beta_ema': model.beta_ema,
                     'optimizer': optimizer.state_dict(), 'total_steps': total_steps}
            if model.beta_ema > 0:
                state['avg_params'] = model.avg_param
                state['steps_ema'] = model.steps_ema

        if epoch in kwargs['save_at']:
            name = f'checkpoint_{epoch}.pth.tar'
        else:
            name = 'checkpoint.pth.tar'
        save_checkpoint(state=state, is_best=is_best, name=name)

    print('Best accuracy: ', best_prec1)
    if writer is not None:
        writer.close()