def main():
    # torch.random.manual_seed(123)

    # Create a meta optimizer that wraps a model into a meta model
    # to keep track of the meta updates.
    meta_model = Model(**kwargs)
    if args.cuda:
        meta_model.cuda()

    # for module in meta_model.modules():
    #     print(module._parameters)
    #     print(list(module.children()))

    if args.lr_only:
        meta_optimizer = LearningRateOnlyMetaOptimizer(
            MetaModel(meta_model), args.num_layers, args.hidden_size)
    elif args.fast_meta_opt:
        meta_optimizer = FastMetaOptimizer(
            MetaModel(meta_model), args.num_layers, args.hidden_size)
    else:
        meta_optimizer = MetaOptimizer(
            MetaModel(meta_model), args.num_layers, args.hidden_size)
    if args.cuda:
        meta_optimizer.cuda()

    optimizer = optim.Adam(meta_optimizer.parameters())

    alpha = 0.999
    d = 1
    start_time = time()

    for epoch in range(args.max_epoch):
        decrease_in_loss = 0.0
        final_loss = 0.0
        train_iter = iter(train_loader)
        for i in range(args.updates_per_epoch):
            try:
                x, y = next(train_iter)
            except StopIteration:
                train_iter = iter(train_loader)
                x, y = next(train_iter)
            if args.cuda:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)

            # Sample a new model
            model = Model(**kwargs)
            if args.cuda:
                model.cuda()
            if args.replay_trajectory:
                backup_model = Model(**kwargs)
                if args.cuda:
                    backup_model.cuda()

            # Compute initial loss of the model
            f_x = model(x)
            initial_loss = F.nll_loss(f_x, y)

            av_loss = 0.
            for k in range(args.optimizer_steps // args.truncated_bptt_step):
                # Keep states for truncated BPTT
                meta_optimizer.reset_lstm(keep_states=k > 0, model=model,
                                          use_cuda=args.cuda)
                if args.replay_trajectory:
                    # meta_optimizer.backup_model_params()
                    copy_params(source=model, dest=backup_model)

                loss_sum = 0
                prev_loss = torch.zeros(1)
                if args.cuda:
                    prev_loss = prev_loss.cuda()
                for j in range(args.truncated_bptt_step):
                    try:
                        x, y = next(train_iter)
                    except StopIteration:
                        train_iter = iter(train_loader)
                        x, y = next(train_iter)
                    if args.cuda:
                        x, y = x.cuda(), y.cuda()
                    x, y = Variable(x), Variable(y)

                    # First we need to compute the gradients of the model
                    f_x = model(x)
                    loss = F.nll_loss(f_x, y)
                    model.zero_grad()
                    loss.backward()

                    if not args.replay_trajectory:
                        av_loss = alpha * av_loss + (1 - alpha) * loss.data

                    # Perform a meta update using gradients from the model
                    # and return the current meta model saved in the optimizer
                    meta_model = meta_optimizer.meta_update(model, loss.data)

                    # Compute a loss for a step of the meta optimizer
                    f_x = meta_model(x)
                    loss = F.nll_loss(f_x, y)
                    loss_sum += (loss - Variable(prev_loss))
                    prev_loss = loss.data

                # Update the parameters of the meta optimizer
                meta_optimizer.zero_grad()
                # loss_sum.backward()
                loss.backward()
                for param in meta_optimizer.parameters():
                    param.grad.data.clamp_(-1, 1)
                optimizer.step()

                if args.replay_trajectory:
                    # Replay the same trajectory with the updated meta optimizer.
                    meta_optimizer.reset_lstm(keep_states=k > 0,
                                              model=backup_model,
                                              use_cuda=args.cuda)
                    copy_params(source=backup_model, dest=model)
                    for j in range(args.truncated_bptt_step):
                        try:
                            x, y = next(train_iter)
                        except StopIteration:
                            train_iter = iter(train_loader)
                            x, y = next(train_iter)
                        if args.cuda:
                            x, y = x.cuda(), y.cuda()
                        x, y = Variable(x), Variable(y)

                        # First we need to compute the gradients of the model
                        f_x = model(x)
                        loss = F.nll_loss(f_x, y)
                        model.zero_grad()
                        loss.backward()

                        # Perform a meta update using gradients from the model
                        # and return the current meta model saved in the optimizer
                        meta_model = meta_optimizer.meta_update(model, loss.data)
                        av_loss = alpha * av_loss + (1 - alpha) * loss.data

                if (k * args.truncated_bptt_step) % args.print_pause == 0:
                    if args.lr_only:
                        meta_optimizer.learning_rate.clamp(min=1e-8)
                        print('av_loss = {:.3f}; lr = {:.4f}'.format(
                            av_loss[0], meta_optimizer.learning_rate.data[0]))
                    else:
                        print('av_loss = {:.3f}'.format(av_loss[0]))

                if av_loss[0] < 0.1 ** d:
                    print('model reached loss < 1e-{} in {} steps ({:.1f}s)'.format(
                        d, k * args.truncated_bptt_step, time() - start_time))
                    if d >= 3:
                        break
                    d += 1

            # Compute relative decrease in the loss function w.r.t the initial
            # value
            decrease_in_loss += loss.data[0] / initial_loss.data[0]
            final_loss += loss.data[0]

        print("Epoch: {}, final loss {}, average final/initial loss ratio: {}".format(
            epoch, final_loss / args.updates_per_epoch,
            decrease_in_loss / args.updates_per_epoch))
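# `copy_params` is used by the replay-trajectory branch above but is not defined
# in this listing. A minimal sketch, assuming it only needs to copy parameter
# values between two models of identical architecture (the original helper may
# differ), could look like this:
def copy_params(source, dest):
    # Copy parameter values tensor-by-tensor; gradients and optimizer state
    # of `dest` are left untouched.
    for src_p, dst_p in zip(source.parameters(), dest.parameters()):
        dst_p.data.copy_(src_p.data)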
def main():
    # Create a meta optimizer that wraps a model into a meta model
    # to keep track of the meta updates.
    meta_model = Model()
    if args.cuda:
        meta_model.cuda()

    meta_optimizer = FastMetaOptimizer(MetaModel(meta_model), args.num_layers,
                                       args.hidden_size)
    if args.cuda:
        meta_optimizer.cuda()

    optimizer = optim.Adam(meta_optimizer.parameters(), lr=3e-3)

    for epoch in range(args.max_epoch):
        decrease_in_loss = 0.0
        final_loss = 0.0
        train_iter = iter(train_loader)
        for i in range(args.updates_per_epoch):
            # Sample a new model
            model = Model()
            if args.cuda:
                model.cuda()

            x, y = next(train_iter)
            if args.cuda:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)

            # Compute initial loss of the model
            f_x = model(x)
            initial_loss = F.nll_loss(f_x, y)

            for k in range(args.optimizer_steps // args.truncated_bptt_step):
                # Keep states for truncated BPTT
                meta_optimizer.reset_lstm(keep_states=k > 0, model=model,
                                          use_cuda=args.cuda)

                loss_sum = 0
                prev_loss = torch.zeros(1)
                if args.cuda:
                    prev_loss = prev_loss.cuda()
                for j in range(args.truncated_bptt_step):
                    x, y = next(train_iter)
                    if args.cuda:
                        x, y = x.cuda(), y.cuda()
                    x, y = Variable(x), Variable(y)

                    # First we need to compute the gradients of the model
                    f_x = model(x)
                    loss = F.nll_loss(f_x, y)
                    model.zero_grad()
                    loss.backward()

                    # Perform a meta update using gradients from the model
                    # and return the current meta model saved in the optimizer
                    meta_model = meta_optimizer.meta_update(model, loss.data)

                    # Compute a loss for a step of the meta optimizer
                    f_x = meta_model(x)
                    loss = F.nll_loss(f_x, y)
                    loss_sum += (loss - Variable(prev_loss))
                    prev_loss = loss.data

                # Update the parameters of the meta optimizer
                meta_optimizer.zero_grad()
                loss_sum.backward()
                for param in meta_optimizer.parameters():
                    param.grad.data.clamp_(-1, 1)
                optimizer.step()

            # Compute relative decrease in the loss function w.r.t the initial
            # value
            decrease_in_loss += loss.data / initial_loss.data
            final_loss += loss.data

        print("Epoch: {}, final loss {}, average final/initial loss ratio: {}".format(
            epoch, final_loss / args.updates_per_epoch,
            decrease_in_loss / args.updates_per_epoch))
def main():
    # Create a meta optimizer that wraps a model into a meta model
    # to keep track of the meta updates.
    meta_model = Model()
    if args.cuda:
        meta_model.cuda()
    meta_model.apply(weights_init)

    if args.RNN == 'Fast':
        meta_optimizer = FastMetaOptimizer(MetaModel(meta_model),
                                           args.num_layers, args.hidden_size)
    elif args.RNN == 'LSTM':
        meta_optimizer = MetaOptimizerLSTM(MetaModel(meta_model),
                                           args.num_layers, args.hidden_size)
    elif args.RNN == 'GRU':
        meta_optimizer = MetaOptimizerGRU(MetaModel(meta_model),
                                          args.num_layers, args.hidden_size)
    elif args.RNN == 'RNN':
        meta_optimizer = MetaOptimizerRNN(MetaModel(meta_model),
                                          args.num_layers, args.hidden_size)

    optimizer = optim.Adam(meta_optimizer.parameters(), lr=1e-3)
    if args.cuda:
        meta_optimizer.cuda()
    meta_optimizer.load_state_dict(
        torch.load('%s/%s_best.pth' % (args.outdir, 'meta_optimizer')))

    l_val_model_best = 99999
    l_val_meta_model_best = 99999
    accuracy_model = []
    loss_model = []
    models_tested = 50

    train_iter = iter(train_loader)
    epoch_loss = [[] for i in range(
        int(len(train_iter) * args.train_split // args.truncated_bptt_step))]

    model = Model()
    if args.cuda:
        model.cuda()
    model.apply(weights_init)

    for epoch in tqdm(range(models_tested)):
        train_iter = iter(train_loader)
        loss_train_model = []
        loss_train_meta = []
        loss_val_model = []
        loss_val_meta = []
        correct = 0
        incorrect = 0

        # model = Model()
        # if args.cuda:
        #     model.cuda()
        # model.apply(weights_init)

        for k in range(int(len(train_iter) * args.train_split //
                           (args.truncated_bptt_step * 2))):
            # Keep states for truncated BPTT
            meta_optimizer.reset_lstm(keep_states=k > 0, model=model,
                                      use_cuda=args.cuda)

            loss_sum = 0
            prev_loss = torch.zeros(1)
            if args.cuda:
                prev_loss = prev_loss.cuda()
            for j in range(args.truncated_bptt_step * 2):
                x, y = next(train_iter)
                if args.cuda:
                    x, y = x.cuda(), y.cuda()
                x, y = Variable(x), Variable(y)

                # First we need to compute the gradients of the model
                f_x = model(x)
                loss = F.nll_loss(f_x, y)
                model.zero_grad()
                loss.backward()

                meta_model = meta_optimizer.meta_update(model, loss.data)
                epoch_loss[k].append(loss.item())

        # Evaluate the resulting meta model on the held-out split
        for k in range(int(len(train_iter) * (1 - args.train_split))):
            x, y = next(train_iter)
            if args.cuda:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)

            f_x = meta_model(x)
            for output, index in zip(f_x.cpu().detach().numpy(),
                                     range(len(f_x.cpu().detach().numpy()))):
                if y[index] == output.argmax():
                    correct += 1
                else:
                    incorrect += 1
            loss = F.nll_loss(f_x, y)
            loss_val_model.append(loss.item())

        l_val_model = np.mean(loss_val_model)
        loss_model.append(l_val_model)
        accuracy_model.append(float(correct) / (correct + incorrect))
        print(float(correct) / (correct + incorrect))

    print('\nValidation Loss Model: ' + str(np.mean(loss_model)))
    print('\nValidation Accuracy: ' + str(np.mean(accuracy_model)))

    np.save('%s/loss_epoch_test.npy' % (args.outdir),
            [np.mean(i) for i in epoch_loss])
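# `weights_init` is applied to freshly sampled models in the test script above and
# the training script below, but it is not defined in this listing. A minimal
# sketch, assuming a plain re-initialization of Linear/Conv2d layers (the original
# scheme may differ), could look like this:
import torch.nn as nn

def weights_init(module):
    # Re-initialize weight matrices of Linear/Conv2d layers and zero their biases;
    # other module types are left unchanged.
    if isinstance(module, (nn.Linear, nn.Conv2d)):
        nn.init.xavier_uniform_(module.weight)
        if module.bias is not None:
            module.bias.data.zero_()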
def main():
    # Create a meta optimizer that wraps a model into a meta model
    # to keep track of the meta updates.
    meta_model = Model()
    if args.cuda:
        meta_model.cuda()
    meta_model.apply(weights_init)

    if args.RNN == 'Fast':
        meta_optimizer = FastMetaOptimizer(MetaModel(meta_model),
                                           args.num_layers, args.hidden_size)
    elif args.RNN == 'LSTM':
        meta_optimizer = MetaOptimizerLSTM(MetaModel(meta_model),
                                           args.num_layers, args.hidden_size)
    elif args.RNN == 'GRU':
        meta_optimizer = MetaOptimizerGRU(MetaModel(meta_model),
                                          args.num_layers, args.hidden_size)
    elif args.RNN == 'RNN':
        meta_optimizer = MetaOptimizerRNN(MetaModel(meta_model),
                                          args.num_layers, args.hidden_size)
    else:
        raise NameError('not a valid RNN')

    if args.cuda:
        meta_optimizer.cuda()

    optimizer = optim.Adam(meta_optimizer.parameters(), lr=1e-3)

    l_val_model_best = 99999
    l_val_meta_model_best = 999999
    acc_val_best = 0
    loss_epoch_val = []
    accuracy_epoch_val = []
    loss_epoch_optimizer_train = []

    for epoch in range(args.max_epoch):
        print("Epoch %s\n" % epoch)
        decrease_in_loss = 0.0
        final_loss = 0.0
        train_iter = iter(train_loader)
        loss_train_model = []
        loss_train_meta = []
        loss_val_model = []
        loss_val_optimizer = []
        correct = 0
        incorrect = 0
        updates = args.updates_per_epoch

        for i in tqdm(range(updates)):
            # Sample a new model
            model = Model()
            if args.cuda:
                model.cuda()
            meta_model.apply(weights_init)
            # model_optimizer = optim.Adam(model.parameters(), lr=0.01)

            x, y = next(train_iter)
            if args.cuda:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)

            # Compute initial loss of the model
            f_x = model(x)
            initial_loss = F.nll_loss(f_x, y)

            for k in range(args.optimizer_steps // args.truncated_bptt_step):
                # Keep states for truncated BPTT
                meta_optimizer.reset_lstm(
                    keep_states=k > 0, model=model, use_cuda=args.cuda)

                loss_sum = 0
                prev_loss = torch.zeros(1)
                if args.cuda:
                    prev_loss = prev_loss.cuda()
                for j in range(args.truncated_bptt_step):
                    x, y = next(train_iter)
                    if args.cuda:
                        x, y = x.cuda(), y.cuda()
                    x, y = Variable(x), Variable(y)

                    # Training cycle for optimizer training.
                    # First we need to compute the gradients of the model
                    f_x = model(x)
                    loss = F.nll_loss(f_x, y)
                    loss_train_model.append(loss.item())
                    model.zero_grad()
                    loss.backward()

                    # Perform a meta update using gradients from the model
                    # and return the current meta model saved in the optimizer
                    meta_model = meta_optimizer.meta_update(model, loss.data)

                    # Compute a loss for a step of the meta optimizer
                    f_x = meta_model(x)
                    loss = F.nll_loss(f_x, y)
                    loss_sum += (loss - Variable(prev_loss))
                    prev_loss = loss.data

                # Update the parameters of the meta optimizer
                meta_optimizer.zero_grad()
                loss_train_meta.append(loss_sum.item())
                loss_sum.backward()
                for param in meta_optimizer.parameters():
                    param.grad.data.clamp_(-1, 1)
                optimizer.step()

            # Compute relative decrease in the loss function w.r.t the initial
            # value
            decrease_in_loss += loss.item() / initial_loss.item()
            final_loss += loss.item()

        for i in tqdm(range(updates - 6)):
            # Sample a new model
            model = Model()
            if args.cuda:
                model.cuda()
            meta_model.apply(weights_init)

            for k in range(args.optimizer_steps // args.truncated_bptt_step):
                # Keep states for truncated BPTT
                meta_optimizer.reset_lstm(
                    keep_states=k > 0, model=model, use_cuda=args.cuda)

                loss_sum = 0
                prev_loss = torch.zeros(1)
                if args.cuda:
                    prev_loss = prev_loss.cuda()
                for j in range(args.truncated_bptt_step):
                    x, y = next(train_iter)
                    if args.cuda:
                        x, y = x.cuda(), y.cuda()
                    x, y = Variable(x), Variable(y)

                    # First we need to compute the gradients of the model
                    f_x = model(x)
                    loss = F.nll_loss(f_x, y)
                    model.zero_grad()
                    loss.backward()

                    meta_model = meta_optimizer.meta_update(model, loss.data)

                    f_x = meta_model(x)
                    loss = F.nll_loss(f_x, y)
                    loss_sum += (loss - Variable(prev_loss))
                    prev_loss = loss.data

                loss_val_optimizer.append(loss_sum.item())

            # Compute a loss for a step of the meta optimizer
            for datum in range(350 // (updates - 6)):
                x, y = next(train_iter)
                if args.cuda:
                    x, y = x.cuda(), y.cuda()
                x, y = Variable(x), Variable(y)

                f_x = meta_model(x)
                for output, index in zip(f_x.cpu().detach().numpy(),
                                         range(len(f_x.cpu().detach().numpy()))):
                    if y[index] == output.argmax():
                        correct += 1
                    else:
                        incorrect += 1
                loss = F.nll_loss(f_x, y)
                loss_val_model.append(loss.item())

        l_val_model = np.mean(loss_val_model)
        # l_val_meta_model = np.mean(loss_val_meta)
        loss_epoch_val.append(l_val_model)
        accuracy_epoch_val.append(float(correct) / (correct + incorrect))
        loss_epoch_optimizer_train.append(np.mean(loss_val_optimizer))

        torch.save(meta_model.state_dict(),
                   '%s/%s_last.pth' % (args.outdir, 'meta_model'))
        torch.save(meta_optimizer.state_dict(),
                   '%s/%s_last.pth' % (args.outdir, 'meta_optimizer'))

        if l_val_model < l_val_model_best:
            print("new best model")
            l_val_model_best = l_val_model
            torch.save(model.state_dict(),
                       '%s/%s_best.pth' % (args.outdir, 'meta_model'))
            torch.save(meta_optimizer.state_dict(),
                       '%s/%s_best.pth' % (args.outdir, 'meta_optimizer'))
        if epoch % 100 == 0:
            torch.save(meta_optimizer.state_dict(),
                       '%s/%s_%sepoch.pth' % (args.outdir, 'meta_optimizer', epoch))

        print("Epoch: {}, final loss {}, average final/initial loss ratio: {}".format(
            epoch, final_loss / args.updates_per_epoch,
            decrease_in_loss / args.updates_per_epoch))
        print('\nValidation Loss Model: ' + str(np.mean(loss_val_model)))
        # print('\nValidation Loss Meta: ' + str(np.mean(loss_val_meta)))
        print('\nValidation Accuracy: ' + str(float(correct) / (correct + incorrect)))
        print('\nTraining Loss Model: ' + str(np.mean(loss_train_model)))
        # print('\nTraining Loss Meta: ' + str(np.mean(loss_train_meta)))

        np.save('%s/loss_epoch_val.npy' % (args.outdir), np.array(loss_epoch_val))
        np.save('%s/accuracy_epoch_val.npy' % (args.outdir),
                np.array(accuracy_epoch_val))
        np.save('%s/loss_epoch_optimizer_val.npy' % (args.outdir),
                np.array(loss_epoch_optimizer_train))
def main():
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True, **kwargs)

    # Create a meta optimizer that wraps a model into a meta model
    # to keep track of the meta updates.
    meta_model = Model2()
    if args.cuda:
        meta_model.cuda()

    meta_optimizer = FastMetaOptimizer(MetaModel(meta_model), args.num_layers,
                                       args.hidden_size)
    if args.cuda:
        meta_optimizer.cuda()
    print(meta_optimizer)

    optimizer = optim.Adam(meta_optimizer.parameters(), lr=1e-3)

    for epoch in range(args.max_epoch):
        decrease_in_loss = 0.0
        final_loss = 0.0
        train_iter = iter(train_loader)
        for i in range(args.updates_per_epoch):
            # Sample a new model
            model = Model2()
            if args.cuda:
                model.cuda()

            x, y = next(train_iter)
            if args.cuda:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)

            # Compute initial loss of the model
            f_x = model(x)
            initial_loss = F.nll_loss(f_x, y)

            for k in range(args.optimizer_steps // args.truncated_bptt_step):
                # Keep states for truncated BPTT
                meta_optimizer.reset_lstm(
                    keep_states=k > 0, model=model, use_cuda=args.cuda)

                loss_sum = 0
                prev_loss = torch.zeros(1)
                if args.cuda:
                    prev_loss = prev_loss.cuda()
                for j in range(args.truncated_bptt_step):
                    x, y = next(train_iter)
                    if args.cuda:
                        x, y = x.cuda(), y.cuda()
                    x, y = Variable(x), Variable(y)

                    # First we need to compute the gradients of the model
                    f_x = model(x)
                    loss = F.nll_loss(f_x, y)
                    acc = (f_x.max(1)[1] == y).type(torch.FloatTensor).mean()
                    model.zero_grad()
                    loss.backward()

                    # Perform a meta update using gradients from the model
                    # and return the current meta model saved in the optimizer
                    meta_model = meta_optimizer.meta_update(model, loss.data)

                    # Compute a loss for a step of the meta optimizer
                    f_x = meta_model(x)
                    loss = F.nll_loss(f_x, y)
                    loss_sum += (loss - Variable(prev_loss))
                    prev_loss = loss.data

                # Update the parameters of the meta optimizer
                meta_optimizer.zero_grad()
                loss_sum.backward()
                for param in meta_optimizer.parameters():
                    param.grad.data.clamp_(-1, 1)
                optimizer.step()

            # Compute relative decrease in the loss function w.r.t the initial
            # value
            decrease_in_loss += loss.data[0] / initial_loss.data[0]
            final_loss += loss.data[0]

        print("Epoch: {}, final loss {}, average final/initial loss ratio: {}, "
              "params: {}, acc: {}".format(
                  epoch, final_loss / args.updates_per_epoch,
                  decrease_in_loss / args.updates_per_epoch,
                  [meta_optimizer.f, meta_optimizer.i], acc))
def main_meta_lstm():
    TEXT = data.Field(sequential=True, include_lengths=True)
    LABEL = data.Field(sequential=False)

    train, val, test = datasets.SNLI.splits(TEXT, LABEL)
    TEXT.build_vocab(train, vectors="glove.840B.300d")
    LABEL.build_vocab(train)
    vocab = TEXT.vocab

    train_iter, val_iter, test_iter = data.Iterator.splits(
        (train, val, test), batch_size=50, repeat=False, shuffle=False)

    config = Config()
    criterion = nn.CrossEntropyLoss()

    # Create a meta optimizer that wraps a model into a meta model
    # to keep track of the meta updates.
    meta_model = Model(vocab, config)
    if args.cuda:
        meta_model.cuda()

    meta_optimizer = FastMetaOptimizer(MetaModel(meta_model), args.num_layers,
                                       args.hidden_size)
    if args.cuda:
        meta_optimizer.cuda()

    optimizer = optim.Adam(meta_optimizer.parameters(), lr=1e-3)

    for i in range(args.max_epoch):
        # Sample a new model
        model = Model(vocab, config)
        if args.cuda:
            model.cuda()

        train_acc = 0.0
        train_cnt = 0
        for k in range(args.optimizer_steps):
            # Keep states for truncated BPTT
            meta_optimizer.reset_lstm(
                keep_states=k > 0, model=model, use_cuda=args.cuda)

            loss_sum = 0
            prev_loss = torch.zeros(1)
            if args.cuda:
                prev_loss = prev_loss.cuda()
            for j in range(args.truncated_bptt_step):
                batch = next(iter(train_iter))
                x, y = batch, batch.label - 1

                # First we need to compute the gradients of the model
                f_x = model(x)
                acc = (f_x.max(1)[1] == y).type(torch.FloatTensor).mean().float()
                train_acc += acc
                train_cnt += 1
                loss = criterion(f_x, y)
                model.zero_grad()
                loss.backward()

                # Perform a meta update using gradients from the model
                # and return the current meta model saved in the optimizer
                meta_model = meta_optimizer.meta_update(model, loss.data)

                # Compute a loss for a step of the meta optimizer
                f_x = meta_model(x)
                loss = criterion(f_x, y)
                loss_sum += (loss - Variable(prev_loss))
                prev_loss = loss.data

            # Update the parameters of the meta optimizer
            meta_optimizer.zero_grad()
            loss_sum.backward()
            for param in meta_optimizer.parameters():
                param.grad.data.clamp_(-1, 1)
            optimizer.step()

            print('i = {}, k = {}, acc = {}, loss = {}'.format(
                i, k, acc, loss.float()))

        test_acc = 0.0
        test_cnt = 0
        for batch in test_iter:
            x, y = batch, batch.label - 1
            f_x = model(x)
            test_acc += (f_x.max(1)[1] == y).type(torch.FloatTensor).mean().float()
            test_cnt += 1
        print('epoch = {}, train_acc = {}, test_acc = {}'.format(
            i, train_acc / train_cnt, test_acc / test_cnt))
def main2():
    TEXT = data.Field(sequential=True, include_lengths=True)
    LABEL = data.Field(sequential=False)

    train, val, test = datasets.SNLI.splits(TEXT, LABEL)
    TEXT.build_vocab(train, vectors="glove.840B.300d")
    LABEL.build_vocab(train)
    vocab = TEXT.vocab

    train_iter, val_iter, test_iter = data.Iterator.splits(
        (train, val, test), batch_size=50, repeat=False)

    config = Config()
    criterion = nn.CrossEntropyLoss()

    # Create a meta optimizer that wraps a model into a meta model
    # to keep track of the meta updates.
    meta_model = CNNModel(vocab, config)
    if args.cuda:
        meta_model.cuda()

    meta_optimizer = FastMetaOptimizer(MetaModel(meta_model), args.num_layers,
                                       args.hidden_size)
    if args.cuda:
        meta_optimizer.cuda()

    optimizer = optim.Adam(meta_optimizer.parameters(), lr=1e-3)

    for epoch in range(args.max_epoch):
        decrease_in_loss = 0.0
        final_loss = 0.0
        for i in range(args.updates_per_epoch):
            # Sample a new model
            model = CNNModel(vocab, config)
            if args.cuda:
                model.cuda()

            batch = next(iter(train_iter))
            x, y = batch, batch.label - 1

            # Compute initial loss of the model
            f_x = model(x)
            initial_loss = criterion(f_x, y)

            for k in range(args.optimizer_steps // args.truncated_bptt_step):
                # Keep states for truncated BPTT
                meta_optimizer.reset_lstm(
                    keep_states=k > 0, model=model, use_cuda=args.cuda)

                loss_sum = 0
                prev_loss = torch.zeros(1)
                if args.cuda:
                    prev_loss = prev_loss.cuda()
                for j in range(args.truncated_bptt_step):
                    batch = next(iter(train_iter))
                    x, y = batch, batch.label - 1

                    # First we need to compute the gradients of the model
                    f_x = model(x)
                    acc = (f_x.max(1)[1] == y).type(torch.FloatTensor).mean()
                    loss = criterion(f_x, y)
                    model.zero_grad()
                    loss.backward()

                    # Perform a meta update using gradients from the model
                    # and return the current meta model saved in the optimizer
                    meta_model = meta_optimizer.meta_update(model, loss.data)

                    # Compute a loss for a step of the meta optimizer
                    f_x = meta_model(x)
                    loss = criterion(f_x, y)
                    loss_sum += (loss - Variable(prev_loss))
                    prev_loss = loss.data

                # Update the parameters of the meta optimizer
                meta_optimizer.zero_grad()
                loss_sum.backward()
                for param in meta_optimizer.parameters():
                    param.grad.data.clamp_(-1, 1)
                optimizer.step()

                print('acc=', acc)
                print('loss=', loss)
                print('para=', [meta_optimizer.f, meta_optimizer.i])

            # Compute relative decrease in the loss function w.r.t the initial
            # value
            decrease_in_loss += loss.data[0] / initial_loss.data[0]
            final_loss += loss.data[0]

        print("Epoch: {}, final loss {}, average final/initial loss ratio: {}, "
              "params: {}".format(
                  epoch, final_loss / args.updates_per_epoch,
                  decrease_in_loss / args.updates_per_epoch,
                  [meta_optimizer.f, meta_optimizer.i]))
def main():
    # Create a meta optimizer that wraps a model into a meta model
    # to keep track of the meta updates.
    meta_model = Model()
    if args.cuda:
        meta_model.cuda()

    meta_optimizer = FastMetaOptimizer(MetaModel(meta_model), args.num_layers,
                                       args.hidden_size)
    if args.cuda:
        meta_optimizer.cuda()

    optimizer = optim.Adam(meta_optimizer.parameters(), lr=1e-3)

    l_val_model_best = 99999
    l_val_meta_model_best = 99999

    for epoch in range(args.max_epoch):
        print("Epoch %s\n" % epoch)
        decrease_in_loss = 0.0
        final_loss = 0.0
        train_iter = iter(train_loader)
        loss_train_model = []
        loss_train_meta = []
        loss_val_model = []
        loss_val_meta = []
        correct = 0
        incorrect = 0

        # for i in tqdm(range(args.updates_per_epoch)):
        # updates = int(float(args.train_split) * len(train_loader) /
        #               (((args.optimizer_steps // args.truncated_bptt_step) *
        #                 args.truncated_bptt_step) + 1))
        updates = int(float(args.train_split) * len(train_loader))
        for i in tqdm(range(updates)):
            # Sample a new model
            model = Model()
            if args.cuda:
                model.cuda()

            x, y = next(train_iter)
            if args.cuda:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)

            # Compute initial loss of the model
            f_x = model(x)
            initial_loss = F.nll_loss(f_x, y)

            for k in range(args.optimizer_steps // args.truncated_bptt_step):
                # Keep states for truncated BPTT
                meta_optimizer.reset_lstm(keep_states=k > 0, model=model,
                                          use_cuda=args.cuda)

                loss_sum = 0
                prev_loss = torch.zeros(1)
                if args.cuda:
                    prev_loss = prev_loss.cuda()
                for j in range(args.truncated_bptt_step):
                    # x, y = next(train_iter)
                    if args.cuda:
                        x, y = x.cuda(), y.cuda()
                    x, y = Variable(x), Variable(y)

                    # First we need to compute the gradients of the model
                    f_x = model(x)
                    loss = F.nll_loss(f_x, y)
                    loss_train_model.append(loss.item())
                    model.zero_grad()
                    loss.backward()

                    # Perform a meta update using gradients from the model
                    # and return the current meta model saved in the optimizer
                    meta_model = meta_optimizer.meta_update(model, loss.data)

                    # Compute a loss for a step of the meta optimizer
                    f_x = meta_model(x)
                    loss = F.nll_loss(f_x, y)
                    loss_sum += (loss - Variable(prev_loss))
                    prev_loss = loss.data

                # Update the parameters of the meta optimizer
                meta_optimizer.zero_grad()
                loss_train_meta.append(loss_sum.item())
                loss_sum.backward()
                for param in meta_optimizer.parameters():
                    param.grad.data.clamp_(-1, 1)
                optimizer.step()

            # Compute relative decrease in the loss function w.r.t the initial
            # value
            decrease_in_loss += loss.item() / initial_loss.item()
            final_loss += loss.item()

        for i in tqdm(range(int((1 - args.train_split) * len(train_loader)))):
            x, y = next(train_iter)
            if args.cuda:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)

            # Compute the validation loss of the model
            f_x = model(x)
            for output, index in zip(f_x.cpu().detach().numpy(),
                                     range(len(f_x.cpu().detach().numpy()))):
                if y[index] == output.argmax():
                    correct += 1
                else:
                    incorrect += 1
            loss_model = F.nll_loss(f_x, y)
            loss_val_model.append(loss_model.item())

            meta_model = meta_optimizer.meta_update(model, loss.data)

            # Compute a loss for a step of the meta optimizer
            f_x = meta_model(x)
            loss_meta = F.nll_loss(f_x, y)
            loss_val_meta.append(loss_meta.item())

        torch.save(model.state_dict(), '%s/%s_last.pth' % (args.outdir, 'model'))
        torch.save(meta_model.state_dict(),
                   '%s/%s_last.pth' % (args.outdir, 'meta_model'))

        l_val_model = np.mean(loss_val_model)
        l_val_meta_model = np.mean(loss_val_meta)
        if l_val_model < l_val_model_best:
            print("new best model")
            l_val_model_best = l_val_model
            torch.save(model.state_dict(),
                       '%s/%s_best.pth' % (args.outdir, 'model'))
        if l_val_meta_model < l_val_meta_model_best:
            print("new best meta-model")
            l_val_meta_model_best = l_val_meta_model
            torch.save(meta_model.state_dict(),
                       '%s/%s_best.pth' % (args.outdir, 'meta_model'))

        # print("Epoch: {}, final loss {}, average final/initial loss ratio: {}".format(
        #     epoch, final_loss / args.updates_per_epoch,
        #     decrease_in_loss / args.updates_per_epoch))
        print('\nValidation Loss Model: ' + str(np.mean(loss_val_model)))
        print('\nValidation Loss Meta: ' + str(np.mean(loss_val_meta)))
        print('\nValidation Accuracy: ' + str(float(correct) / (correct + incorrect)))
        print('\nTraining Loss Model: ' + str(np.mean(loss_train_model)))
        print('\nTraining Loss Meta: ' + str(np.mean(loss_train_meta)))
def main():
    # Create a meta optimizer that wraps a model into a meta model
    # to keep track of the meta updates.
    meta_model = Model()
    if args.cuda:
        meta_model.cuda()

    if args.RNN == 'Fast':
        meta_optimizer = FastMetaOptimizer(MetaModel(meta_model),
                                           args.num_layers, args.hidden_size)
    elif args.RNN == 'LSTM':
        meta_optimizer = MetaOptimizerLSTM(MetaModel(meta_model),
                                           args.num_layers, args.hidden_size)
    elif args.RNN == 'GRU':
        meta_optimizer = MetaOptimizerGRU(MetaModel(meta_model),
                                          args.num_layers, args.hidden_size)
    elif args.RNN == 'RNN':
        meta_optimizer = MetaOptimizerRNN(MetaModel(meta_model),
                                          args.num_layers, args.hidden_size)

    optimizer = optim.Adam(meta_optimizer.parameters(), lr=1e-3)
    if args.cuda:
        meta_optimizer.cuda()
    meta_optimizer.load_state_dict(
        torch.load('%s/%s_best.pth' % (args.outdir, 'meta_optimizer')))
    # optimizer = optim.Adam(model.parameters(), lr=1e-3)

    l_val_model_best = 99999
    l_val_meta_model_best = 99999
    loss_epoch = []
    accuracy_epoch = []

    for epoch in range(args.max_epoch):
        print("Epoch %s\n" % epoch)
        decrease_in_loss = 0.0
        final_loss = 0.0
        train_iter = iter(train_loader)
        loss_train_model = []
        loss_train_meta = []
        loss_val_model = []
        loss_val_meta = []
        correct = 0
        incorrect = 0
        updates = args.updates_per_epoch

        for i in tqdm(range(updates)):
            # Sample a new model
            model = Model()
            if args.cuda:
                model.cuda()

            x, y = next(train_iter)
            if args.cuda:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)

            # Compute initial loss of the model
            f_x = model(x)
            initial_loss = F.nll_loss(f_x, y)

            for k in range(args.optimizer_steps // args.truncated_bptt_step):
                # Keep states for truncated BPTT
                meta_optimizer.reset_lstm(
                    keep_states=k > 0, model=model, use_cuda=args.cuda)

                loss_sum = 0
                prev_loss = torch.zeros(1)
                if args.cuda:
                    prev_loss = prev_loss.cuda()
                for j in range(args.truncated_bptt_step):
                    x, y = next(train_iter)
                    if args.cuda:
                        x, y = x.cuda(), y.cuda()
                    x, y = Variable(x), Variable(y)

                    # First we need to compute the gradients of the model
                    f_x = model(x)
                    loss = F.nll_loss(f_x, y)
                    loss_train_model.append(loss.item())
                    model.zero_grad()
                    loss.backward()

                    # Perform a meta update using gradients from the model
                    # and return the current meta model saved in the optimizer
                    meta_model = meta_optimizer.meta_update(model, loss.data)

                    # Compute a loss for a step of the meta optimizer
                    f_x = meta_model(x)
                    loss = F.nll_loss(f_x, y)
                    loss_sum += (loss - Variable(prev_loss))
                    prev_loss = loss.data

                # Update the parameters of the meta optimizer
                loss_train_meta.append(loss_sum.item())
                loss_sum.backward()

        # for i in tqdm(range(int((1 - args.train_split) * len(train_loader)))):
        for i in tqdm(range(int(len(train_iter) -
                                args.updates_per_epoch * (1 + args.optimizer_steps)))):
            x, y = next(train_iter)
            if args.cuda:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)

            # meta_optimizer.reset_lstm(
            #     keep_states=k > 0, model=model, use_cuda=args.cuda)

            # Evaluate the meta model on the held-out batches
            f_x = meta_model(x)
            for output, index in zip(f_x.cpu().detach().numpy(),
                                     range(len(f_x.cpu().detach().numpy()))):
                if y[index] == output.argmax():
                    correct += 1
                else:
                    incorrect += 1
            loss_model = F.nll_loss(f_x, y)
            loss_val_model.append(loss_model.item())

            # meta_model = meta_optimizer.meta_update(model, loss.data)
            # Compute a loss for a step of the meta optimizer
            # f_x = meta_model(x)
            # loss_meta = F.nll_loss(f_x, y)
            # loss_val_meta.append(loss_meta.item())

        l_val_model = np.mean(loss_val_model)
        # l_val_meta_model = np.mean(loss_val_meta)
        loss_epoch.append(l_val_model)
        accuracy_epoch.append(float(correct) / (correct + incorrect))

        torch.save(meta_model.state_dict(),
                   '%s/%s_last.pth' % (args.outdir, 'meta_model_test'))
        if l_val_model < l_val_model_best:
            print("new best model")
            l_val_model_best = l_val_model
            torch.save(model.state_dict(),
                       '%s/%s_best.pth' % (args.outdir, 'meta_model_test'))

        print('\nValidation Loss Model: ' + str(l_val_model))
        print('\nValidation Accuracy: ' + str(float(correct) / (correct + incorrect)))

        np.save('%s/loss_epoch.npy' % (args.outdir), np.array(loss_epoch))
        np.save('%s/accuracy_epoch.npy' % (args.outdir), np.array(accuracy_epoch))
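# Every entry point above reads its hyperparameters from a module-level `args`
# object that is not shown in this listing. A hypothetical sketch of the parser
# it appears to assume follows; the flag names are taken from the code above,
# but the default values are illustrative only, not the originals.
import argparse
import torch

parser = argparse.ArgumentParser(description='Meta-optimizer training/evaluation')
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--optimizer_steps', type=int, default=100)
parser.add_argument('--truncated_bptt_step', type=int, default=20)
parser.add_argument('--updates_per_epoch', type=int, default=10)
parser.add_argument('--max_epoch', type=int, default=100)
parser.add_argument('--hidden_size', type=int, default=10)
parser.add_argument('--num_layers', type=int, default=2)
parser.add_argument('--train_split', type=float, default=0.8)
parser.add_argument('--print_pause', type=int, default=100)
parser.add_argument('--RNN', type=str, default='Fast',
                    choices=['Fast', 'LSTM', 'GRU', 'RNN'])
parser.add_argument('--lr_only', action='store_true')
parser.add_argument('--fast_meta_opt', action='store_true')
parser.add_argument('--replay_trajectory', action='store_true')
parser.add_argument('--outdir', type=str, default='.')
parser.add_argument('--no_cuda', action='store_true')
args = parser.parse_args()
# Derive the `args.cuda` flag used throughout the scripts above.
args.cuda = not args.no_cuda and torch.cuda.is_available()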