Example #1
def main():
    #torch.random.manual_seed(123)
    # Create a meta optimizer that wraps a model into a meta model
    # to keep track of the meta updates.
    meta_model = Model(**kwargs)
    if args.cuda:
        meta_model.cuda()
    #for module in meta_model.modules():
    #    print(module._parameters)
    #    print(list(module.children()))

    if args.lr_only:
        meta_optimizer = LearningRateOnlyMetaOptimizer(MetaModel(meta_model),
                                                       args.num_layers,
                                                       args.hidden_size)
    elif args.fast_meta_opt:
        meta_optimizer = FastMetaOptimizer(MetaModel(meta_model),
                                           args.num_layers, args.hidden_size)
    else:
        meta_optimizer = MetaOptimizer(MetaModel(meta_model), args.num_layers,
                                       args.hidden_size)
    if args.cuda:
        meta_optimizer.cuda()

    optimizer = optim.Adam(meta_optimizer.parameters())

    alpha = 0.999

    d = 1

    start_time = time()

    for epoch in range(args.max_epoch):
        decrease_in_loss = 0.0
        final_loss = 0.0
        train_iter = iter(train_loader)
        for i in range(args.updates_per_epoch):

            try:
                x, y = next(train_iter)
            except StopIteration:
                train_iter = iter(train_loader)
                x, y = next(train_iter)

            if args.cuda:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)

            # Sample a new model
            model = Model(**kwargs)
            if args.cuda:
                model.cuda()

            if args.replay_trajectory:
                backup_model = Model(**kwargs)
                if args.cuda:
                    backup_model.cuda()

            # Compute initial loss of the model
            f_x = model(x)
            initial_loss = F.nll_loss(f_x, y)

            av_loss = 0.

            for k in range(args.optimizer_steps // args.truncated_bptt_step):
                # Keep states for truncated BPTT
                meta_optimizer.reset_lstm(keep_states=k > 0,
                                          model=model,
                                          use_cuda=args.cuda)

                if args.replay_trajectory:
                    #meta_optimizer.backup_model_params()
                    copy_params(source=model, dest=backup_model)

                loss_sum = 0
                prev_loss = torch.zeros(1)
                if args.cuda:
                    prev_loss = prev_loss.cuda()
                for j in range(args.truncated_bptt_step):
                    try:
                        x, y = next(train_iter)
                    except StopIteration:
                        train_iter = iter(train_loader)
                        x, y = next(train_iter)

                    if args.cuda:
                        x, y = x.cuda(), y.cuda()
                    x, y = Variable(x), Variable(y)

                    # First we need to compute the gradients of the model
                    f_x = model(x)
                    loss = F.nll_loss(f_x, y)
                    model.zero_grad()
                    loss.backward()

                    if not args.replay_trajectory:
                        av_loss = alpha * av_loss + (1 - alpha) * loss.data

                    # Perform a meta update using gradients from model
                    # and return the current meta model saved in the optimizer
                    meta_model = meta_optimizer.meta_update(model, loss.data)

                    # Compute a loss for a step of the meta optimizer
                    f_x = meta_model(x)
                    loss = F.nll_loss(f_x, y)

                    loss_sum += (loss - Variable(prev_loss))

                    prev_loss = loss.data

                # Update the parameters of the meta optimizer
                meta_optimizer.zero_grad()
                #loss_sum.backward()
                loss.backward()
                for param in meta_optimizer.parameters():
                    param.grad.data.clamp_(-1, 1)
                optimizer.step()

                if args.replay_trajectory:
                    meta_optimizer.reset_lstm(keep_states=k > 0,
                                              model=backup_model,
                                              use_cuda=args.cuda)
                    copy_params(source=backup_model, dest=model)

                    for j in range(args.truncated_bptt_step):
                        try:
                            x, y = next(train_iter)
                        except StopIteration:
                            train_iter = iter(train_loader)
                            x, y = next(train_iter)

                        if args.cuda:
                            x, y = x.cuda(), y.cuda()
                        x, y = Variable(x), Variable(y)

                        # First we need to compute the gradients of the model
                        f_x = model(x)
                        loss = F.nll_loss(f_x, y)
                        model.zero_grad()
                        loss.backward()

                        # Perform a meta update using gradients from model
                        # and return the current meta model saved in the optimizer
                        meta_model = meta_optimizer.meta_update(
                            model, loss.data)

                        av_loss = alpha * av_loss + (1 - alpha) * loss.data

                if (k * args.truncated_bptt_step) % args.print_pause == 0:
                    if args.lr_only:
                        meta_optimizer.learning_rate.data.clamp_(min=1e-8)
                        print('av_loss = {:.3f}; lr = {:.4f}'.format(
                            av_loss[0], meta_optimizer.learning_rate.data[0]))
                    else:
                        print('av_loss = {:.3f}'.format(av_loss[0]))
                if av_loss[0] < 0.1**d:
                    print('model reached loss < 1e-{} in {} steps ({:.1f}s)'.
                          format(d, k * args.truncated_bptt_step,
                                 time() - start_time))
                    if d >= 3:
                        break
                    d += 1

            # Compute relative decrease in the loss function w.r.t initial
            # value
            decrease_in_loss += loss.data[0] / initial_loss.data[0]
            final_loss += loss.data[0]

        print("Epoch: {}, final loss {}, average final/initial loss ratio: {}".
              format(epoch, final_loss / args.updates_per_epoch,
                     decrease_in_loss / args.updates_per_epoch))
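Note: copy_params(source=model, dest=backup_model) is called in Example #1 but its definition is not part of the snippet. A minimal sketch of what such a helper presumably does, namely copying parameter values between two models of identical architecture (the implementation in the original project may differ):

def copy_params(source, dest):
    # Copy parameter values from source into dest; both models are assumed
    # to have exactly the same architecture and parameter ordering.
    for p_src, p_dst in zip(source.parameters(), dest.parameters()):
        p_dst.data.copy_(p_src.data)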
Example #2
def main():
    # Create a meta optimizer that wraps a model into a meta model
    # to keep track of the meta updates.
    meta_model = Model()
    if args.cuda:
        meta_model.cuda()

    meta_optimizer = FastMetaOptimizer(MetaModel(meta_model), args.num_layers,
                                       args.hidden_size)
    if args.cuda:
        meta_optimizer.cuda()

    optimizer = optim.Adam(meta_optimizer.parameters(), lr=3e-3)

    for epoch in range(args.max_epoch):
        decrease_in_loss = 0.0
        final_loss = 0.0
        train_iter = iter(train_loader)
        for i in range(args.updates_per_epoch):

            # Sample a new model
            model = Model()
            if args.cuda:
                model.cuda()

            x, y = next(train_iter)
            if args.cuda:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)

            # Compute initial loss of the model
            f_x = model(x)
            initial_loss = F.nll_loss(f_x, y)

            for k in range(args.optimizer_steps // args.truncated_bptt_step):
                # Keep states for truncated BPTT
                meta_optimizer.reset_lstm(keep_states=k > 0,
                                          model=model,
                                          use_cuda=args.cuda)

                loss_sum = 0
                prev_loss = torch.zeros(1)
                if args.cuda:
                    prev_loss = prev_loss.cuda()
                for j in range(args.truncated_bptt_step):
                    x, y = next(train_iter)
                    if args.cuda:
                        x, y = x.cuda(), y.cuda()
                    x, y = Variable(x), Variable(y)

                    # First we need to compute the gradients of the model
                    f_x = model(x)
                    loss = F.nll_loss(f_x, y)
                    model.zero_grad()
                    loss.backward()

                    # Perform a meta update using gradients from model
                    # and return the current meta model saved in the optimizer
                    meta_model = meta_optimizer.meta_update(model, loss.data)

                    # Compute a loss for a step of the meta optimizer
                    f_x = meta_model(x)
                    loss = F.nll_loss(f_x, y)

                    loss_sum += (loss - Variable(prev_loss))

                    prev_loss = loss.data

                # Update the parameters of the meta optimizer
                meta_optimizer.zero_grad()
                loss_sum.backward()
                for param in meta_optimizer.parameters():
                    param.grad.data.clamp_(-1, 1)
                optimizer.step()

            # Compute relative decrease in the loss function w.r.t initial
            # value
            decrease_in_loss += loss.data / initial_loss.data
            final_loss += loss.data

        print("Epoch: {}, final loss {}, average final/initial loss ratio: {}".
              format(epoch, final_loss / args.updates_per_epoch,
                     decrease_in_loss / args.updates_per_epoch))
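A note on the loss_sum += (loss - Variable(prev_loss)) accumulation used throughout these examples: prev_loss stores the previous step's loss as detached data, so the subtraction changes only the value, not the gradient. Numerically, loss_sum telescopes to the loss of the last step in the segment (prev_loss starts at zero), while the backward pass still receives gradient contributions from every step's loss. A tiny plain-Python illustration of the telescoping, using made-up loss values:

losses = [2.3, 1.9, 1.4, 1.0]      # hypothetical per-step losses within one BPTT segment
prev_loss = 0.0                    # starts at zero, as in the code above
loss_sum = 0.0
for loss in losses:
    loss_sum += loss - prev_loss   # same accumulation pattern as above
    prev_loss = loss
print(loss_sum)                    # 1.0 -- telescopes to the final step's loss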
Example #3
def main():
    # Create a meta optimizer that wraps a model into a meta model
    # to keep track of the meta updates.

    meta_model = Model()
    if args.cuda:
        meta_model.cuda()
    meta_model.apply(weights_init)

    if args.RNN == 'Fast':
        meta_optimizer = FastMetaOptimizer(MetaModel(meta_model),
                                           args.num_layers, args.hidden_size)
    elif args.RNN == 'LSTM':
        meta_optimizer = MetaOptimizerLSTM(MetaModel(meta_model),
                                           args.num_layers, args.hidden_size)
    elif args.RNN == 'GRU':
        meta_optimizer = MetaOptimizerGRU(MetaModel(meta_model),
                                          args.num_layers, args.hidden_size)
    elif args.RNN == 'RNN':
        meta_optimizer = MetaOptimizerRNN(MetaModel(meta_model),
                                          args.num_layers, args.hidden_size)
    optimizer = optim.Adam(meta_optimizer.parameters(), lr=1e-3)
    if args.cuda:
        meta_optimizer.cuda()
    meta_optimizer.load_state_dict(
        torch.load('%s/%s_best.pth' % (args.outdir, 'meta_optimizer')))

    l_val_model_best = 99999
    l_val_meta_model_best = 99999
    accuracy_model = []
    loss_model = []

    models_tested = 50
    train_iter = iter(train_loader)
    epoch_loss = [[] for i in range(
        int(len(train_iter) * args.train_split // args.truncated_bptt_step))]

    model = Model()
    if args.cuda:
        model.cuda()
    model.apply(weights_init)
    for epoch in tqdm(range(models_tested)):
        train_iter = iter(train_loader)
        loss_train_model = []
        loss_train_meta = []
        loss_val_model = []
        loss_val_meta = []
        correct = 0
        incorrect = 0

        #model = Model()
        #if args.cuda:
        #    model.cuda()
        #model.apply(weights_init)
        for k in range(
                int(
                    len(train_iter) * args.train_split //
                    (args.truncated_bptt_step * 2))):
            # Keep states for truncated BPTT
            meta_optimizer.reset_lstm(keep_states=k > 0,
                                      model=model,
                                      use_cuda=args.cuda)

            loss_sum = 0
            prev_loss = torch.zeros(1)
            if args.cuda:
                prev_loss = prev_loss.cuda()
            for j in range(args.truncated_bptt_step * 2):
                x, y = next(train_iter)
                if args.cuda:
                    x, y = x.cuda(), y.cuda()
                x, y = Variable(x), Variable(y)

                # First we need to compute the gradients of the model
                f_x = model(x)
                loss = F.nll_loss(f_x, y)
                model.zero_grad()
                loss.backward()
                meta_model = meta_optimizer.meta_update(model, loss.data)
            epoch_loss[k].append(loss.item())

            # Compute a loss for a step of the meta optimizer
        for k in range(int(len(train_iter) * (1 - args.train_split))):
            x, y = next(train_iter)
            if args.cuda:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)
            f_x = meta_model(x)
            for output, index in zip(f_x.cpu().detach().numpy(),
                                     range(len(f_x.cpu().detach().numpy()))):
                if y[index] == output.argmax():
                    correct += 1
                else:
                    incorrect += 1
            loss = F.nll_loss(f_x, y)
            loss_val_model.append(loss.item())

        l_val_model = np.mean(loss_val_model)
        loss_model.append(l_val_model)
        accuracy_model.append(float(correct) / (correct + incorrect))
        print(float(correct) / (correct + incorrect))

    print('\nValidation Loss Model: ' + str(np.mean(loss_model)))
    print('\nValidation Accuracy: ' + str(np.mean(accuracy_model)))

    mean_epoch_loss = [np.mean(i) for i in epoch_loss]
    np.save('%s/loss_epoch_test.npy' % (args.outdir), mean_epoch_loss)
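weights_init is applied with model.apply(weights_init) in Examples #3 and #4 but is not shown in the snippets. A common PyTorch pattern for such a function is sketched below as an assumption; the initialization actually used in the original project may differ:

import torch.nn as nn

def weights_init(m):
    # Called on every submodule by model.apply(weights_init); re-initialize
    # only layer types with weights and leave everything else untouched.
    if isinstance(m, (nn.Linear, nn.Conv2d)):
        nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)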
Example #4
def main():
    # Create a meta optimizer that wraps a model into a meta model
    # to keep track of the meta updates.
    meta_model = Model()
    if args.cuda:
        meta_model.cuda()
    meta_model.apply(weights_init)
    
    if args.RNN == 'Fast':
        meta_optimizer = FastMetaOptimizer(MetaModel(meta_model), args.num_layers, args.hidden_size)
    elif args.RNN == 'LSTM':
        meta_optimizer = MetaOptimizerLSTM(MetaModel(meta_model), args.num_layers, args.hidden_size)
    elif args.RNN == 'GRU':
        meta_optimizer = MetaOptimizerGRU(MetaModel(meta_model), args.num_layers, args.hidden_size)
    elif args.RNN == 'RNN':
        meta_optimizer = MetaOptimizerRNN(MetaModel(meta_model), args.num_layers, args.hidden_size)
    else: 
        raise NameError('not valid RNN')
    if args.cuda:
        meta_optimizer.cuda()

    optimizer = optim.Adam(meta_optimizer.parameters(), lr=1e-3)

    l_val_model_best = 99999
    l_val_meta_model_best = 999999
    acc_val_best = 0
    loss_epoch_val = []
    accuracy_epoch_val = []
    loss_epoch_optimizer_train = []
    
    
    for epoch in range(args.max_epoch):
        print("Epoch %s\n" % epoch)
        decrease_in_loss = 0.0
        final_loss = 0.0
        train_iter = iter(train_loader)

        loss_train_model = []
        loss_train_meta = []
        loss_val_model = []
        loss_val_optimizer = []
        correct = 0 
        incorrect = 0 
        
        updates = args.updates_per_epoch

        for i in tqdm(range(updates)):
            
            #Sample a new model
            model = Model()
            if args.cuda:
                model.cuda()
            meta_model.apply(weights_init)
            #model_optimizer = optim.Adam(model.parameters(), lr=0.01)
            
            x, y = next(train_iter)
            if args.cuda:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)
            
            
            # Compute initial loss of the model
            f_x = model(x)
            initial_loss = F.nll_loss(f_x, y)

             
            for k in range(args.optimizer_steps // args.truncated_bptt_step):
                # Keep states for truncated BPTT
                meta_optimizer.reset_lstm(
                    keep_states=k > 0, model=model, use_cuda=args.cuda)

                loss_sum = 0
                prev_loss = torch.zeros(1)
                if args.cuda:
                    prev_loss = prev_loss.cuda()
                for j in range(args.truncated_bptt_step):
                    x, y = next(train_iter)
                    if args.cuda:
                        x, y = x.cuda(), y.cuda()
                    x, y = Variable(x), Variable(y)

                    # Training cycle for optimizer training
                    # First we need to compute the gradients of the model
                    f_x = model(x)
                    loss = F.nll_loss(f_x, y)
                    loss_train_model.append(loss.item())
                    model.zero_grad()
                    loss.backward()
                        
                    # Perform a meta update using gradients from model
                    # and return the current meta model saved in the optimizer
                    meta_model = meta_optimizer.meta_update(model, loss.data)

                    # Compute a loss for a step of the meta optimizer
                    f_x = meta_model(x)
                    loss = F.nll_loss(f_x, y)
                    loss_sum += (loss - Variable(prev_loss))
                    prev_loss = loss.data
                        
                # Update the parameters of the meta optimizer
                meta_optimizer.zero_grad()
                loss_train_meta.append(loss_sum.item())
                loss_sum.backward()
                      
                for param in meta_optimizer.parameters():
                    param.grad.data.clamp_(-1, 1)
                optimizer.step()
            
            # Compute relative decrease in the loss function w.r.t initial
            # value
            decrease_in_loss += loss.item() / initial_loss.item()
            final_loss += loss.item()

        for i in tqdm(range(updates - 6)):
            
            #Sample a new model
            model = Model()
            if args.cuda:
                model.cuda()
            meta_model.apply(weights_init)

            for k in range(args.optimizer_steps // args.truncated_bptt_step):
                # Keep states for truncated BPTT
                meta_optimizer.reset_lstm(
                    keep_states=k > 0, model=model, use_cuda=args.cuda)

                loss_sum = 0
                prev_loss = torch.zeros(1)
                if args.cuda:
                    prev_loss = prev_loss.cuda()
                for j in range(args.truncated_bptt_step):
                    x, y = next(train_iter)
                    if args.cuda:
                        x, y = x.cuda(), y.cuda()
                    x, y = Variable(x), Variable(y)
                        
                    # First we need to compute the gradients of the model
                    f_x = model(x)
                    loss = F.nll_loss(f_x, y)
                    model.zero_grad()
                    loss.backward()
                    meta_model = meta_optimizer.meta_update(model, loss.data)
                        
                        
                    f_x = meta_model(x)
                    loss = F.nll_loss(f_x, y)
                    loss_sum += (loss - Variable(prev_loss))

                    prev_loss = loss.data
                    loss_val_optimizer.append(loss_sum.item())

            # Compute a loss for a step of the meta optimizer
            for datum in range(350 // (updates - 6)):
                x, y = next(train_iter)
                if args.cuda:
                    x, y = x.cuda(), y.cuda()
                x, y = Variable(x), Variable(y)
                f_x = meta_model(x)
                for output, index in zip(f_x.cpu().detach().numpy(), range(len(f_x.cpu().detach().numpy()))):
                    if y[index] == output.argmax():
                        correct += 1
                    else: 
                        incorrect += 1
                loss = F.nll_loss(f_x, y)
                loss_val_model.append(loss.item())
            
        
        l_val_model = np.mean(loss_val_model)
        #l_val_meta_model = np.mean(loss_val_meta)
        
        loss_epoch_val.append(l_val_model)
        accuracy_epoch_val.append(float(correct) / (correct + incorrect))
        loss_epoch_optimizer_train.append(np.mean(loss_val_optimizer))
        
        
        torch.save(meta_model.state_dict(), '%s/%s_last.pth'%(args.outdir,'meta_model'))
        torch.save(meta_optimizer.state_dict(), '%s/%s_last.pth'%(args.outdir,'meta_optimizer'))
        if l_val_model < l_val_model_best:
            print("new best model")
            l_val_model_best = l_val_model
            torch.save(model.state_dict(), '%s/%s_best.pth'%(args.outdir,'meta_model'))
            torch.save(meta_optimizer.state_dict(), '%s/%s_best.pth'%(args.outdir,'meta_optimizer'))
        if epoch % 100 == 0: 
            torch.save(meta_optimizer.state_dict(), '%s/%s_%sepoch.pth'%(args.outdir,'meta_optimizer', epoch))

        print("Epoch: {}, final loss {}, average final/initial loss ratio: {}".format(epoch, final_loss / args.updates_per_epoch, decrease_in_loss / args.updates_per_epoch))
        print('\nValidation Loss Model: ' + str(np.mean(loss_val_model)))
        #print('\nValidation Loss Meta: ' + str(np.mean(loss_val_meta)))
        print('\nValidation Accuracy: ' + str(float(correct) / (correct + incorrect)))
        print('\nTraining Loss Model: ' + str(np.mean(loss_train_model)))
        #print('\nTraining Loss Meta: ' + str(np.mean(loss_train_meta)))
        np.save('%s/loss_epoch_val.npy'%(args.outdir), np.array(loss_epoch_val))
        np.save('%s/accuracy_epoch_val.npy'%(args.outdir), np.array(accuracy_epoch_val))
        np.save('%s/loss_epoch_optimizer_val.npy'%(args.outdir), np.array(loss_epoch_optimizer_train))  
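The correct/incorrect bookkeeping above walks over a numpy copy of f_x one row at a time (and converts f_x to numpy twice per batch). Assuming PyTorch 0.4 or later, which the .item() and .detach() calls already imply, the same counts can be obtained with a small vectorized helper; this is a sketch, not part of the original code:

def batch_accuracy_counts(f_x, y):
    # f_x: per-class scores/log-probabilities, shape (batch, classes)
    # y: target class indices, shape (batch,)
    pred = f_x.max(1)[1]
    correct = (pred == y).sum().item()
    incorrect = y.numel() - correct
    return correct, incorrect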
Example #5
def main():
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True, **kwargs)

    # Create a meta optimizer that wraps a model into a meta model
    # to keep track of the meta updates.
    meta_model = Model2()
    if args.cuda:
        meta_model.cuda()

    meta_optimizer = FastMetaOptimizer(MetaModel(meta_model), args.num_layers, args.hidden_size)
    if args.cuda:
        meta_optimizer.cuda()

    print(meta_optimizer)
    
    optimizer = optim.Adam(meta_optimizer.parameters(), lr=1e-3)

    for epoch in range(args.max_epoch):
        decrease_in_loss = 0.0
        final_loss = 0.0
        train_iter = iter(train_loader)
        for i in range(args.updates_per_epoch):

            # Sample a new model
            model = Model2()
            if args.cuda:
                model.cuda()

            x, y = next(train_iter)
            if args.cuda:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)

            # Compute initial loss of the model
            f_x = model(x)
            initial_loss = F.nll_loss(f_x, y)

            for k in range(args.optimizer_steps // args.truncated_bptt_step):
                # Keep states for truncated BPTT
                meta_optimizer.reset_lstm(
                    keep_states=k > 0, model=model, use_cuda=args.cuda)

                loss_sum = 0
                prev_loss = torch.zeros(1)
                if args.cuda:
                    prev_loss = prev_loss.cuda()
                for j in range(args.truncated_bptt_step):
                    x, y = next(train_iter)
                    if args.cuda:
                        x, y = x.cuda(), y.cuda()
                    x, y = Variable(x), Variable(y)

                    # First we need to compute the gradients of the model
                    f_x = model(x)
                    loss = F.nll_loss(f_x, y)
                    acc = (f_x.max(1)[1] == y).type(torch.FloatTensor).mean()
                    model.zero_grad()
                    loss.backward()

                    # Perform a meta update using gradients from model
                    # and return the current meta model saved in the optimizer
                    meta_model = meta_optimizer.meta_update(model, loss.data)

                    # Compute a loss for a step of the meta optimizer
                    f_x = meta_model(x)
                    loss = F.nll_loss(f_x, y)

                    loss_sum += (loss - Variable(prev_loss))

                    prev_loss = loss.data

                # Update the parameters of the meta optimizer
                meta_optimizer.zero_grad()
                loss_sum.backward()
                for param in meta_optimizer.parameters():
                    param.grad.data.clamp_(-1, 1)
                optimizer.step()

            # Compute relative decrease in the loss function w.r.t initial
            # value
            decrease_in_loss += loss.data[0] / initial_loss.data[0]
            final_loss += loss.data[0]

        print("Epoch: {}, final loss {}, average final/initial loss ratio: {}, params: {}, acc: {}".format(epoch, final_loss / args.updates_per_epoch,
                                                                       decrease_in_loss / args.updates_per_epoch, [meta_optimizer.f, meta_optimizer.i], acc))
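Model2 is not defined in this snippet; since its output is fed straight into F.nll_loss on MNIST batches, it is presumably a small classifier whose forward pass returns log-probabilities. A minimal sketch of such a model, offered as an assumption rather than the original definition:

import torch.nn as nn
import torch.nn.functional as F

class Model2(nn.Module):
    # Minimal MNIST classifier returning log-probabilities, so that
    # F.nll_loss(model(x), y) in the loop above is well-defined.
    def __init__(self):
        super(Model2, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 32)
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        return F.log_softmax(self.fc2(x), dim=1)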
Example #6
def main_meta_lstm():
    TEXT = data.Field(sequential=True, include_lengths=True)
    LABEL = data.Field(sequential=False)
    train, val, test = datasets.SNLI.splits(TEXT, LABEL)
    TEXT.build_vocab(train, vectors="glove.840B.300d")
    LABEL.build_vocab(train)
    vocab = TEXT.vocab
    train_iter, val_iter, test_iter = data.Iterator.splits(
        (train, val, test), 
        batch_size=50,
        repeat=False, 
        shuffle=False)
    config = Config()
    
    criterion = nn.CrossEntropyLoss()

    # Create a meta optimizer that wraps a model into a meta model
    # to keep track of the meta updates.
    meta_model = Model(vocab, config)
    if args.cuda:
        meta_model.cuda()

    meta_optimizer = FastMetaOptimizer(MetaModel(meta_model), args.num_layers, args.hidden_size)
    if args.cuda:
        meta_optimizer.cuda()

    optimizer = optim.Adam(meta_optimizer.parameters(), lr=1e-3)


    for i in range(args.max_epoch):

        # Sample a new model
        model = Model(vocab, config)
        if args.cuda:
            model.cuda()

        train_acc = 0.0
        train_cnt = 0
        for k in range(args.optimizer_steps):
            # Keep states for truncated BPTT
            meta_optimizer.reset_lstm(
                keep_states=k > 0, model=model, use_cuda=args.cuda)

            loss_sum = 0
            prev_loss = torch.zeros(1)
            if args.cuda:
                prev_loss = prev_loss.cuda()
            for j in range(args.truncated_bptt_step):
                batch = next(iter(train_iter))
                x, y = batch, batch.label - 1

                # First we need to compute the gradients of the model
                f_x = model(x)
                acc = (f_x.max(1)[1] == y).type(torch.FloatTensor).mean().float()
                train_acc += acc
                train_cnt += 1
                loss = criterion(f_x, y)
                model.zero_grad()
                loss.backward()

                # Perform a meta update using gradients from model
                # and return the current meta model saved in the optimizer
                meta_model = meta_optimizer.meta_update(model, loss.data)

                # Compute a loss for a step of the meta optimizer
                f_x = meta_model(x)
                loss = criterion(f_x, y)

                loss_sum += (loss - Variable(prev_loss))

                prev_loss = loss.data

            # Update the parameters of the meta optimizer
            meta_optimizer.zero_grad()
            loss_sum.backward()
            for param in meta_optimizer.parameters():
                param.grad.data.clamp_(-1, 1)
            optimizer.step()

            print('i = {}, k = {}, acc = {}, loss = {}'.format(i, k, acc, loss.float()))

        test_acc = 0.0
        test_cnt = 0
        for batch in test_iter:
            x, y = batch, batch.label - 1
            f_x = model(x)
            test_acc += (f_x.max(1)[1] == y).type(torch.FloatTensor).mean().float()
            test_cnt += 1
        print('epoch = {}, train_acc = {}, test_acc = {}'.format(i, train_acc / train_cnt, test_acc / test_cnt))
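A note on x, y = batch, batch.label - 1 in Examples #6 and #7: with the default data.Field(sequential=False), the vocabulary built by LABEL.build_vocab(train) reserves index 0 for the unknown token, so the three SNLI classes land at indices 1..3 and subtracting 1 maps them to 0..2 for nn.CrossEntropyLoss. Assuming the legacy torchtext Field API used here, an alternative is to drop the unknown slot when declaring the field, so no shift is needed:

# Alternative (legacy torchtext): build the label vocab without an <unk> slot
LABEL = data.Field(sequential=False, unk_token=None)
LABEL.build_vocab(train)
# ... later in the loop: x, y = batch, batch.label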
Example #7
def main2():
    TEXT = data.Field(sequential=True, include_lengths=True)
    LABEL = data.Field(sequential=False)
    train, val, test = datasets.SNLI.splits(TEXT, LABEL)
    TEXT.build_vocab(train, vectors="glove.840B.300d")
    LABEL.build_vocab(train)
    vocab = TEXT.vocab
    train_iter, val_iter, test_iter = data.Iterator.splits(
        (train, val, test), 
        batch_size=50,
        repeat=False)
    config = Config()
    
    criterion = nn.CrossEntropyLoss()

    # Create a meta optimizer that wraps a model into a meta model
    # to keep track of the meta updates.
    meta_model = CNNModel(vocab, config)
    if args.cuda:
        meta_model.cuda()

    meta_optimizer = FastMetaOptimizer(MetaModel(meta_model), args.num_layers, args.hidden_size)
    if args.cuda:
        meta_optimizer.cuda()

    optimizer = optim.Adam(meta_optimizer.parameters(), lr=1e-3)

    for epoch in range(args.max_epoch):
        decrease_in_loss = 0.0
        final_loss = 0.0
        for i in range(args.updates_per_epoch):

            # Sample a new model
            model = CNNModel(vocab, config)
            if args.cuda:
                model.cuda()

            batch = next(iter(train_iter))
            x, y = batch, batch.label - 1

            # Compute initial loss of the model
            f_x = model(x)
            initial_loss = criterion(f_x, y)

            for k in range(args.optimizer_steps // args.truncated_bptt_step):
                # Keep states for truncated BPTT
                meta_optimizer.reset_lstm(
                    keep_states=k > 0, model=model, use_cuda=args.cuda)

                loss_sum = 0
                prev_loss = torch.zeros(1)
                if args.cuda:
                    prev_loss = prev_loss.cuda()
                for j in range(args.truncated_bptt_step):
                    batch = next(iter(train_iter))
                    x, y = batch, batch.label - 1

                    # First we need to compute the gradients of the model
                    f_x = model(x)
                    acc = (f_x.max(1)[1] == y).type(torch.FloatTensor).mean()
                    loss = criterion(f_x, y)
                    model.zero_grad()
                    loss.backward()

                    # Perform a meta update using gradients from model
                    # and return the current meta model saved in the optimizer
                    meta_model = meta_optimizer.meta_update(model, loss.data)

                    # Compute a loss for a step of the meta optimizer
                    f_x = meta_model(x)
                    loss = criterion(f_x, y)

                    loss_sum += (loss - Variable(prev_loss))

                    prev_loss = loss.data

                # Update the parameters of the meta optimizer
                meta_optimizer.zero_grad()
                loss_sum.backward()
                for param in meta_optimizer.parameters():
                    param.grad.data.clamp_(-1, 1)
                optimizer.step()

                print('acc=', acc)
                print('loss=', loss)
                print('para=', [meta_optimizer.f, meta_optimizer.i])
            # Compute relative decrease in the loss function w.r.t initial
            # value
            decrease_in_loss += loss.data[0] / initial_loss.data[0]
            final_loss += loss.data[0]

        print("Epoch: {}, final loss {}, average final/initial loss ratio: {}, params: {}".format(epoch, final_loss / args.updates_per_epoch,
                                                                       decrease_in_loss / args.updates_per_epoch, [meta_optimizer.f, meta_optimizer.i]))
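The values printed as [meta_optimizer.f, meta_optimizer.i] in Examples #5 and #7 appear to be the forget and input gates of the FastMetaOptimizer update rule used in this kind of learning-to-learn setup, where each parameter is updated roughly as theta <- f * theta - i * grad. A small self-contained illustration of that gated update on hypothetical tensors (not the optimizer's actual code):

import torch

theta = torch.randn(5)               # current (flattened) parameters
grad = torch.randn(5)                # their gradients
f = torch.sigmoid(torch.randn(5))    # forget gate, in (0, 1)
i = torch.sigmoid(torch.randn(5))    # input gate, acts like a learned step size
theta = f * theta - i * grad         # gated parameter update
print(theta)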
Example #8
def main():
    # Create a meta optimizer that wraps a model into a meta model
    # to keep track of the meta updates.
    meta_model = Model()
    if args.cuda:
        meta_model.cuda()

    meta_optimizer = FastMetaOptimizer(MetaModel(meta_model), args.num_layers,
                                       args.hidden_size)
    if args.cuda:
        meta_optimizer.cuda()

    optimizer = optim.Adam(meta_optimizer.parameters(), lr=1e-3)

    l_val_model_best = 99999
    l_val_meta_model_best = 99999

    for epoch in range(args.max_epoch):
        print("Epoch %s\n" % epoch)
        decrease_in_loss = 0.0
        final_loss = 0.0
        train_iter = iter(train_loader)

        loss_train_model = []
        loss_train_meta = []
        loss_val_model = []
        loss_val_meta = []
        correct = 0
        incorrect = 0

        #for i in tqdm(range(args.updates_per_epoch)):

        #updates = int(float(args.train_split) * len(train_loader) /(((args.optimizer_steps // args.truncated_bptt_step) * args.truncated_bptt_step) + 1))
        updates = int(float(args.train_split) * len(train_loader))
        for i in tqdm(range(updates)):

            # Sample a new model
            model = Model()
            if args.cuda:
                model.cuda()

            x, y = next(train_iter)
            if args.cuda:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)

            # Compute initial loss of the model
            f_x = model(x)
            initial_loss = F.nll_loss(f_x, y)

            for k in range(args.optimizer_steps // args.truncated_bptt_step):
                # Keep states for truncated BPTT
                meta_optimizer.reset_lstm(keep_states=k > 0,
                                          model=model,
                                          use_cuda=args.cuda)

                loss_sum = 0
                prev_loss = torch.zeros(1)
                if args.cuda:
                    prev_loss = prev_loss.cuda()
                for j in range(args.truncated_bptt_step):
                    #x, y = next(train_iter)
                    if args.cuda:
                        x, y = x.cuda(), y.cuda()
                    x, y = Variable(x), Variable(y)

                    # First we need to compute the gradients of the model
                    f_x = model(x)
                    loss = F.nll_loss(f_x, y)
                    loss_train_model.append(loss.item())
                    model.zero_grad()
                    loss.backward()

                    # Perform a meta update using gradients from model
                    # and return the current meta model saved in the optimizer
                    meta_model = meta_optimizer.meta_update(model, loss.data)

                    # Compute a loss for a step of the meta optimizer
                    f_x = meta_model(x)
                    loss = F.nll_loss(f_x, y)
                    loss_sum += (loss - Variable(prev_loss))

                    prev_loss = loss.data

                # Update the parameters of the meta optimizer
                meta_optimizer.zero_grad()
                loss_train_meta.append(loss_sum.item())
                loss_sum.backward()

                for param in meta_optimizer.parameters():
                    param.grad.data.clamp_(-1, 1)
                optimizer.step()

            # Compute relative decrease in the loss function w.r.t initial
            # value
            decrease_in_loss += loss.item() / initial_loss.item()
            final_loss += loss.item()

        for i in tqdm(range(int((1 - args.train_split) * len(train_loader)))):
            x, y = next(train_iter)
            if args.cuda:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)

            # Compute initial loss of the model
            f_x = model(x)

            for output, index in zip(f_x.cpu().detach().numpy(),
                                     range(len(f_x.cpu().detach().numpy()))):
                if y[index] == output.argmax():
                    correct += 1
                else:
                    incorrect += 1

            loss_model = F.nll_loss(f_x, y)
            loss_val_model.append(loss_model.item())

            meta_model = meta_optimizer.meta_update(model, loss.data)

            # Compute a loss for a step of the meta optimizer
            f_x = meta_model(x)
            loss_meta = F.nll_loss(f_x, y)
            loss_val_meta.append(loss_meta.item())

        torch.save(model.state_dict(),
                   '%s/%s_last.pth' % (args.outdir, 'model'))
        torch.save(meta_model.state_dict(),
                   '%s/%s_last.pth' % (args.outdir, 'meta_model'))

        l_val_model = np.mean(loss_val_model)
        l_val_meta_model = np.mean(loss_val_meta)
        if l_val_model < l_val_model_best:
            print("new best model")
            l_val_model_best = l_val_model
            torch.save(model.state_dict(),
                       '%s/%s_best.pth' % (args.outdir, 'model'))

        if l_val_meta_model < l_val_meta_model_best:
            print("new best meta-model")
            l_val_meta_model_best = l_val_meta_model
            torch.save(meta_model.state_dict(),
                       '%s/%s_best.pth' % (args.outdir, 'meta_model'))

        #print("Epoch: {}, final loss {}, average final/initial loss ratio: {}".format(epoch, final_loss / args.updates_per_epoch, decrease_in_loss / args.updates_per_epoch))
        print('\nValidation Loss Model: ' + str(np.mean(loss_val_model)))
        print('\nValidation Loss Meta: ' + str(np.mean(loss_val_meta)))
        print('\nValidation Accuracy: ' +
              str(float(correct) / (correct + incorrect)))
        print('\nTraining Loss Model: ' + str(np.mean(loss_train_model)))
        print('\nTraining Loss Meta: ' + str(np.mean(loss_train_meta)))

def main():
    # Create a meta optimizer that wraps a model into a meta model
    # to keep track of the meta updates.
    meta_model = Model()
    if args.cuda:
        meta_model.cuda()
    
    if args.RNN == 'Fast':
        meta_optimizer = FastMetaOptimizer(MetaModel(meta_model), args.num_layers, args.hidden_size)
    elif args.RNN == 'LSTM':
        meta_optimizer = MetaOptimizerLSTM(MetaModel(meta_model), args.num_layers, args.hidden_size)
    elif args.RNN == 'GRU':
        meta_optimizer = MetaOptimizerGRU(MetaModel(meta_model), args.num_layers, args.hidden_size)
    elif args.RNN == 'RNN':
        meta_optimizer = MetaOptimizerRNN(MetaModel(meta_model), args.num_layers, args.hidden_size)
    optimizer = optim.Adam(meta_optimizer.parameters(), lr=1e-3)    
    if args.cuda:
        meta_optimizer.cuda()
    meta_optimizer.load_state_dict(torch.load('%s/%s_best.pth'%(args.outdir,'meta_optimizer')))
    
    #optimizer = optim.Adam(model.parameters(), lr=1e-3)

    l_val_model_best = 99999
    l_val_meta_model_best = 99999
    loss_epoch = []
    accuracy_epoch = []
    
    for epoch in range(args.max_epoch):
        print("Epoch %s\n" % epoch)
        decrease_in_loss = 0.0
        final_loss = 0.0
        train_iter = iter(train_loader)

        loss_train_model = []
        loss_train_meta = []
        loss_val_model = []
        loss_val_meta = []
        correct = 0 
        incorrect = 0 
        
        updates = args.updates_per_epoch
        for i in tqdm(range(updates)):
            
            # Sample a new model
            model = Model()
            if args.cuda:
                model.cuda()
        
            x, y = next(train_iter)
            if args.cuda:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)
            
            
            # Compute initial loss of the model
            f_x = model(x)
            initial_loss = F.nll_loss(f_x, y)

            
            for k in range(args.optimizer_steps // args.truncated_bptt_step):
                # Keep states for truncated BPTT
                meta_optimizer.reset_lstm(
                    keep_states=k > 0, model=model, use_cuda=args.cuda)

                loss_sum = 0
                prev_loss = torch.zeros(1)
                if args.cuda:
                    prev_loss = prev_loss.cuda()
                for j in range(args.truncated_bptt_step):
                    x, y = next(train_iter)
                    if args.cuda:
                        x, y = x.cuda(), y.cuda()
                    x, y = Variable(x), Variable(y)

                    # First we need to compute the gradients of the model
                    f_x = model(x)
                    loss = F.nll_loss(f_x, y)
                    loss_train_model.append(loss.item())
                    model.zero_grad()
                    loss.backward()

                    # Perform a meta update using gradients from model
                    # and return the current meta model saved in the optimizer
                    meta_model = meta_optimizer.meta_update(model, loss.data)

                    # Compute a loss for a step of the meta optimizer
                    f_x = meta_model(x)
                    loss = F.nll_loss(f_x, y)
                    loss_sum += (loss - Variable(prev_loss))

                    prev_loss = loss.data
                    
                # Update the parameters of the meta optimizer

                loss_train_meta.append(loss_sum.item())
                loss_sum.backward()



        #for i in tqdm(range(int((1-args.train_split) * len(train_loader)))):
        for i in tqdm(range(int(len(train_iter) - args.updates_per_epoch*(1+args.optimizer_steps)))):
            x, y = next(train_iter)
            if args.cuda:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)
            
            
            #meta_optimizer.reset_lstm(
            #        keep_states=k > 0, model=model, use_cuda=args.cuda)
            
            # Compute initial loss of the model
            f_x = meta_model(x)
        
            for output, index in zip(f_x.cpu().detach().numpy(), range(len(f_x.cpu().detach().numpy()))):
                if y[index] == output.argmax():
                    correct += 1
                else: 
                    incorrect += 1
                    
            loss_model = F.nll_loss(f_x, y)
            loss_val_model.append(loss_model.item())
            
            
            #meta_model = meta_optimizer.meta_update(model, loss.data)

            # Compute a loss for a step of the meta optimizer
            #f_x = meta_model(x)
            #loss_meta = F.nll_loss(f_x, y)
            
            #loss_val_meta.append(loss_meta.item())
        
        l_val_model = np.mean(loss_val_model)
        #l_val_meta_model = np.mean(loss_val_meta)
        loss_epoch.append(l_val_model)
        accuracy_epoch.append(float(correct) / (correct + incorrect))
        torch.save(meta_model.state_dict(), '%s/%s_last.pth'%(args.outdir,'meta_model_test'))

        if l_val_model < l_val_model_best:
            print("new best model")
            l_val_model_best = l_val_model
            torch.save(model.state_dict(), '%s/%s_best.pth'%(args.outdir,'meta_model_test'))
            
        print('\nValidation Loss Model: ' + str(l_val_model))
        print('\nValidation Accuracy: ' + str(float(correct) / (correct + incorrect)))
    np.save('%s/loss_epoch.npy'%(args.outdir), np.array(loss_epoch))
    np.save('%s/accuracy_epoch.npy'%(args.outdir), np.array(accuracy_epoch))
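Every example clamps the meta-optimizer gradients with a manual loop over meta_optimizer.parameters(). Newer PyTorch versions provide the same element-wise clipping as a built-in utility; an equivalent one-liner that could be used in place of that loop (a sketch, assuming PyTorch 0.4 or later):

import torch

# Equivalent to the manual clamp loop: clip each gradient element to [-1, 1]
torch.nn.utils.clip_grad_value_(meta_optimizer.parameters(), clip_value=1.0)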