Example #1
    train_loader = DataLoader(train_dataset, cfg['batch_size'], shuffle=True, num_workers=1)

    if not cfg['use_both']:
        val_loader = DataLoader(val_dataset, cfg['batch_size'], shuffle=True, num_workers=1)



    print('Creating Model...')
    net = models[cfg['model_name']](cfg).to(device)
    net = nn.DataParallel(net)
    n_params = count_parameters(net)
    print("model: {:,} M parameters".format(n_params / 1024 / 1024))

    criterion = bce_with_logits  # nn.CrossEntropyLoss()
    optimizer = optim.Adamax(net.parameters(), lr=cfg['init_lr'])
    sched = LambdaLR(optimizer, lr_lambda=lr_schedule_func_builder())

    checkpoint_path = 'checkpoint/{}'.format(cfg_name)
    if not os.path.exists(checkpoint_path):
        os.mkdir(checkpoint_path)

    if cfg['train_check_point'] is not None:
        net_checkpoint = torch.load(cfg['train_check_point'])
        net.load_state_dict(net_checkpoint)
        optim_checkpoint = torch.load('optim_{}'.format(cfg['train_check_point']))
        optimizer.load_state_dict(optim_checkpoint)

    logger = Logger(os.path.join(checkpoint_path, "log.txt"))
    for k, v in cfg.items():
        logger.write(k+': {}'.format(v))
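Note: Example #1 builds its LambdaLR schedule from a lr_schedule_func_builder() helper that is not included in the snippet. A minimal sketch of such a builder, assuming a linear warm-up followed by step decay (the actual schedule used by the original code is not shown), could look like this:

# Hypothetical sketch of the schedule-function builder passed to LambdaLR above;
# the real lr_schedule_func_builder() in the source repository may differ.
def lr_schedule_func_builder(warmup_steps=1000, decay_every=10000, gamma=0.5):
    def lr_lambda(step):
        if step < warmup_steps:
            return float(step + 1) / warmup_steps  # linear warm-up
        return gamma ** ((step - warmup_steps) // decay_every)  # step decay afterwards
    return lr_lambda
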
Example #2
if args.optim == 'adam':
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           eps=1e-9,
                           weight_decay=args.l2_norm,
                           betas=[0.9, 0.98])
elif args.optim == 'sparseadam':
    # SparseAdam does not accept a weight_decay argument, so it is omitted here
    optimizer = optim.SparseAdam(model.parameters(),
                                 lr=args.lr,
                                 eps=1e-9,
                                 betas=[0.9, 0.98])
elif args.optim == 'adamax':
    optimizer = optim.Adamax(model.parameters(),
                             lr=args.lr,
                             eps=1e-9,
                             weight_decay=args.l2_norm,
                             betas=[0.9, 0.98])
elif args.optim == 'rmsprop':
    optimizer = optim.RMSprop(model.parameters(),
                              lr=args.lr,
                              eps=1e-10,
                              weight_decay=args.l2_norm,
                              momentum=0.9)
elif args.optim == 'sgd':
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          weight_decay=args.l2_norm,
                          momentum=0.9)  # 0.01
elif args.optim == 'adagrad':
    optimizer = optim.Adagrad(model.parameters(),
Example #3
    def __init__(self, opt, embedding=None, state_dict=None):
        self.opt = opt
        self.updates = state_dict[
            'updates'] if state_dict and 'updates' in state_dict else 0
        self.eval_embed_transfer = True
        self.train_loss = AverageMeter()

        self.network = DNetwork(opt, embedding)
        if state_dict:
            new_state = set(self.network.state_dict().keys())
            for k in list(state_dict['network'].keys()):
                if k not in new_state:
                    del state_dict['network'][k]
            for k, v in list(self.network.state_dict().items()):
                if k not in state_dict['network']:
                    state_dict['network'][k] = v
            self.network.load_state_dict(state_dict['network'])

        parameters = [p for p in self.network.parameters() if p.requires_grad]
        if opt['optimizer'] == 'sgd':
            self.optimizer = optim.SGD(parameters,
                                       opt['learning_rate'],
                                       momentum=opt['momentum'],
                                       weight_decay=opt['weight_decay'])
        elif opt['optimizer'] == 'adamax':
            self.optimizer = optim.Adamax(parameters,
                                          opt['learning_rate'],
                                          weight_decay=opt['weight_decay'])
        elif opt['optimizer'] == 'adam':
            self.optimizer = optim.Adam(parameters,
                                        opt['learning_rate'],
                                        weight_decay=opt['weight_decay'])
        elif opt['optimizer'] == 'adadelta':
            self.optimizer = optim.Adadelta(parameters,
                                            opt['learning_rate'],
                                            rho=0.95)
        else:
            raise RuntimeError('Unsupported optimizer: %s' % opt['optimizer'])
        if state_dict and 'optimizer' in state_dict:
            self.optimizer.load_state_dict(state_dict['optimizer'])

        if opt['fix_embeddings']:
            wvec_size = 0
        else:
            wvec_size = (opt['vocab_size'] -
                         opt['tune_partial']) * opt['embedding_dim']
        if opt.get('have_lr_scheduler', False):
            if opt.get('scheduler_type', 'rop') == 'rop':
                self.scheduler = ReduceLROnPlateau(self.optimizer,
                                                   mode='max',
                                                   factor=opt['lr_gamma'],
                                                   patience=3)
            elif opt.get('scheduler_type', 'rop') == 'exp':
                self.scheduler = ExponentialLR(self.optimizer,
                                               gamma=opt.get('lr_gamma', 0.5))
            else:
                milestones = [
                    int(step)
                    for step in opt.get('multi_step_lr', '10,20,30').split(',')
                ]
                self.scheduler = MultiStepLR(self.optimizer,
                                             milestones=milestones,
                                             gamma=opt.get('lr_gamma'))
        else:
            self.scheduler = None
        self.total_param = sum([p.nelement() for p in parameters]) - wvec_size
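Note: Example #3 constructs its schedulers but the snippet ends before any of them is stepped. A minimal, self-contained sketch of how a 'max'-mode ReduceLROnPlateau is typically driven by a validation metric (toy model and metric values, not taken from the source) could be:

import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Toy setup only; the real network, metric, and training loop are not part of the snippet above.
model = nn.Linear(10, 2)
optimizer = optim.Adamax(model.parameters(), lr=2e-3)
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3)

for epoch in range(20):
    # ... one training epoch would run here ...
    val_score = 0.5 + 0.01 * min(epoch, 10)  # stand-in for a validation score (e.g. F1)
    scheduler.step(val_score)                # 'max' mode: reduce LR when the score plateaus
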
Example #4
Nbatches = int(math.ceil(Ntrain / batch_size))  #batch_size is defined above
Nepochs = 500
Nrep = 1

#model = conv3DNet(grid_size, Noutputs, batch_size)
#model = UnetGenerator_3d(in_dim=1, out_dim=Noutputs, num_filter=4)
#model = UnetGenerator_3d_softmax(in_dim=1, out_dim=Noutputs, num_filter=8)
model = UnetGenerator_3d_log_softmax(in_dim=1, out_dim=Noutputs, num_filter=4)

#optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.90)
#optimizer = optim.Adam(model.parameters())
#optimizer = optim.Adagrad(model.parameters())
optimizer = optim.Adamax(model.parameters())
#optimizer = optim.ASGD(model.parameters())
#optimizer = optim.RMSprop(model.parameters())
#optimizer = optim.Rprop(model.parameters())

scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 'min', patience=10, verbose=True
)  # Reduce the learning rate if the loss has not decreased by more than 1e-4 in 10 steps

train_errors = torch.Tensor(Nepochs).zero_()
validation_errors = torch.Tensor(Nepochs).zero_()

ep_loss = torch.Tensor(Nepochs).zero_()

for i_ep in range(Nepochs):
    for b_start in range(0, Ntrain, batch_size):
Example #5
    def _opti(self, parameters):
        return optim.Adamax(parameters)
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    else:
        torch.cuda.manual_seed(args.seed)


corpus = Corpus(args.task)
model = eval(args.model)(corpus, args)
model.train()
criterion = nn.NLLLoss()

parameters = filter(lambda p: p.requires_grad, model.parameters())
optimizer = optim.Adamax(parameters, lr=args.lr)


if args.cuda:
    model.cuda()
    criterion.cuda()

start_time = time.time()
total_loss = 0
interval = args.interval
save_interval = len(corpus.data_all['train']) // args.batch_size

best_dev_score = -99999
iterations = args.epochs * len(corpus.data_all['train']) // args.batch_size
print('max iterations: ' + str(iterations))
count = 0
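Note: the training loop of Example #5 is cut off above. A minimal, self-contained sketch of the usual update step with the Adamax optimizer and NLLLoss criterion built there (toy model and data, not from the source) could be:

import torch
import torch.nn as nn
import torch.optim as optim

# Toy stand-ins; the real model, Corpus, and batching code are not shown in the snippet.
model = nn.Sequential(nn.Linear(8, 4), nn.LogSoftmax(dim=1))
criterion = nn.NLLLoss()
parameters = filter(lambda p: p.requires_grad, model.parameters())
optimizer = optim.Adamax(parameters, lr=2e-3)

inputs = torch.randn(16, 8)
targets = torch.randint(0, 4, (16,))

optimizer.zero_grad()
loss = criterion(model(inputs), targets)  # NLLLoss expects log-probabilities
loss.backward()
optimizer.step()
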
Example #7
def run(args, kwargs):

    print('\nMODEL SETTINGS: \n', args, '\n')
    print("Random Seed: ", args.manual_seed)

    # ==================================================================================================================
    # SNAPSHOTS
    # ==================================================================================================================
    args.model_signature = str(datetime.datetime.now())[0:19].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    snapshots_path = os.path.join(args.out_dir, 'vae_' + args.dataset + '_')
    snap_dir = snapshots_path + args.flow + '_' + str(args.gpu_num)

    if args.flow != 'no_flow':
        snap_dir += '_' + 'num_flows_' + str(args.num_flows)

    if args.flow == 'orthogonal':
        snap_dir = snap_dir + '_num_vectors_' + str(args.num_ortho_vecs)
    elif args.flow == 'orthogonalH':
        snap_dir = snap_dir + '_num_householder_' + str(args.num_householder)
    elif args.flow == 'iaf':
        snap_dir = snap_dir + '_madehsize_' + str(args.made_h_size)

    elif args.flow == 'permutation':
        snap_dir = snap_dir + '_' + 'kernelsize_' + str(args.kernel_size)
    elif args.flow == 'mixed':
        snap_dir = snap_dir + '_' + 'num_householder_' + str(args.num_householder)

    snap_dir = snap_dir + '__' + args.model_signature + '/'

    args.snap_dir = snap_dir

    if not os.path.exists(snap_dir):
        os.makedirs(snap_dir)

    # SAVING
    torch.save(args, snap_dir + args.flow + '.config')

    # ==================================================================================================================
    # LOAD DATA
    # ==================================================================================================================
    train_loader, val_loader, test_loader, args = load_dataset(args, **kwargs)

    # ==================================================================================================================
    # SELECT MODEL
    # ==================================================================================================================
    # flow parameters and architecture choice are passed on to model through args

    if args.flow == 'no_flow':
        model = VAE.VAE(args)
    elif args.flow == 'planar':
        model = VAE.PlanarVAE(args)
    elif args.flow == 'iaf':
        model = VAE.IAFVAE(args)
    elif args.flow == 'orthogonal':
        model = VAE.OrthogonalSylvesterVAE(args)
    elif args.flow == 'householder':
        model = VAE.HouseholderSylvesterVAE(args)
    elif args.flow == 'triangular':
        model = VAE.TriangularSylvesterVAE(args)
    else:
        raise ValueError('Invalid flow choice')

    if args.cuda:
        print("Model on GPU")
        model.cuda()

    print(model)

    optimizer = optim.Adamax(model.parameters(), lr=args.learning_rate, eps=1.e-7)

    # ==================================================================================================================
    # TRAINING
    # ==================================================================================================================
    train_loss = []
    val_loss = []

    # for early stopping
    best_loss = np.inf
    best_bpd = np.inf
    e = 0
    epoch = 0

    train_times = []

    for epoch in range(1, args.epochs + 1):

        t_start = time.time()
        tr_loss = train(epoch, train_loader, model, optimizer, args)
        train_loss.append(tr_loss)
        train_times.append(time.time()-t_start)
        print('One training epoch took %.2f seconds' % (time.time()-t_start))

        v_loss, v_bpd = evaluate(val_loader, model, args, epoch=epoch)

        val_loss.append(v_loss)

        # early-stopping
        if v_loss < best_loss:
            e = 0
            best_loss = v_loss
            if args.input_type != 'binary':
                best_bpd = v_bpd
            print('->model saved<-')
            torch.save(model, snap_dir + args.flow + '.model')
            # torch.save(model, snap_dir + args.flow + '_' + args.architecture + '.model')

        elif (args.early_stopping_epochs > 0) and (epoch >= args.warmup):
            e += 1
            if e > args.early_stopping_epochs:
                break

        if args.input_type == 'binary':
            print('--> Early stopping: {}/{} (BEST: loss {:.4f})\n'.format(e, args.early_stopping_epochs, best_loss))

        else:
            print('--> Early stopping: {}/{} (BEST: loss {:.4f}, bpd {:.4f})\n'.format(e, args.early_stopping_epochs,
                                                                                   best_loss, best_bpd))

        if math.isnan(v_loss):
            raise ValueError('NaN encountered!')

    train_loss = np.hstack(train_loss)
    val_loss = np.array(val_loss)

    plot_training_curve(train_loss, val_loss, fname=snap_dir + '/training_curve_%s.pdf' % args.flow)

    # training time per epoch
    train_times = np.array(train_times)
    mean_train_time = np.mean(train_times)
    std_train_time = np.std(train_times, ddof=1)
    print('Average train time per epoch: %.2f +/- %.2f' % (mean_train_time, std_train_time))

    # ==================================================================================================================
    # EVALUATION
    # ==================================================================================================================

    test_score_file = snap_dir + 'test_scores.txt'

    with open('experiment_log.txt', 'a') as ff:
        print(args, file=ff)
        print('Stopped after %d epochs' % epoch, file=ff)
        print('Average train time per epoch: %.2f +/- %.2f' % (mean_train_time, std_train_time), file=ff)

    final_model = torch.load(snap_dir + args.flow + '.model')

    if args.testing:
        validation_loss, validation_bpd = evaluate(val_loader, final_model, args)
        test_loss, test_bpd = evaluate(test_loader, final_model, args, testing=True)

        with open('experiment_log.txt', 'a') as ff:
            print('FINAL EVALUATION ON VALIDATION SET\n'
                  'ELBO (VAL): {:.4f}\n'.format(validation_loss), file=ff)
            print('FINAL EVALUATION ON TEST SET\n'
                  'NLL (TEST): {:.4f}\n'.format(test_loss), file=ff)
            if args.input_type != 'binary':
                print('FINAL EVALUATION ON VALIDATION SET\n'
                      'ELBO (VAL) BPD : {:.4f}\n'.format(validation_bpd), file=ff)
                print('FINAL EVALUATION ON TEST SET\n'
                      'NLL (TEST) BPD: {:.4f}\n'.format(test_bpd), file=ff)


    else:
        validation_loss, validation_bpd = evaluate(val_loader, final_model, args)
        # save the test score in case you want to look it up later.
        _, _ = evaluate(test_loader, final_model, args, testing=True, file=test_score_file)

        with open('experiment_log.txt', 'a') as ff:
            print('FINAL EVALUATION ON VALIDATION SET\n'
                  'ELBO (VALIDATION): {:.4f}\n'.format(validation_loss), file=ff)
            if args.input_type != 'binary':
                print('FINAL EVALUATION ON VALIDATION SET\n'
                      'ELBO (VAL) BPD : {:.4f}\n'.format(validation_bpd), file=ff)
Example #8
    def train(self):
        """
        Train the model and print out training data.
        """

        # Define Loss function and Optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adamax(self._net.parameters(), lr=0.001)

        # Use CUDA device if available
        device = self._set_device()

        start_time = time.perf_counter()
        train_loss_history = []
        train_acc_history = []

        # Train the network
        for epoch in range(10):

            running_loss = 0.0
            train_loss = 0.0
            correct = 0
            total = 0

            for i, data in enumerate(self._data_loader, 0):
                # data is a list of [inputs, labels]
                inputs, labels = data[0].to(device), data[1].to(device)

                # clear the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs = self._net(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # calculate training accuracy and loss
                _, predictions = torch.max(outputs, 1)
                correct += (predictions == labels).sum().item()
                total += labels.size(0)
                train_loss += loss.item()

                # print loss and accuracy every 500 mini-batches
                running_loss += loss.item()
                if i % 500 == 499:
                    print(
                        'Epoch %d/10, %5d mini-batches, Loss: %.3f, Accuracy: %.3f'
                        % (epoch + 1, i + 1, running_loss / 500,
                           correct / total))
                    running_loss = 0.0

            train_loss_history.append(train_loss / len(self._data_loader))
            train_acc_history.append(correct / total)

        # print training time
        end_time = time.perf_counter()
        print(
            f'Finished training in {(end_time - start_time)/60:.2f} minutes.')

        # plot training accuracy and loss curve
        plt.plot(np.array(train_loss_history), 'b', label='Training Loss')
        plt.plot(np.array(train_acc_history), 'y', label='Training Accuracy')
        plt.legend()
        plt.show()

        self.save_network()
Example #9
def main_train(epoch_start, epoch_end, train, arg):
    #%%
    net_name = arg['net_name']
    loss_name = arg['loss_name']
    filename = get_filename(net_name, loss_name)
    print('train model: ' + filename)
    if epoch_start == epoch_end:
        print('epoch_end is epoch_start, exiting main_train')
        return
    #---------------------------------------
    device = arg['device']
    lr = arg['lr']
    norm_type = arg['norm_type']
    rand_pad = arg['rand_pad']
    #%%
    num_classes = 9
    if norm_type == np.inf:
        noise_norm = 0.1
        max_iter = 1
        step = 1.0
    elif norm_type == 2:
        noise_norm = 5.0
        max_iter = 1
        step = 1.0
#%%
    loader_train, loader_val = get_dataloader(rand_pad=rand_pad)
    #%%
    loss_train_list = []
    acc_train_list = []
    acc_val_list = []
    acc_test_list = []
    epoch_save = epoch_start - 1
    #%%
    model = Net(net_name)
    if epoch_start > 0:
        print('load', filename + '_epoch' + str(epoch_save) + '.pt')
        checkpoint = torch.load(filename + '_epoch' + str(epoch_save) + '.pt',
                                map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['model_state_dict'])
        #------------------------
        loss_train_list = checkpoint['result']['loss_train_list']
        acc_train_list = checkpoint['result']['acc_train_list']
        acc_val_list = checkpoint['result']['acc_val_list']
        acc_test_list = checkpoint['result']['acc_test_list']
        if 'E' in arg.keys():
            if arg['E'] is None:
                arg['E'] = checkpoint['result']['arg']['E']
                print('load E')
    #------------------------
    model.to(device)
    #------------------------
    if arg['optimizer'] == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=lr)
    elif arg['optimizer'] == 'AdamW':
        optimizer = optim.AdamW(model.parameters(), lr=lr)
    elif arg['optimizer'] == 'Adamax':
        optimizer = optim.Adamax(model.parameters(), lr=lr)
    elif arg['optimizer'] == 'SGD':
        optimizer = optim.SGD(model.parameters(),
                              lr=lr,
                              momentum=0.9,
                              weight_decay=0.001,
                              nesterov=True)
    else:
        raise NotImplementedError('unknown optimizer')
#%%
    for epoch in range(epoch_save + 1, epoch_end):
        #-------- training --------------------------------
        start = time.time()
        loss_train, acc_train = train(model, device, optimizer, loader_train,
                                      epoch, arg)
        loss_train_list.append(loss_train)
        acc_train_list.append(acc_train)
        print('epoch', epoch, 'training loss:', loss_train, 'acc:', acc_train)
        end = time.time()
        print('time cost:', end - start)
        #-------- validation --------------------------------
        result_val = test(model,
                          device,
                          loader_val,
                          num_classes=num_classes,
                          class_balanced_acc=True)
        acc_val_list.append(result_val['acc'])
        #-------- test --------------------------------
        #result_test = test(model, device, loader_test, num_classes=num_classes, class_balanced_acc=True)
        #acc_test_list.append(result_test['acc'])
        #--------save model-------------------------
        result = {}
        result['arg'] = arg
        result['loss_train_list'] = loss_train_list
        result['acc_train_list'] = acc_train_list
        result['acc_val_list'] = acc_val_list
        result['acc_test_list'] = acc_test_list
        if (epoch + 1) % 10 == 0:
            save_checkpoint(filename + '_epoch' + str(epoch) + '.pt', model,
                            result, epoch)
        epoch_save = epoch
        #------- show result ----------------------
        #plt.close('all')
        display.clear_output(wait=False)
        fig, ax = plot_result(loss_train_list, acc_train_list, acc_val_list,
                              acc_test_list)
        display.display(fig)
        fig.savefig(filename + '_epoch' + str(epoch) + '.png')
        plt.close(fig)
Example #10
    model = Net()

model.to(device)

if config.LOSS == "l1":
    loss_function = nn.L1Loss()
elif config.LOSS == "vgg":
    loss_function = loss.VggLoss()
elif config.LOSS == "ssim":
    loss_function = loss.SsimLoss()
elif config.LOSS == "l1+vgg":
    loss_function = loss.CombinedLoss()
else:
    raise ValueError(f"Unknown loss: {config.LOSS}")

optimizer = optim.Adamax(model.parameters(), lr=0.001)

board_writer = SummaryWriter()

# ----------------------------------------------------------------------


def train(epoch):
    print("===> Training...")
    before_pass = [p.data.clone() for p in model.parameters()]
    epoch_loss = 0
    for iteration, batch in enumerate(training_data_loader, 1):
        input, target = batch[0].to(device), batch[1].to(device)

        optimizer.zero_grad()
Example #11
if __name__ == '__main__':
    model = init_model(config.START_FROM_EXISTING_MODEL)

    if config.LOSS == "l1":
        loss_function = nn.L1Loss()
    elif config.LOSS == "vgg":
        loss_function = loss.VggLoss()
    elif config.LOSS == "ssim":
        loss_function = loss.SsimLoss()
    elif config.LOSS == "l1+vgg":
        loss_function = loss.CombinedLoss()
    else:
        raise ValueError(f"Unknown loss: {config.LOSS}")

    optimizer = optim.Adamax(model.parameters(), lr=config.LEARNING_RATE[0])

    board_writer = SummaryWriter()

# ----------------------------------------------------------------------


def train(epoch):
    print("===> Training...")
    before_pass = [p.data.clone() for p in model.parameters()]
    epoch_loss = 0

    target_crop = _make_target_crop(config.PATCH_SIZE[0], config.PATCH_SIZE[1],
                                    config.CROP_SIZE, config.CROP_SIZE)

    epoch_lr = config.LEARNING_RATE[-1]
Example #12
    def forward(self, x):
        x = self.hidden(x)
        x = self.hidden2(x)
        x = self.hidden3(x)
        x = self.sig(x)
        x = self.output(x)

        return x


in_d, out_d, in_test, out_test = load_data()
net = Net()

crit = nn.MSELoss()
# opt = optim.SGD(params=net.parameters(),lr= 0.01)
opt = optim.Adamax(params=net.parameters(), lr=0.002, betas=(0.9, 0.999))

for epoch in range(100):
    loss_value = 0.0

    i = 0

    for values in in_d:

        opt.zero_grad()

        temp = np.matrix(values)
        tensorIn = torch.from_numpy(temp).float()
        outs = net(tensorIn)
        tensorOut = torch.from_numpy(out_d[i]).float()
        loss = crit(outs, tensorOut)
    def optim_selection(self):
        if self.config.optim == "Nesterov":
            return optim.SGD(
                self.model.parameters(),
                lr=self.config.lr,
                momentum=0.9,
                nesterov=True,
                weight_decay=0.0001,
            )
        elif self.config.optim == "SGD":  # weight_decay = l2 regularization
            return optim.SGD(
                self.model.parameters(),
                lr=self.config.lr,
                momentum=0.9,
                nesterov=False,
                weight_decay=0.0001,
            )
        elif self.config.optim == "Adadelta":  # default lr = 1.0
            return optim.Adadelta(
                self.model.parameters(),
                lr=self.config.lr,
                rho=0.9,
                eps=1e-06,
                weight_decay=1e-6,
            )
        elif self.config.optim == "Adagrad":  # default lr = 0.01
            return optim.Adagrad(
                self.model.parameters(),
                lr=self.config.lr,
                lr_decay=0,
                weight_decay=1e-6,
                initial_accumulator_value=0,
                eps=1e-10,
            )
        elif self.config.optim == "Adam":  # default lr=0.001
            return optim.Adam(
                self.model.parameters(),
                lr=self.config.lr,
                weight_decay=1e-6,
            )
        elif self.config.optim == "AdamW":  # default lr=0.001
            return optim.AdamW(
                self.model.parameters(),
                lr=self.config.lr,
                betas=(0.9, 0.999),
                eps=1e-08,
                weight_decay=0.01,
                amsgrad=False,
            )
        elif self.config.optim == "SparseAdam":  # default lr = 0.001
            return optim.SparseAdam(
                self.model.parameters(),
                lr=self.config.lr,
                betas=(0.9, 0.999),
                eps=1e-08,
            )
        elif self.config.optim == "Adamax":  # default lr=0.002
            return optim.Adamax(
                self.model.parameters(),
                lr=self.config.lr,
                betas=(0.9, 0.999),
                eps=1e-08,
                weight_decay=1e-6,
            )
        elif self.config.optim == "ASGD":
            return optim.ASGD(
                self.model.parameters(),
                lr=self.config.lr,
                lambd=0.0001,
                alpha=0.75,
                t0=1000000.0,
                weight_decay=1e-6,
            )
        elif self.config.optim == "RMSprop":  # default lr=0.01
            return optim.RMSprop(
                self.model.parameters(),
                lr=self.config.lr,
                alpha=0.99,
                eps=1e-08,
                weight_decay=0,
                momentum=0,
                centered=False,
            )
        elif self.config.optim == "Rprop":  # default lr=0.01
            return optim.Rprop(
                self.model.parameters(),
                lr=self.config.lr,
                etas=(0.5, 1.2),
                step_sizes=(1e-06, 50),
            )
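Note: optim_selection() above dispatches on the self.config.optim string. A minimal harness showing how such a factory is typically exercised (DummyConfig and DummyTrainer are illustrative names, not from the source, and only the Adamax branch is reproduced) could be:

import torch.nn as nn
import torch.optim as optim

# Illustrative only; the real config/model classes are not part of the snippet above.
class DummyConfig:
    optim = "Adamax"
    lr = 2e-3

class DummyTrainer:
    def __init__(self):
        self.config = DummyConfig()
        self.model = nn.Linear(4, 2)

    def optim_selection(self):
        if self.config.optim == "Adamax":  # same dispatch pattern as above, reduced to one branch
            return optim.Adamax(self.model.parameters(),
                                lr=self.config.lr,
                                betas=(0.9, 0.999),
                                eps=1e-08,
                                weight_decay=1e-6)
        raise ValueError("unknown optimizer: " + self.config.optim)

optimizer = DummyTrainer().optim_selection()
print(type(optimizer).__name__)  # -> Adamax
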
Example #14
def main(argv):
    config = Config()
    config.load_user_config()
    config.log.info("finish loading user config")

    train_file = config.args["train_file"]
    dev_file = config.args["dev_file"]
    old_glove_file = config.args["glove_file"]
    new_glove_file = config.args["glove_file"] + ".subset"

    # TODO(demi): switch "overwrite" to False
    train_data_raw, dev_data_raw, i2w, w2i, i2c, c2i, new_glove_file, glove_dim, vocab_size, char_vocab_size\
         = squad_read_data(config, train_file, dev_file, old_glove_file, new_glove_file, overwrite=True)
    config.log.info("finish reading squad data in raw formats")

    config.update_batch([("glove_file", new_glove_file),
                   ("glove_dim", glove_dim),
                   ("vocab_size", vocab_size),
                   ("char_vocab_size", char_vocab_size)])


    config.log.warning("reminder: now we only support train/fake mode")
    assert config.args["mode"] in ["train", "fake"], "mode (%s) not found" % config.args["mode"]

    train_id_conversion, train_data = make_dataset(config, train_data_raw, w2i, c2i)
    dev_id_conversion, dev_data = make_dataset(config, dev_data_raw, w2i, c2i)
    config.log.info("finish making datasets: reformatting raw data")

    train_data = QnADataset(train_data, config)
    dev_data = QnADataset(dev_data, config)
    config.log.info("finish generating datasets")

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=1, shuffle=True, **config.kwargs)
    dev_loader = torch.utils.data.DataLoader(dev_data, batch_size=1, **config.kwargs)
    config.log.info("finish generating data loader")


    model = BiDAF(config, i2w)
    config.log.info("finish creating model")
    if config.args["use_cuda"]:
        model.cuda()

    # log config and model
    config.log.info(config.format_string())
    config.log.info("model:{}".format(model))

    if config.args['optimizer'] == "Adam":
        optimizer = optim.Adam(model.get_train_parameters(), lr=config.args['lr'], weight_decay=config.args['weight_decay'])
    if config.args['optimizer'] == "Adamax":
        optimizer = optim.Adamax(model.get_train_parameters(), lr=config.args['lr'], weight_decay=config.args['weight_decay'])
    if config.args['optimizer'] == "SGD":
        optimizer = torch.optim.SGD(model.get_train_parameters(), lr=config.args['lr'], momentum=0.9, weight_decay=config.args['weight_decay'])
    if config.args['optimizer'] == "Adadelta":
        optimizer = torch.optim.Adadelta(model.get_train_parameters(), lr=config.args["lr"])
    #if config.args['optimizer'] == "Adagrad":



    config.log.info("model = %s" % model)
    config.log.info("config = %s" % config.format_string())

    trainer = Trainer(config)
    evaluator = Evaluator(config)

    """ save model checkpoint """
    def save_checkpoint(epoch):
        checkpoint = {"model_state_dict": model.state_dict(),
                      "config_args" : config.args}
        if config.args["optimizer"] != "YF":  # YF can't save state dict right now
            checkpoint["optimizer_state_dict"] = optimizer.state_dict()
        checkpoint_file = config.args["model_dir"] + config.args["model_name"] + "-EPOCH%d" % epoch
        torch.save(checkpoint, checkpoint_file)
        config.log.info("saving checkpoint: {}".format(checkpoint_file))


    for epoch in range(1, config.args["max_epoch"] + 1):
        config.log.info("training: epoch %d" % epoch)
        # QS(demi): do i need to return model & optimizer?
        model, optimizer, train_avg_loss, train_answer_dict = trainer.run(model, train_id_conversion[0], train_loader, optimizer, mode="train")
        model, optimizer, dev_avg_loss, dev_answer_dict = trainer.run(model, dev_id_conversion[0], dev_loader, optimizer, mode="dev")

        # loss is a float tensor with size 1
        config.log.info("[EPOCH %d] LOSS = (train)%.5lf | (dev)%.5lf" % (epoch, train_avg_loss[0], dev_avg_loss[0]))

        answer_filename = "{}/{}-EPOCH{}".format(config.args["model_dir"], config.args["model_name"], epoch)
        config.log.info("[EVAUATION] TRAIN EVAL")
        evaluator.eval("official", train_file, train_answer_dict, "{}/answer.train".format(config.args["model_dir"], answer_filename))
        config.log.info("[EVAUATION] DEV EVAL")
        evaluator.eval("official", dev_file, dev_answer_dict, "{}/answer.dev".format(config.args["model_dir"], answer_filename))

        save_checkpoint(epoch)
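Note: Example #14 saves checkpoints as a dict holding the model state, the config, and (for most optimizers) the optimizer state. A small self-contained sketch of writing and restoring state in that layout (toy model and an illustrative file name) could be:

import torch
import torch.nn as nn
import torch.optim as optim

# Toy model/optimizer; the checkpoint keys mirror the ones used in Example #14.
model = nn.Linear(4, 2)
optimizer = optim.Adamax(model.parameters(), lr=1e-3)

checkpoint = {"model_state_dict": model.state_dict(),
              "optimizer_state_dict": optimizer.state_dict()}
torch.save(checkpoint, "checkpoint-EPOCH1.pt")  # illustrative file name

restored = torch.load("checkpoint-EPOCH1.pt", map_location="cpu")
model.load_state_dict(restored["model_state_dict"])
optimizer.load_state_dict(restored["optimizer_state_dict"])
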
Example #15
def main(args):
    device = torch.device("cuda:0")

    # model hyperparameters
    dataset = args.dataset
    batch_size = args.batch_size
    hps = Hyperparameters(base_dim=args.base_dim,
                          res_blocks=args.res_blocks,
                          bottleneck=args.bottleneck,
                          skip=args.skip,
                          weight_norm=args.weight_norm,
                          coupling_bn=args.coupling_bn,
                          affine=args.affine)
    scale_reg = 5e-5  # L2 regularization strength

    # optimization hyperparameters
    lr = args.lr
    momentum = args.momentum
    decay = args.decay

    # prefix for images and checkpoints
    filename = 'bs%d_' % batch_size \
             + 'normal_' \
             + 'bd%d_' % hps.base_dim \
             + 'rb%d_' % hps.res_blocks \
             + 'bn%d_' % hps.bottleneck \
             + 'sk%d_' % hps.skip \
             + 'wn%d_' % hps.weight_norm \
             + 'cb%d_' % hps.coupling_bn \
             + 'af%d' % hps.affine

    # load dataset
    train_split, val_split, data_info = data_utils.load(dataset)
    train_loader = torch.utils.data.DataLoader(train_split,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=2)
    val_loader = torch.utils.data.DataLoader(val_split,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=2)

    prior = distributions.Normal(  # isotropic standard normal distribution
        torch.tensor(0.).to(device),
        torch.tensor(1.).to(device))
    flow = realnvp.RealNVP(datainfo=data_info, prior=prior, hps=hps).to(device)
    optimizer = optim.Adamax(flow.parameters(),
                             lr=lr,
                             betas=(momentum, decay),
                             eps=1e-7)

    epoch = 0
    running_loss = 0.
    running_log_ll = 0.
    optimal_log_ll = float('-inf')
    early_stop = 0

    image_size = data_info.channel * data_info.size**2  # full image dimension

    while epoch < args.max_epoch:
        epoch += 1
        print('Epoch %d:' % epoch)
        flow.train()
        for batch_idx, data in enumerate(train_loader, 1):
            optimizer.zero_grad()
            x, _ = data
            # log-determinant of Jacobian from the logit transform
            x, log_det = data_utils.logit_transform(x)
            x = x.to(device)
            log_det = log_det.to(device)

            # log-likelihood of input minibatch
            log_ll, weight_scale = flow(x)
            log_ll = (log_ll + log_det).mean()

            # add L2 regularization on scaling factors
            loss = -log_ll + scale_reg * weight_scale
            running_loss += loss.item()
            running_log_ll += log_ll.item()

            loss.backward()
            optimizer.step()

            if batch_idx % 10 == 0:
                bit_per_dim = (-log_ll.item() + np.log(256.) * image_size) \
                    / (image_size * np.log(2.))
                print('[%d/%d]\tloss: %.3f\tlog-ll: %.3f\tbits/dim: %.3f' % \
                    (batch_idx*batch_size, len(train_loader.dataset),
                        loss.item(), log_ll.item(), bit_per_dim))

        mean_loss = running_loss / batch_idx
        mean_log_ll = running_log_ll / batch_idx
        mean_bit_per_dim = (-mean_log_ll + np.log(256.) * image_size) \
             / (image_size * np.log(2.))
        print('===> Average train loss: %.3f' % mean_loss)
        print('===> Average train log-likelihood: %.3f' % mean_log_ll)
        print('===> Average train bit_per_dim: %.3f' % mean_bit_per_dim)
        running_loss = 0.
        running_log_ll = 0.

        flow.eval()
        with torch.no_grad():
            for batch_idx, data in enumerate(val_loader, 1):
                x, _ = data
                x, log_det = data_utils.logit_transform(x)
                x = x.to(device)
                log_det = log_det.to(device)

                # log-likelihood of input minibatch
                log_ll, weight_scale = flow(x)
                log_ll = (log_ll + log_det).mean()

                # add L2 regularization on scaling factors
                loss = -log_ll + scale_reg * weight_scale
                running_loss += loss.item()
                running_log_ll += log_ll.item()

            mean_loss = running_loss / batch_idx
            mean_log_ll = running_log_ll / batch_idx
            mean_bit_per_dim = (-mean_log_ll + np.log(256.) * image_size) \
                / (image_size * np.log(2.))
            print('===> Average validation loss: %.3f' % mean_loss)
            print('===> Average validation log-likelihood: %.3f' % mean_log_ll)
            print('===> Average validation bits/dim: %.3f' % mean_bit_per_dim)
            running_loss = 0.
            running_log_ll = 0.

            samples = flow.sample(args.sample_size)
            samples, _ = data_utils.logit_transform(samples, reverse=True)
            utils.save_image(
                utils.make_grid(samples),
                './samples/' + dataset + '/' + filename + '_ep%d.png' % epoch)

        if mean_log_ll > optimal_log_ll:
            early_stop = 0
            optimal_log_ll = mean_log_ll
            torch.save(flow, './models/' + dataset + '/' + filename + '.model')
            print('[MODEL SAVED]')
        else:
            early_stop += 1
            if early_stop >= 100:
                break

        print('--> Early stopping %d/100 (BEST validation log-likelihood: %.3f)' \
            % (early_stop, optimal_log_ll))

    print('Training finished at epoch %d.' % epoch)
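Note: Example #15 converts the average log-likelihood into bits per dimension by adding back the log(256) discretization term and dividing by image_size * log(2). A tiny self-contained check of that formula with toy numbers:

import numpy as np

# Toy numbers only: a 3x32x32 image and an arbitrary log-likelihood value.
image_size = 3 * 32 * 32
log_ll = -8000.0
bits_per_dim = (-log_ll + np.log(256.) * image_size) / (image_size * np.log(2.))
print(round(bits_per_dim, 3))  # bits/dim for the toy log-likelihood
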
Example #16
def train():
   
    parser = argparse.ArgumentParser()
    # config file
    parser.add_argument(
        "--config-yml",
        default="exp_fvqa/exp2.yml",
        help=
        "Path to a config file listing reader, model and solver parameters.")

    parser.add_argument("--cpu-workers",
                        type=int,
                        default=8,
                        help="Number of CPU workers for dataloader.")

    parser.add_argument(
        "--save-dirpath",
        default="fvqa/exp_data/checkpoints",
        help=
        "Path of directory to create checkpoint directory and save checkpoints."
    )

    parser.add_argument(
        "--load-pthpath",
        default="",
        help="To continue training, path to .pth file of saved checkpoint.")

    parser.add_argument("--gpus", default="", help="gpus")
    parser.add_argument(
        "--overfit",
        action="store_true",
        help="Whether to validate on val split after every epoch.")

    parser.add_argument(
        "--validate",
        action="store_true",
        help="Whether to validate on val split after every epoch.")

    args = parser.parse_args()

    # set manual seed
    torch.manual_seed(10)
    torch.cuda.manual_seed(10)
    cudnn.benchmark = True
    cudnn.deterministic = True

    config = yaml.load(open(args.config_yml))

    device = torch.device("cuda:0") if args.gpus != "cpu" else torch.device(
        "cpu")

    # Print config and args.
    print(yaml.dump(config, default_flow_style=False))
    for arg in vars(args):
        print("{:<20}: {}".format(arg, getattr(args, arg)))

 
    print('Loading TrainDataset...')
    train_dataset = FvqaTrainDataset(config, overfit=args.overfit)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=config['solver']['batch_size'],
                                  num_workers=args.cpu_workers,
                                  shuffle=True,
                                  collate_fn=collate_fn)

    if args.validate:
        print('Loading TestDataset...')
        val_dataset = FvqaTestDataset(config, overfit=args.overfit)
        val_dataloader = DataLoader(val_dataset,
                                    batch_size=config['solver']['batch_size'],
                                    num_workers=args.cpu_workers,
                                    shuffle=True,
                                    collate_fn=collate_fn)


    print('Loading glove...')
    que_vocab = Vocabulary(config['dataset']['word2id_path'])
    glove = np.load(config['dataset']['glove_vec_path'])
    glove = torch.Tensor(glove)


    print('Building Model...')
    model = CMGCNnet(config,
                     que_vocabulary=que_vocab,
                     glove=glove,
                     device=device)

    if torch.cuda.device_count() > 1 and args.gpus != "cpu":
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)

    model = model.to(device)
    print(model)


    iterations = len(train_dataset) // config["solver"]["batch_size"] + 1

    def lr_lambda_fun(current_iteration: int) -> float:
   
        current_epoch = float(current_iteration) / iterations
        if current_epoch <= config["solver"]["warmup_epochs"]:
            alpha = current_epoch / float(config["solver"]["warmup_epochs"])
            return config["solver"]["warmup_factor"] * (1. - alpha) + alpha
        else:
            idx = bisect(config["solver"]["lr_milestones"], current_epoch)
            return pow(config["solver"]["lr_gamma"], idx)


    optimizer = optim.Adamax(model.parameters(),
                             lr=config["solver"]["initial_lr"])
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_lambda_fun)
    T = iterations * (config["solver"]["num_epochs"] -
                      config["solver"]["warmup_epochs"] + 1)
    scheduler2 = lr_scheduler.CosineAnnealingLR(
        optimizer, int(T), eta_min=config["solver"]["eta_min"], last_epoch=-1)

   
    summary_writer = SummaryWriter(log_dir=args.save_dirpath)
    checkpoint_manager = CheckpointManager(model,
                                           optimizer,
                                           args.save_dirpath,
                                           config=config)


    if args.load_pthpath == "":
        start_epoch = 0
    else:

        start_epoch = int(args.load_pthpath.split("_")[-1][:-4])

        model_state_dict, optimizer_state_dict = load_checkpoint(
            args.load_pthpath)
        if isinstance(model, nn.DataParallel):
            model.module.load_state_dict(model_state_dict)
        else:
            model.load_state_dict(model_state_dict)
        optimizer.load_state_dict(optimizer_state_dict)
        print("Loading resume model from {}...".format(args.load_pthpath))


    global_iteration_step = start_epoch * iterations

    for epoch in range(start_epoch, config['solver']['num_epochs']):

        print(f"\nTraining for epoch {epoch}:")

        train_answers = []
        train_preds = []

        for i, batch in enumerate(tqdm(train_dataloader)):
            optimizer.zero_grad()
            fact_batch_graph = model(batch)
            batch_loss = cal_batch_loss(fact_batch_graph,
                                        batch,
                                        device,
                                        neg_weight=0.1,
                                        pos_weight=0.9)

            batch_loss.backward()
            optimizer.step()

            fact_graphs = dgl.unbatch(fact_batch_graph)
            for i, fact_graph in enumerate(fact_graphs):
                train_pred = fact_graph.ndata['h'].squeeze()  # (num_nodes,1)
                train_preds.append(train_pred)  # [(num_nodes,)]
                train_answers.append(batch['facts_answer_id_list'][i])

            summary_writer.add_scalar('train/loss', batch_loss,
                                      global_iteration_step)
            summary_writer.add_scalar("train/lr",
                                      optimizer.param_groups[0]["lr"],
                                      global_iteration_step)
            summary_writer.add_text('train/loss', str(batch_loss.item()),
                                    global_iteration_step)
            summary_writer.add_text('train/lr',
                                    str(optimizer.param_groups[0]["lr"]),
                                    global_iteration_step)

            if global_iteration_step <= iterations * config["solver"][
                "warmup_epochs"]:
                scheduler.step(global_iteration_step)
            else:
                global_iteration_step_in_2 = iterations * config["solver"][
                    "warmup_epochs"] + 1 - global_iteration_step
                scheduler2.step(int(global_iteration_step_in_2))

            global_iteration_step = global_iteration_step + 1
            torch.cuda.empty_cache()


        checkpoint_manager.step()
        train_acc_1, train_acc_3 = cal_acc(
            train_answers, train_preds)
        print(
            "trainacc@1={:.2%} & trainacc@3={:.2%} "
                .format(train_acc_1, train_acc_3))
        summary_writer.add_scalars(
            'train/acc', {
                'acc@1': train_acc_1,
                'acc@3': train_acc_3

            }, epoch)


        if args.validate:
            model.eval()
            answers = []  # [batch_answers,...]
            preds = []  # [batch_preds,...]
            print(f"\nValidation after epoch {epoch}:")
            for i, batch in enumerate(tqdm(val_dataloader)):
                with torch.no_grad():
                    fact_batch_graph = model(batch)
                batch_loss = cal_batch_loss(fact_batch_graph,
                                            batch,
                                            device,
                                            neg_weight=0.1,
                                            pos_weight=0.9)

                summary_writer.add_scalar('test/loss', batch_loss, epoch)
                fact_graphs = dgl.unbatch(fact_batch_graph)
                for i, fact_graph in enumerate(fact_graphs):
                    pred = fact_graph.ndata['h'].squeeze()  # (num_nodes,1)
                    preds.append(pred)  # [(num_nodes,)]
                    answers.append(batch['facts_answer_id_list'][i])

            acc_1, acc_3 = cal_acc(answers, preds)
            print("acc@1={:.2%} & acc@3={:.2%} ".
                  format(acc_1, acc_3))
            summary_writer.add_scalars('test/acc', {
                'acc@1': acc_1,
                'acc@3': acc_3
            }, epoch)

            model.train()
            torch.cuda.empty_cache()
    print('Train finished !!!')
    summary_writer.close()
Example #17
def train(config_path):
    logger.info('------------MODEL TRAIN--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    #set default gpu
    os.environ["CUDA_VISIBLE_DEVICES"] = str(global_config['train']["gpu_id"])

    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is avaliable, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError(
            "CUDA is not abaliable, please unable CUDA in config file")

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    if model_choose == 'base':
        model_config = read_config('config/base_model.yaml')
        model = BaseModel(dataset_h5_path, model_config)
    elif model_choose == 'match-lstm':
        model = MatchLSTM(dataset_h5_path)
    elif model_choose == 'match-lstm+':
        model = MatchLSTMPlus(dataset_h5_path,
                              global_config['preprocess']['use_domain_tag'])
    elif model_choose == 'r-net':
        model = RNet(dataset_h5_path)
    elif model_choose == 'm-reader':
        model = MReader(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recognized' %
                         model_choose)

    model = model.to(device)
    criterion = MyNLLLoss()

    # optimizer
    optimizer_choose = global_config['train']['optimizer']
    optimizer_lr = global_config['train']['learning_rate']
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())

    if optimizer_choose == 'adamax':
        optimizer = optim.Adamax(optimizer_param)
    elif optimizer_choose == 'adadelta':
        optimizer = optim.Adadelta(optimizer_param)
    elif optimizer_choose == 'adam':
        optimizer = optim.Adam(optimizer_param)
    elif optimizer_choose == 'sgd':
        optimizer = optim.SGD(optimizer_param, lr=optimizer_lr)
    else:
        raise ValueError('optimizer "%s" in config file not recognized' %
                         optimizer_choose)

    # check if exist model weight
    weight_path = global_config['data']['model_path']
    if os.path.exists(weight_path):
        logger.info('loading existing weight...')
        weight = torch.load(weight_path,
                            map_location=lambda storage, loc: storage)
        if enable_cuda:
            weight = torch.load(
                weight_path, map_location=lambda storage, loc: storage.cuda())
        # weight = pop_dict_keys(weight, ['pointer', 'init_ptr_hidden'])  # partial initial weight
        model.load_state_dict(weight, strict=False)

    # training arguments
    logger.info('start training...')
    train_batch_size = global_config['train']['batch_size']
    valid_batch_size = global_config['train']['valid_batch_size']

    num_workers = global_config['global']['num_data_workers']
    batch_train_data = dataset.get_dataloader_train(train_batch_size,
                                                    num_workers)
    batch_dev_data = dataset.get_dataloader_dev(valid_batch_size, num_workers)

    clip_grad_max = global_config['train']['clip_grad_norm']

    best_avg = 0.
    # every epoch
    for epoch in range(global_config['train']['epoch']):
        # train
        model.train()  # set training = True, make sure right dropout
        sum_loss = train_on_model(model=model,
                                  criterion=criterion,
                                  optimizer=optimizer,
                                  batch_data=batch_train_data,
                                  epoch=epoch,
                                  clip_grad_max=clip_grad_max,
                                  device=device)
        logger.info('epoch=%d, sum_loss=%.5f' % (epoch, sum_loss))

        # evaluate
        with torch.no_grad():
            model.eval()  # let training = False, make sure right dropout
            valid_score_em, valid_score_f1, valid_loss = eval_on_model(
                model=model,
                criterion=criterion,
                batch_data=batch_dev_data,
                epoch=epoch,
                device=device)
            valid_avg = (valid_score_em + valid_score_f1) / 2
        logger.info(
            "epoch=%d, ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f" %
            (epoch, valid_score_em, valid_score_f1, valid_loss))

        # save model when best avg score
        if valid_avg > best_avg:
            save_model(
                model,
                epoch=epoch,
                model_weight_path=global_config['data']['model_path'],
                checkpoint_path=global_config['data']['checkpoint_path'])
            logger.info("saving model weight on epoch=%d" % epoch)
            best_avg = valid_avg
    logger.info('pretraining finished.')

    if global_config['global']['finetune']:
        batch_train_data = dataset.get_dataloader_train2(
            train_batch_size, num_workers)
        batch_dev_data = dataset.get_dataloader_dev2(valid_batch_size,
                                                     num_workers)
        for epoch in range(global_config['train']['finetune_epoch']):
            # train
            model.train()  # set training = True, make sure right dropout
            sum_loss = train_on_model(model=model,
                                      criterion=criterion,
                                      optimizer=optimizer,
                                      batch_data=batch_train_data,
                                      epoch=epoch,
                                      clip_grad_max=clip_grad_max,
                                      device=device)
            logger.info('finetune epoch=%d, sum_loss=%.5f' % (epoch, sum_loss))

            # evaluate
            with torch.no_grad():
                model.eval()  # let training = False, make sure right dropout
                valid_score_em, valid_score_f1, valid_loss = eval_on_model(
                    model=model,
                    criterion=criterion,
                    batch_data=batch_dev_data,
                    epoch=epoch,
                    device=device)
                valid_avg = (valid_score_em + valid_score_f1) / 2
            logger.info(
                "finetune epoch=%d, ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f"
                % (epoch, valid_score_em, valid_score_f1, valid_loss))

            # save model when best avg score
            if valid_avg > best_avg:
                save_model(
                    model,
                    epoch=epoch,
                    model_weight_path=global_config['data']['model_path'],
                    checkpoint_path=global_config['data']['checkpoint_path'])
                logger.info("saving model weight on epoch=%d" % epoch)
                best_avg = valid_avg

    if global_config['global']['finetune2']:
        batch_train_data = dataset.get_dataloader_train3(
            train_batch_size, num_workers)
        batch_dev_data = dataset.get_dataloader_dev3(valid_batch_size,
                                                     num_workers)
        for epoch in range(global_config['train']['finetune_epoch2']):
            # train
            model.train()  # set training = True, make sure right dropout
            sum_loss = train_on_model(model=model,
                                      criterion=criterion,
                                      optimizer=optimizer,
                                      batch_data=batch_train_data,
                                      epoch=epoch,
                                      clip_grad_max=clip_grad_max,
                                      device=device)
            logger.info('finetune2 epoch=%d, sum_loss=%.5f' %
                        (epoch, sum_loss))

            # evaluate
            with torch.no_grad():
                model.eval()  # let training = False, make sure right dropout
                valid_score_em, valid_score_f1, valid_loss = eval_on_model(
                    model=model,
                    criterion=criterion,
                    batch_data=batch_dev_data,
                    epoch=epoch,
                    device=device)
                valid_avg = (valid_score_em + valid_score_f1) / 2
            logger.info(
                "finetune2 epoch=%d, ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f"
                % (epoch, valid_score_em, valid_score_f1, valid_loss))

            # save model when best avg score
            if valid_avg > best_avg:
                save_model(
                    model,
                    epoch=epoch,
                    model_weight_path=global_config['data']['model_path'],
                    checkpoint_path=global_config['data']['checkpoint_path'])
                logger.info("saving model weight on epoch=%d" % epoch)
                best_avg = valid_avg

    logger.info('finished.')
Example #18
     {"params": model.parameters()},
     optim.Adagrad(lr=0.1, params=model.parameters()),
     id="AdagradConf",
 ),
 pytest.param(
     "Adam",
     {"lr": 0.1},
     {"params": model.parameters()},
     optim.Adam(lr=0.1, params=model.parameters()),
     id="AdamConf",
 ),
 pytest.param(
     "Adamax",
     {"lr": 0.1},
     {"params": model.parameters()},
     optim.Adamax(lr=0.1, params=model.parameters()),
     id="AdamaxConf",
 ),
 pytest.param(
     "AdamW",
     {"lr": 0.1},
     {"params": model.parameters()},
     optim.AdamW(lr=0.1, params=model.parameters()),
     id="AdamWConf",
 ),
 pytest.param(
     "ASGD",
     {"lr": 0.1},
     {"params": model.parameters()},
     optim.ASGD(lr=0.1, params=model.parameters()),
     id="ASGDConf",
Example #19
    def optimization_algorithms(SCI_optimizer, cnn, LR, SCI_SGD_MOMENTUM,
                                REGULARIZATION):

        if type(SCI_optimizer) is str:
            if (SCI_optimizer == 'Adam'):
                optimizer = optim.Adam(cnn.parameters(),
                                       lr=LR,
                                       betas=(0.01, 0.999),
                                       weight_decay=REGULARIZATION)
            if (SCI_optimizer == 'AMSGrad'):
                optimizer = optim.Adam(cnn.parameters(),
                                       lr=LR,
                                       betas=(0.01, 0.999),
                                       weight_decay=REGULARIZATION,
                                       amsgrad=True)
            if (SCI_optimizer == 'AdamW'):
                optimizer = AdamW(cnn.parameters(),
                                  lr=LR,
                                  betas=(0.01, 0.999),
                                  weight_decay=REGULARIZATION)
            if (SCI_optimizer == 'RMSprop'):
                optimizer = optim.RMSprop(cnn.parameters(), lr=LR)
            #if (SCI_optimizer == 'SparseAdam') or (int(SCI_optimizer) == 4) :
            #optimizer = optim.SparseAdam(cnn.parameters(), lr=LR)
            if (SCI_optimizer == 'SGD'):
                optimizer = optim.SGD(cnn.parameters(),
                                      lr=LR,
                                      momentum=SCI_SGD_MOMENTUM,
                                      weight_decay=REGULARIZATION)
            if (SCI_optimizer == 'Adadelta'):
                optimizer = optim.Adadelta(cnn.parameters(),
                                           lr=LR,
                                           weight_decay=REGULARIZATION)
            if (SCI_optimizer == 'Rprop'):
                optimizer = optim.Rprop(cnn.parameters(), lr=LR)
            #if (SCI_optimizer == 'Adagrad') or (int(SCI_optimizer) == 7) :
            #    optimizer = optim.Adagrad(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION)
            if (SCI_optimizer == 'Adamax'):
                optimizer = optim.Adamax(cnn.parameters(),
                                         lr=LR,
                                         weight_decay=REGULARIZATION)
            if (SCI_optimizer == 'ASGD'):
                optimizer = optim.ASGD(cnn.parameters(),
                                       lr=LR,
                                       weight_decay=REGULARIZATION)
            #if (SCI_optimizer == 'LBFGS') or (int(SCI_optimizer) == 10) :
            #optimizer = optim.LBFGS(cnn.parameters(), lr=LR)
        else:
            if (int(SCI_optimizer) == 1):
                optimizer = optim.Adam(cnn.parameters(),
                                       lr=LR,
                                       betas=(0.01, 0.999),
                                       weight_decay=REGULARIZATION)
            if (int(SCI_optimizer) == 2):
                optimizer = optim.Adam(cnn.parameters(),
                                       lr=LR,
                                       betas=(0.01, 0.999),
                                       weight_decay=REGULARIZATION,
                                       amsgrad=True)
            if (int(SCI_optimizer) == 3):
                optimizer = AdamW(cnn.parameters(),
                                  lr=LR,
                                  betas=(0.01, 0.999),
                                  weight_decay=REGULARIZATION)
            if (int(SCI_optimizer) == 4):
                optimizer = optim.RMSprop(cnn.parameters(), lr=LR)
            #if (SCI_optimizer == 'SparseAdam') or (int(SCI_optimizer) == 4) :
            #optimizer = optim.SparseAdam(cnn.parameters(), lr=LR)
            if (int(SCI_optimizer) == 5):
                optimizer = optim.SGD(cnn.parameters(),
                                      lr=LR,
                                      momentum=SCI_SGD_MOMENTUM,
                                      weight_decay=REGULARIZATION)
            if (int(SCI_optimizer) == 6):
                optimizer = optim.Adadelta(cnn.parameters(),
                                           lr=LR,
                                           weight_decay=REGULARIZATION)
            if (int(SCI_optimizer) == 7):
                optimizer = optim.Rprop(cnn.parameters(), lr=LR)
            #if (SCI_optimizer == 'Adagrad') or (int(SCI_optimizer) == 7) :
            #    optimizer = optim.Adagrad(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION)
            if (int(SCI_optimizer) == 8):
                optimizer = optim.Adamax(cnn.parameters(),
                                         lr=LR,
                                         weight_decay=REGULARIZATION)
            if (int(SCI_optimizer) == 9):
                optimizer = optim.ASGD(cnn.parameters(),
                                       lr=LR,
                                       weight_decay=REGULARIZATION)
            #if (SCI_optimizer == 'LBFGS') or (int(SCI_optimizer) == 10) :
            #optimizer = optim.LBFGS(cnn.parameters(), lr=LR)

        return optimizer
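
# Hedged standalone sketch (not from the original source): the same optimizer
# selection can be expressed as a name-to-constructor mapping instead of the
# long if-chain above. The tiny model and hyperparameters below are placeholders.
import torch.nn as nn
import torch.optim as optim

_constructors = {
    'Adam': lambda params, lr, wd: optim.Adam(params, lr=lr, weight_decay=wd),
    'Adamax': lambda params, lr, wd: optim.Adamax(params, lr=lr, weight_decay=wd),
    'SGD': lambda params, lr, wd: optim.SGD(params, lr=lr, momentum=0.9, weight_decay=wd),
}
_demo_net = nn.Linear(4, 2)
_demo_optimizer = _constructors['Adamax'](_demo_net.parameters(), 1e-3, 1e-4)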
Ejemplo n.º 20
0
def main(lr, batch_size, epoch, gpu, train_set, valid_set):

    # ------------- Part for tensorboard --------------
    writer = SummaryWriter(comment="_naive_DENET")
    # ------------- Part for tensorboard --------------

    # -------------- Some prepare ---------------------
    torch.backends.cudnn.enabled = True
    torch.cuda.set_device(gpu)
    # torch.set_default_tensor_type('torch.cuda.FloatTensor')
    # -------------- Some prepare ---------------------

    BATCH_SIZE = batch_size
    EPOCH = epoch

    LEARNING_RATE = lr
    beta1 = 0.9
    beta2 = 0.999

    trainset = mydataset(train_set, transform_train)
    valset = mydataset(valid_set)
    trainLoader = torch.utils.data.DataLoader(trainset,
                                              batch_size=BATCH_SIZE,
                                              shuffle=True)
    valLoader = torch.utils.data.DataLoader(valset,
                                            batch_size=1,
                                            shuffle=False)

    opter = Opter(128, 128, batch_size)

    SepConvNet = Network(opter).cuda()
    SepConvNet.apply(weights_init)
    # SepConvNet.load_state_dict(torch.load('/mnt/hdd/xiasifeng/sepconv/sepconv_mutiscale_LD/SepConv_iter33-ltype_fSATD_fs-lr_0.001-trainloss_0.1497-evalloss_0.1357-evalpsnr_29.6497.pkl'))

    # SepConvNet_cost = nn.MSELoss().cuda()
    # SepConvNet_cost = nn.L1Loss().cuda()
    SepConvNet_cost = sepconv.SATDLoss().cuda()
    SepConvNet_optimizer = optim.Adamax(SepConvNet.parameters(),
                                        lr=LEARNING_RATE,
                                        betas=(beta1, beta2))
    SepConvNet_schedule = optim.lr_scheduler.ReduceLROnPlateau(
        SepConvNet_optimizer,
        factor=0.1,
        patience=3,
        verbose=True,
        min_lr=1e-5)

    # ----------------  Time part -------------------
    start_time = time.time()
    global_step = 0
    # ----------------  Time part -------------------

    for epoch in range(0, EPOCH):
        SepConvNet.train().cuda()
        cnt = 0
        sumloss = 0.0  # The sumloss is for the whole training_set
        tsumloss = 0.0  # The tsumloss is for the printinterval
        printinterval = 300
        print("---------------[Epoch%3d]---------------" % (epoch + 1))
        for imgL, imgR, label in trainLoader:
            global_step = global_step + 1
            cnt = cnt + 1
            SepConvNet_optimizer.zero_grad()

            imgL = var(imgL).cuda()
            imgR = var(imgR).cuda()
            label = var(label).cuda()

            output = SepConvNet(imgL, imgR)
            loss = SepConvNet_cost(output, label)
            loss.backward()
            SepConvNet_optimizer.step()
            sumloss = sumloss + loss.data.item()
            tsumloss = tsumloss + loss.data.item()

            if cnt % printinterval == 0:
                writer.add_image("Prev image", imgR[0], cnt)
                writer.add_image("Pred image", output[0], cnt)
                writer.add_scalar('Train Batch SATD loss', loss.data.item(),
                                  int(global_step / printinterval))
                writer.add_scalar('Train Interval SATD loss',
                                  tsumloss / printinterval,
                                  int(global_step / printinterval))
                print(
                    'Epoch [%d/%d], Iter [%d/%d], Time [%4.4f], Batch loss [%.6f], Interval loss [%.6f]'
                    % (epoch + 1, EPOCH, cnt, len(trainset) // BATCH_SIZE,
                       time.time() - start_time, loss.data.item(),
                       tsumloss / printinterval))
                tsumloss = 0.0
        print('Epoch [%d/%d], iter: %d, Time [%4.4f], Avg Loss [%.6f]' %
              (epoch + 1, EPOCH, cnt, time.time() - start_time, sumloss / cnt))

        # ---------------- Part for validation ----------------
        trainloss = sumloss / cnt
        SepConvNet.eval().cuda()
        evalcnt = 0
        pos = 0.0
        sumloss = 0.0
        psnr = 0.0
        for imgL, imgR, label in valLoader:
            imgL = var(imgL).cuda()
            imgR = var(imgR).cuda()
            label = var(label).cuda()

            with torch.no_grad():
                output = SepConvNet(imgL, imgR)
                loss = SepConvNet_cost(output, label)
                sumloss = sumloss + loss.data.item()
                psnr = psnr + calcPSNR.calcPSNR(output.cpu().data.numpy(),
                                                label.cpu().data.numpy())
                evalcnt = evalcnt + 1
        # ------------- Tensorboard part -------------
        writer.add_scalar("Valid SATD loss", sumloss / evalcnt, epoch)
        writer.add_scalar("Valid PSNR", psnr / valset.__len__(), epoch)
        # ------------- Tensorboard part -------------
        print('Validation loss [%.6f],  Average PSNR [%.4f]' %
              (sumloss / evalcnt, psnr / valset.__len__()))
        SepConvNet_schedule.step(psnr / valset.__len__())
        torch.save(
            SepConvNet.state_dict(),
            os.path.join(
                '.', 'naive_DENET_iter' + str(epoch + 1) + '-ltype_fSATD_fs' +
                '-lr_' + str(LEARNING_RATE) + '-trainloss_' +
                str(round(trainloss, 4)) + '-evalloss_' +
                str(round(sumloss / evalcnt, 4)) + '-evalpsnr_' +
                str(round(psnr / valset.__len__(), 4)) + '.pkl'))
    writer.close()
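
# calcPSNR is imported elsewhere and not shown in this snippet; a minimal PSNR
# sketch over numpy arrays (assuming pixel values in [0, 1]) is given below for
# reference. The project's helper may differ in range handling.
import numpy as np

def psnr_sketch(pred, target, data_range=1.0):
    mse = np.mean((pred.astype(np.float64) - target.astype(np.float64)) ** 2)
    if mse == 0:
        return float('inf')
    return 10.0 * np.log10((data_range ** 2) / mse)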
Ejemplo n.º 21
0
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if 'cnn' in key:
                params += [{'params':[value], 'lr':opt.cnn_learning_rate,
                        'weight_decay':opt.cnn_weight_decay, 'betas':(opt.cnn_optim_alpha, opt.cnn_optim_beta)}]
            else:
                params += [{'params':[value], 'lr':opt.learning_rate, 
                    'weight_decay':opt.weight_decay, 'betas':(opt.optim_alpha, opt.optim_beta)}]

    print("Use %s as optmization method" %(opt.optim))
    if opt.optim == 'sgd':
        optimizer = optim.SGD(params, momentum=0.9)
    elif opt.optim == 'adam':
        optimizer = optim.Adam(params)
    elif opt.optim == 'adamax':
        optimizer = optim.Adamax(params)

    # if opt.cnn_optim == 'sgd':
    #     cnn_optimizer = optim.SGD(cnn_params, momentum=0.9)
    # else:
    #     cnn_optimizer = optim.Adam(cnn_params)
    # load optimizer
    # learning_rate_list = np.linspace(opt.learning_rate, 0.0005, opt.max_epochs)

    for epoch in range(start_epoch, opt.max_epochs):
        if epoch > opt.learning_rate_decay_start and opt.learning_rate_decay_start >= 0:
            if (epoch - opt.learning_rate_decay_start) % opt.learning_rate_decay_every == 0:
                # decay the learning rate.
                utils.set_lr(optimizer, opt.learning_rate_decay_rate)
                opt.learning_rate  = opt.learning_rate * opt.learning_rate_decay_rate
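
# utils.set_lr is not included in this fragment. Given that opt.learning_rate is
# multiplied by the decay rate right after the call, a plausible sketch (an
# assumption, not the project's actual helper) scales every parameter group's
# learning rate by the same factor:
def set_lr_sketch(optimizer, decay_rate):
    for group in optimizer.param_groups:
        group['lr'] = group['lr'] * decay_rate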
Ejemplo n.º 22
0
def main(lr, batch_size, epoch, gpu, train_set, valid_set):
    # ------------- Part for tensorboard --------------
    # writer = SummaryWriter(log_dir='tb/LSTM_ft1')
    # ------------- Part for tensorboard --------------
    torch.backends.cudnn.enabled = True
    torch.cuda.set_device(gpu)

    BATCH_SIZE = batch_size
    EPOCH = epoch

    LEARNING_RATE = lr
    beta1 = 0.9
    beta2 = 0.999

    trainset = vimeodataset(train_set, 'filelist.txt', transform_train)
    valset = vimeodataset(valid_set, 'test.txt')
    trainLoader = torch.utils.data.DataLoader(trainset,
                                              batch_size=BATCH_SIZE,
                                              shuffle=True)
    valLoader = torch.utils.data.DataLoader(valset,
                                            batch_size=BATCH_SIZE,
                                            shuffle=False)
    assert (len(valset) % BATCH_SIZE == 0)

    SepConvNet = Network().cuda()
    # SepConvNet.apply(weights_init)
    SepConvNet.load_my_state_dict(
        torch.load(
            'ft2_baseline_iter86-ltype_fSATD_fs-lr_0.001-trainloss_0.1249-evalloss_0.1155-evalpsnr_29.9327.pkl',
            map_location='cuda:%d' % (gpu)))
    # SepConvNet.load_state_dict(torch.load('beta_LSTM_iter8-ltype_fSATD_fs-lr_0.001-trainloss_0.557-evalloss_0.1165-evalpsnr_29.8361.pkl'))

    # MSE_cost = nn.MSELoss().cuda()
    # SepConvNet_cost = nn.L1Loss().cuda()
    SepConvNet_cost = sepconv.SATDLoss().cuda()
    SepConvNet_optimizer = optim.Adamax(SepConvNet.parameters(),
                                        lr=LEARNING_RATE,
                                        betas=(beta1, beta2))
    SepConvNet_schedule = optim.lr_scheduler.ReduceLROnPlateau(
        SepConvNet_optimizer, factor=0.1, patience=3, verbose=True)

    # ----------------  Time part -------------------
    start_time = time.time()
    global_step = 0
    # ----------------  Time part -------------------

    # ---------------- Opt part -----------------------
    # opter = Opter(gpu)
    # -------------------------------------------------

    for epoch in range(0, EPOCH):
        SepConvNet.train().cuda()
        cnt = 0
        sumloss = 0.0  # The sumloss is for the whole training_set
        tsumloss = 0.0  # The tsumloss is for the printinterval
        printinterval = 100
        print("---------------[Epoch%3d]---------------" % (epoch + 1))
        for label_list in trainLoader:
            bad_list = label_list[7:]
            label_list = label_list[:7]
            # IPython.embed()
            # exit()
            global_step = global_step + 1
            cnt = cnt + 1
            for i in range(5):
                imgL = var(bad_list[i]).cuda()
                imgR = var(bad_list[i + 1]).cuda()
                label = var(label_list[i + 2]).cuda()
                poor_label = var(bad_list[i + 2]).cuda()
                label_L = var(label_list[i]).cuda()
                SepConvNet_optimizer.zero_grad()

                if i == 0:
                    output_f, output_b, stat = SepConvNet(imgL, imgR)
                else:
                    output_f, output_b, stat = SepConvNet(
                        imgL, imgR, res_f, res_b, stat)

                res_f = poor_label - output_f
                res_b = imgL - output_b

                loss = 0.5 * SepConvNet_cost(output_f,
                                             label) + 0.5 * SepConvNet_cost(
                                                 output_b, label_L)

                if i < 4:
                    loss.backward(retain_graph=True)
                else:
                    loss.backward()
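                # Hedged note (added): retain_graph=True is presumably required
                # because res_f/res_b computed below feed the next step's forward
                # pass, so the following iteration's backward() still reaches into
                # this iteration's graph.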

                SepConvNet_optimizer.step()

                sumloss = sumloss + loss.data.item()
                tsumloss = tsumloss + loss.data.item()

            if cnt % printinterval == 0:
                print(
                    'Epoch [%d/%d], Iter [%d/%d], Time [%4.4f], Batch loss [%.6f], Interval loss [%.6f]'
                    % (epoch + 1, EPOCH, cnt, len(trainset) // BATCH_SIZE,
                       time.time() - start_time, loss.data.item(),
                       tsumloss / printinterval / 5))
                tsumloss = 0.0
        print('Epoch [%d/%d], iter: %d, Time [%4.4f], Avg Loss [%.6f]' %
              (epoch + 1, EPOCH, cnt, time.time() - start_time,
               sumloss / cnt / 5))

        # ---------------- Part for validation ----------------
        trainloss = sumloss / cnt
        SepConvNet.eval().cuda()
        evalcnt = 0
        pos = 0.0
        sumloss = 0.0
        psnr = 0.0
        for label_list in valLoader:

            bad_list = label_list[7:]
            label_list = label_list[:7]
            with torch.no_grad():
                for i in range(5):

                    imgL = var(bad_list[i]).cuda()
                    imgR = var(bad_list[i + 1]).cuda()
                    label = var(label_list[i + 2]).cuda()
                    poor_label = var(bad_list[i + 2]).cuda()
                    label_L = var(label_list[i]).cuda()

                    if i == 0:
                        output_f, output_b, stat = SepConvNet(imgL, imgR)
                    else:
                        output_f, output_b, stat = SepConvNet(
                            imgL, imgR, res_f, res_b, stat)

                    psnr = psnr + calcPSNR.calcPSNR(
                        output_f.cpu().data.numpy(),
                        label.cpu().data.numpy())
                    res_f = poor_label - output_f
                    res_b = label_L - output_b

                    loss = SepConvNet_cost(output_f, label)
                    sumloss = sumloss + loss.data.item()

                evalcnt = evalcnt + 5

        # ------------- Tensorboard part -------------
        # writer.add_scalar("Valid SATD loss", sumloss / evalcnt, epoch)
        # writer.add_scalar("Valid PSNR", psnr / valset.__len__(), epoch)
        # ------------- Tensorboard part -------------
        print('Validation loss [%.6f],  Average PSNR [%.4f]' %
              (sumloss / evalcnt, psnr / evalcnt))
        SepConvNet_schedule.step(psnr / evalcnt)
        torch.save(
            SepConvNet.state_dict(),
            os.path.join(
                '.', 'minidual_LSTM_iter' + str(epoch + 1) +
                '-ltype_fSATD_fs' + '-lr_' + str(LEARNING_RATE) +
                '-trainloss_' + str(round(trainloss, 4)) + '-evalloss_' +
                str(round(sumloss / evalcnt, 4)) + '-evalpsnr_' +
                str(round(psnr / evalcnt, 4)) + '.pkl'))
Ejemplo n.º 23
0
def lr_lambda_fun(current_iteration: int) -> float:
    """Returns a learning rate multiplier.

    Until `warmup_epochs`, the learning rate increases linearly to `initial_lr`,
    and is then multiplied by `lr_gamma` every time a milestone is crossed.
    """
    current_epoch = float(current_iteration) / iterations
    if current_epoch < config["solver"]["warmup_epochs"]:
        alpha = current_epoch / float(config["solver"]["warmup_epochs"])
        return config["solver"]["warmup_factor"] * (1.0 - alpha) + alpha
    else:
        idx = bisect(config["solver"]["lr_milestones"], current_epoch)
        return pow(config["solver"]["lr_gamma"], idx)


optimizer = optim.Adamax(model.parameters(), lr=config["solver"]["initial_lr"])
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_lambda_fun)  # the lr can be tuned per parameter group
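
# Illustrative sketch with toy numbers (not the original config): the same
# warmup-then-milestone rule computed directly, showing multipliers of roughly
# warmup_factor -> 1.0 during warmup and gamma**k after each crossed milestone.
from bisect import bisect

def _toy_multiplier(epoch, warmup_epochs=1.0, warmup_factor=0.2,
                    milestones=(5, 10), gamma=0.5):
    if epoch < warmup_epochs:
        alpha = epoch / warmup_epochs
        return warmup_factor * (1.0 - alpha) + alpha
    return gamma ** bisect(list(milestones), epoch)

# e.g. _toy_multiplier(0.5) ~ 0.6, _toy_multiplier(2) == 1.0, _toy_multiplier(6) == 0.5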

# =============================================================================
#   SETUP BEFORE TRAINING LOOP
# =============================================================================
start_time = datetime.datetime.strftime(datetime.datetime.utcnow(), '%d-%b-%Y-%H:%M:%S')
checkpoint_dirpath = args.save_dirpath
if checkpoint_dirpath == 'checkpoints/':
    checkpoint_dirpath += '%s+%s/%s' % (config["model"]["encoder"], config["model"]["decoder"], start_time)
if args.save_model:
    summary_writer = SummaryWriter(log_dir=checkpoint_dirpath)
    checkpoint_manager = CheckpointManager(model, optimizer, checkpoint_dirpath, config=config)

sparse_metrics = SparseGTMetrics()
ndcg = NDCG()
Ejemplo n.º 24
0
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('--data', default='./data/')
    parser.add_argument('--epoch', type=int, default=10000)
    parser.add_argument('--batch_size', type=int, default=20)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--embed_dim', type=int, default=300)
    parser.add_argument('--hidden_dim', type=int, default=150)
    parser.add_argument('--num_layers', type=int, default=1)
    parser.add_argument('--bidirectional', default=True)
    parser.add_argument('--glove', default='../Data/glove/glove.840B.300d.txt')
    parser.add_argument('--cuda_set', default=True)

    args = parser.parse_args()

    ###############################################################################
    # Load data
    ###############################################################################

    print("Load data file...")
    train_data, dev_data = load_data(args.data)

    print("Preparing batch loader...")
    print("============= Train ===============")
    train_loader = BatchLoader(train_data, 'train', args.cuda_set,
                               args.batch_size)
    print("============= Valid ===============")
    dev_loader = BatchLoader(dev_data, 'dev', args.cuda_set, args.batch_size)

    # vocabulary set
    vocab_size = len(dev_data['word2idx'])
    print("============= Vocab Size ===============")
    print(vocab_size)
    print("")

    idx2word = dev_data['idx2word']

    ###############################################################################
    # Build the model
    ###############################################################################
    cuda.set_device(0)
    if args.cuda_set == True:
        model = MatchNet(vocab_size, args.embed_dim, args.hidden_dim,
                         args.cuda_set, args.num_layers,
                         args.bidirectional).cuda()
        criterion = nn.NLLLoss().cuda()
    else:
        model = MatchNet(vocab_size, args.embed_dim, args.hidden_dim,
                         args.cuda_set, args.num_layers, args.bidirectional)
        criterion = nn.CrossEntropyLoss()

    optimizer = optim.Adamax(model.parameters(),
                             lr=args.lr,
                             betas=(0.9, 0.999))

    print("#" * 15, "Model Info", "#" * 15)
    print("Model: ", model)
    print("Criterion: ", criterion)
    print("Optimizer: ", optimizer)
    print("")

    ###############################################################################
    # Load the pretrained word embedding
    ###############################################################################

    print("loading pretrinaed word embedding ...")
    emb_file = os.path.join(args.data, 'glove_emb.pth')

    if os.path.isfile(emb_file):
        W_emb = torch.load(emb_file)
    else:
        W_emb, embed_dim = load_pretrained_embedding(dev_data['word2idx'],
                                                     args.glove)
        W_emb = torch.from_numpy(W_emb).cuda()
        torch.save(W_emb, emb_file)

    if args.cuda_set:
        W_emb = W_emb.cuda()

    model.embed.embed.state_dict()['weight'].copy_(W_emb)
    model.embed.embed.state_dict()['weight'].requires_grad = False
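    # Note (added, hedged): copying into the state_dict tensor does update the
    # weights, but toggling requires_grad on a state_dict entry may not freeze
    # the parameter itself; the usual idiom would be
    # model.embed.embed.weight.requires_grad_(False).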

    ###############################################################################
    # Training
    ###############################################################################

    for epoch in range(args.epoch):
        start_time = time.time()
        train_loss = AverageMeter()
        train_acc = AverageMeter()

        message = "Epoch: %d training.." % (epoch)
        print(message)
        print("")

        for i, data in enumerate(train_loader):
            model.train()

            doc = data[0]
            qry = data[1]
            anss = data[2]
            sis = data[3]
            eis = data[4]
            ids = data[5]
            dm = data[9]
            qm = data[10]

            output1, output2 = model(doc, qry, dm, qm)

            _, pred1 = output1.data.cpu().topk(1)
            _, pred2 = output2.data.cpu().topk(1)

            loss1 = criterion(output1, sis)
            loss2 = criterion(output2, eis)
            loss = loss1 + loss2

            train_loss.update(loss.data[0], doc.size(0))

            acc_tmp = accuracy(pred1.numpy(), tensor2np(sis), pred2.numpy(),
                               tensor2np(eis), ids)
            train_acc.update(acc_tmp, doc.size(0))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(
            "===================        Train         ======================")
        print("doc_len: ", doc.size(1))
        random_idx = randomChoice(doc.size(0))
        show_question(random_idx, doc, qry, dm, qm, anss, idx2word)
        show_answer(random_idx, doc, qry, pred1, pred2, sis, eis, idx2word)
        print("")

        message = "Train epoch: %d  iter: %d  train_loss: %1.3f  train_acc: %1.3f  elapsed: %1.3f " % (
            epoch, i, train_loss.avg, train_acc.avg, time.time() - start_time)
        print(message)
        print("")

        ###############################################################################
        # Validation
        ###############################################################################

        print(
            "====================      Evaluation     ======================")

        val_acc = AverageMeter()
        start_time = time.time()
        model.eval()

        cor_cnt = 0
        incor_cnt = 0
        pad_cnt = 0
        val_out = 0
        val_in = 0
        val_false = 0

        for j, data in enumerate(dev_loader):
            doc = data[0]
            qry = data[1]
            anss = data[2]
            sis = data[3]
            eis = data[4]
            ids = data[5]
            dm = data[9]
            qm = data[10]

            output1, output2 = model(doc, qry, dm, qm)

            _, val_pred1 = output1.data.cpu().topk(1)
            _, val_pred2 = output2.data.cpu().topk(1)

            acc_tmp = accuracy_dev(val_pred1.numpy(), sis, val_pred2.numpy(),
                                   eis, ids)
            val_acc.update(acc_tmp, doc.size(0))

        message = "Epoch: %d train_iter: %d iter: %d  val_acc: %1.3f  elapsed: %1.3f " % (
            epoch, i, j, val_acc.avg, time.time() - start_time)
        print(message)
        print("")

        ###############################################################################
        # Show the sample Q&A
        ###############################################################################

        random_idx = randomChoice(doc.size(0))
        show_question(random_idx, doc, qry, dm, qm, anss, idx2word)
        show_answer_dev(random_idx, doc, qry, val_pred1, val_pred2, sis, eis,
                        idx2word)

        train_loss = AverageMeter()
        train_acc = AverageMeter()
        start_time = time.time()
Ejemplo n.º 25
0
def train_model(opt_):
    env = TrainEnvironment(opt_)
    dictionary = env.dict
    if opt_.load_checkpoint:
        net, dictionary = load_model(opt_.load_checkpoint, opt_)
        env = TrainEnvironment(opt_, dictionary)
        env.dict = dictionary
    else:
        net = create_model(opt_, dictionary["words"])
        if opt_.embeddings and opt_.embeddings != "None":
            load_embeddings(opt_, dictionary["words"], net)
    paramnum = 0
    trainable = 0
    for name, parameter in net.named_parameters():
        if parameter.requires_grad:
            trainable += parameter.numel()
        paramnum += parameter.numel()
    print("TRAINABLE", paramnum, trainable)
    if opt_.cuda:
        net = torch.nn.DataParallel(net)
        net = net.cuda()
    if opt_.optimizer == "adamax":
        lr = opt_.learning_rate or 0.002
        named_params_to_optimize = filter(lambda p: p[1].requires_grad,
                                          net.named_parameters())
        params_to_optimize = (p[1] for p in named_params_to_optimize)
        optimizer = optim.Adamax(params_to_optimize, lr=lr)
        if opt_.epoch_start != 0:
            saved_params = torch.load(
                opt_.load_checkpoint,
                map_location=lambda storage, loc: storage)
            optimizer.load_state_dict(saved_params["optim_dict"])
    else:
        lr = opt_.learning_rate or 0.01
        optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                     net.parameters()),
                              lr=lr)
    start_time = time.time()
    # best_loss = float("+inf")
    best_loss = 0
    test_data_shuffled = env.build_valid_dataloader(True)
    test_data_not_shuffled = env.build_valid_dataloader(False)
    with torch.no_grad():
        validate(
            0,
            net,
            test_data_shuffled,
            nb_candidates=opt_.hits_at_nb_cands,
            shuffled_str="shuffled",
        )
    train_data = None
    for epoch in range(opt_.epoch_start, opt_.num_epochs):
        if train_data is None or opt_.dataset_name == "reddit":
            train_data = env.build_train_dataloader(epoch)
        train(epoch, start_time, net, optimizer, opt_, train_data)
        with torch.no_grad():
            # We compute the loss both for shuffled and not shuffled case.
            # however, the loss that determines if the model is better is the
            # same as the one used for training.
            loss_shuffled = validate(
                epoch,
                net,
                test_data_shuffled,
                nb_candidates=opt_.hits_at_nb_cands,
                shuffled_str="shuffled",
            )
            loss_not_shuffled = validate(
                epoch,
                net,
                test_data_not_shuffled,
                nb_candidates=opt_.hits_at_nb_cands,
                shuffled_str="not-shuffled",
            )
            if opt_.no_shuffle:
                loss = loss_not_shuffled
            else:
                loss = loss_shuffled
            # if loss < best_loss:
            # ========= change loss to P@1,100 =======
            if loss > best_loss:
                best_loss = loss
                best_loss_epoch = epoch
                # logging.info(f"New best loss, saving model to {opt_.model_file}")
                logging.info(
                    f"New best P@1,100, saving model to {opt_.model_file}")
                save_model(opt_.model_file, net, dictionary, optimizer)
            # Stop if it's been too many epochs since the validation metric last improved
            if opt_.stop_crit_num_epochs != -1:
                if epoch - best_loss_epoch >= opt_.stop_crit_num_epochs:
                    break
    return net, dictionary
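
# save_model above is not shown in this fragment; based on the earlier load path
# (which reads an "optim_dict" entry from the checkpoint), a plausible sketch is
# a single torch.save of the network weights, the dictionary and the optimizer
# state. This is an assumption about the layout, not the project's exact format.
import torch

def save_model_sketch(model_file, net, dictionary, optimizer):
    torch.save({
        'state_dict': net.state_dict(),
        'word_dict': dictionary,
        'optim_dict': optimizer.state_dict(),
    }, model_file)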
Ejemplo n.º 26
0
def train(model, train_loader, test_loader, gen_loader, configs):
    model.train()
    # optimizer: it's usually more stable to set up the lr separately for some modules
    params = [{
        'params': model.reader.parameters(),
        'lr': 0.2 * configs.lr
    }, {
        'params': model.h_mean.parameters(),
        'lr': 0.1 * configs.lr
    }, {
        'params': model.h_var.parameters(),
        'lr': 0.1 * configs.lr
    }, {
        'params': model.writer.parameters()
    }, {
        'params': model.pos_dist.parameters()
    }, {
        'params': model.combine.parameters()
    }, {
        'params': model.describe.parameters()
    }, {
        'params': model.box_vae.parameters(),
        'lr': 10 * configs.lr
    }, {
        'params': model.offset_vae.parameters(),
        'lr': 10 * configs.lr
    }, {
        'params': model.renderer.parameters()
    }, {
        'params': model.bias_mean.parameters()
    }, {
        'params': model.bias_var.parameters()
    }]
    if configs.net == 'PNP':
        params.append({'params': model.vis_dist.parameters()})
    elif configs.net == 'SIMPLE':
        pass
    else:
        raise ValueError('configs.net ?= ', configs.net, 'not a valid value')
    optimizer = optim.Adamax(params, lr=configs.lr)
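    # Hedged sanity check (added, not in the original source): print the learning
    # rate actually assigned to each parameter group so the per-module scaling
    # above (0.1x / 0.2x / 10x of configs.lr) can be verified before training.
    for group_idx, group in enumerate(optimizer.param_groups):
        print('param group %d: lr=%g, tensors=%d'
              % (group_idx, group['lr'], len(group['params'])))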

    model.cuda()

    trainer = PNPNetTrainer(model=model,
                            train_loader=train_loader,
                            val_loader=test_loader,
                            gen_loader=gen_loader,
                            optimizer=optimizer,
                            configs=configs)

    minloss = 1000
    for epoch_num in range(0, configs.epochs + 1):
        timestamp_start = datetime.datetime.now(
            pytz.timezone('America/New_York'))
        trainer.train_epoch(epoch_num, timestamp_start)
        if epoch_num % configs.validate_interval == 0 and epoch_num > 0:
            minloss = trainer.validate(epoch_num, timestamp_start, minloss)
        if epoch_num % configs.sample_interval == 0 and epoch_num > 0:
            trainer.sample(epoch_num,
                           sample_num=8,
                           timestamp_start=timestamp_start)
        if epoch_num % configs.save_interval == 0 and epoch_num > 0:
            torch.save(
                model.state_dict(),
                osp.join(configs.exp_dir, 'checkpoints',
                         'model_epoch_{0}.pth'.format(epoch_num)))
Ejemplo n.º 27
0
    if args.cuda != -1:
        rnn = rnn.cuda(args.cuda)

    print(rnn)

    last_save_losses = []

    if args.optim == 'adam':
        optimizer = optim.Adam(rnn.parameters(),
                               lr=args.lr,
                               eps=1e-9,
                               betas=[0.9, 0.98])  # 0.0001
    elif args.optim == 'adamax':
        optimizer = optim.Adamax(rnn.parameters(),
                                 lr=args.lr,
                                 eps=1e-9,
                                 betas=[0.9, 0.98])  # 0.0001
    elif args.optim == 'rmsprop':
        optimizer = optim.RMSprop(rnn.parameters(),
                                  lr=args.lr,
                                  momentum=0.9,
                                  eps=1e-10)  # 0.0001
    elif args.optim == 'sgd':
        optimizer = optim.SGD(rnn.parameters(), lr=args.lr)  # 0.01
    elif args.optim == 'adagrad':
        optimizer = optim.Adagrad(rnn.parameters(), lr=args.lr)
    elif args.optim == 'adadelta':
        optimizer = optim.Adadelta(rnn.parameters(), lr=args.lr)

    last_100_losses = []
Ejemplo n.º 28
0
def main(
    dataset,
    dataroot,
    download,
    augment,
    batch_size,
    eval_batch_size,
    epochs,
    saved_model,
    seed,
    hidden_channels,
    K,
    L,
    actnorm_scale,
    flow_permutation,
    flow_coupling,
    LU_decomposed,
    learn_top,
    y_condition,
    y_weight,
    max_grad_clip,
    max_grad_norm,
    lr,
    n_workers,
    cuda,
    n_init_batches,
    output_dir,
    saved_optimizer,
    warmup,
):

    device = "cpu" if (not torch.cuda.is_available() or not cuda) else "cuda:0"

    check_manual_seed(seed)

    ds = check_dataset(dataset, dataroot, augment, download)
    image_shape, num_classes, train_dataset, test_dataset = ds

    # Note: unsupported for now
    multi_class = False

    train_loader = data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=n_workers,
        drop_last=True,
    )
    test_loader = data.DataLoader(
        test_dataset,
        batch_size=eval_batch_size,
        shuffle=False,
        num_workers=n_workers,
        drop_last=False,
    )

    model = Glow(
        image_shape,
        hidden_channels,
        K,
        L,
        actnorm_scale,
        flow_permutation,
        flow_coupling,
        LU_decomposed,
        num_classes,
        learn_top,
        y_condition,
    )

    model = model.to(device)
    optimizer = optim.Adamax(model.parameters(), lr=lr, weight_decay=5e-5)

    lr_lambda = lambda epoch: min(1.0, (epoch + 1) / warmup)  # noqa
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                                                  lr_lambda=lr_lambda)

    def step(engine, batch):
        model.train()
        optimizer.zero_grad()

        x, y = batch
        x = x.to(device)

        if y_condition:
            y = y.to(device)
            z, nll, y_logits = model(x, y)
            losses = compute_loss_y(nll, y_logits, y_weight, y, multi_class)
        else:
            z, nll, y_logits = model(x, None)
            losses = compute_loss(nll)

        losses["total_loss"].backward()

        if max_grad_clip > 0:
            torch.nn.utils.clip_grad_value_(model.parameters(), max_grad_clip)
        if max_grad_norm > 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)

        optimizer.step()

        return losses

    def eval_step(engine, batch):
        model.eval()

        x, y = batch
        x = x.to(device)

        with torch.no_grad():
            if y_condition:
                y = y.to(device)
                z, nll, y_logits = model(x, y)
                losses = compute_loss_y(nll,
                                        y_logits,
                                        y_weight,
                                        y,
                                        multi_class,
                                        reduction="none")
            else:
                z, nll, y_logits = model(x, None)
                losses = compute_loss(nll, reduction="none")

        return losses

    trainer = Engine(step)
    checkpoint_handler = ModelCheckpoint(output_dir,
                                         "glow",
                                         n_saved=2,
                                         require_empty=False)

    trainer.add_event_handler(
        Events.EPOCH_COMPLETED,
        checkpoint_handler,
        {
            "model": model,
            "optimizer": optimizer
        },
    )

    monitoring_metrics = ["total_loss"]
    RunningAverage(output_transform=lambda x: x["total_loss"]).attach(
        trainer, "total_loss")

    evaluator = Engine(eval_step)

    # Note: replace by https://github.com/pytorch/ignite/pull/524 when released
    Loss(
        lambda x, y: torch.mean(x),
        output_transform=lambda x: (
            x["total_loss"],
            torch.empty(x["total_loss"].shape[0]),
        ),
    ).attach(evaluator, "total_loss")

    if y_condition:
        monitoring_metrics.extend(["nll"])
        RunningAverage(output_transform=lambda x: x["nll"]).attach(
            trainer, "nll")

        # Note: replace by https://github.com/pytorch/ignite/pull/524 when released
        Loss(
            lambda x, y: torch.mean(x),
            output_transform=lambda x:
            (x["nll"], torch.empty(x["nll"].shape[0])),
        ).attach(evaluator, "nll")

    pbar = ProgressBar()
    pbar.attach(trainer, metric_names=monitoring_metrics)

    # load pre-trained model if given
    if saved_model:
        model.load_state_dict(torch.load(saved_model))
        model.set_actnorm_init()

        if saved_optimizer:
            optimizer.load_state_dict(torch.load(saved_optimizer))

        file_name, ext = os.path.splitext(saved_model)
        resume_epoch = int(file_name.split("_")[-1])

        @trainer.on(Events.STARTED)
        def resume_training(engine):
            engine.state.epoch = resume_epoch
            engine.state.iteration = resume_epoch * len(
                engine.state.dataloader)

    @trainer.on(Events.STARTED)
    def init(engine):
        model.train()

        init_batches = []
        init_targets = []

        with torch.no_grad():
            for batch, target in islice(train_loader, None, n_init_batches):
                init_batches.append(batch)
                init_targets.append(target)

            init_batches = torch.cat(init_batches).to(device)

            assert init_batches.shape[0] == n_init_batches * batch_size

            if y_condition:
                init_targets = torch.cat(init_targets).to(device)
            else:
                init_targets = None

            model(init_batches, init_targets)
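            # (added, hedged note) running the first n_init_batches through the
            # model here performs Glow's data-dependent ActNorm initialization
            # before training begins.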

    @trainer.on(Events.EPOCH_COMPLETED)
    def evaluate(engine):
        evaluator.run(test_loader)

        scheduler.step()
        metrics = evaluator.state.metrics

        losses = ", ".join(
            [f"{key}: {value:.2f}" for key, value in metrics.items()])

        print(f"Validation Results - Epoch: {engine.state.epoch} {losses}")

    timer = Timer(average=True)
    timer.attach(
        trainer,
        start=Events.EPOCH_STARTED,
        resume=Events.ITERATION_STARTED,
        pause=Events.ITERATION_COMPLETED,
        step=Events.ITERATION_COMPLETED,
    )

    @trainer.on(Events.EPOCH_COMPLETED)
    def print_times(engine):
        pbar.log_message(
            f"Epoch {engine.state.epoch} done. Time per batch: {timer.value():.3f}[s]"
        )
        timer.reset()

    trainer.run(train_loader, epochs)
Ejemplo n.º 29
0
                       weight_decay=5e-4)  # use momentum SGD as the optimizer
optKind2 = 'MomentumSGD'

network3 = CNNNet().to(device)  # instantiate the network defined above
optimizer3 = optim.Adam(network3.parameters(), lr=0.001)  # use Adam as the optimizer
optKind3 = 'Adam'

network4 = CNNNet().to(device)  # instantiate the network defined above
optimizer4 = optim.RMSprop(network4.parameters(), lr=0.0005,
                           eps=1e-06)  # use RMSprop as the optimizer
optKind4 = 'RMSprop'

network5 = CNNNet().to(device)  # instantiate the network defined above
optimizer5 = optim.Adamax(network5.parameters(),
                          lr=0.002,
                          betas=(0.9, 0.999),
                          eps=1e-08,
                          weight_decay=0)  # use Adamax as the optimizer
optKind5 = 'Adamax'
optKind5 = 'Adamax'



times, test_acc_list, train_loss_lists, train_acc_lists, val_loss_lists, val_acc_lists = [], [], [], [], [], []
Optims = [optKind1, optKind2, optKind3, optKind4, optKind5]
Optimizers = [optimizer1, optimizer2, optimizer3, optimizer4, optimizer5]
networks = [network1, network2, network3, network4, network5]

for i in range(len(Optims)):
    network = networks[i]
    optKind = Optims[i]
    optimizer = Optimizers[i]
Ejemplo n.º 30
0
def train(data, save_model_dir, seg=True):

    print("Training with {} model.".format(data.model_type))

    #data.show_data_summary()


    model = SeqModel(data)
    print( "finish building model.")

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adamax(parameters, lr=data.HP_lr)

    best_dev = -1
    best_dev_p = -1
    best_dev_r = -1

    best_test = -1
    best_test_p = -1
    best_test_r = -1


    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print(("Epoch: %s/%s" %(idx,data.HP_iteration)))
        optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_loss = 0
        batch_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        ## set model to train mode
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num//batch_size+1

        for batch_id in range(total_batch):
            start = batch_id*batch_size
            end = (batch_id+1)*batch_size
            if end >train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            words = data.train_texts[start:end]
            if not instance:
                continue

            gaz_list,  batch_word, batch_biword, batch_wordlen, batch_label, layer_gaz, gaz_count, gaz_chars, gaz_mask, gazchar_mask, mask, batch_bert, bert_mask = batchify_with_label(instance, data.HP_gpu,data.HP_num_layer)

            instance_count += 1
            loss, tag_seq = model.neg_log_likelihood_loss(gaz_list, batch_word, batch_biword, batch_wordlen, layer_gaz, gaz_count,gaz_chars, gaz_mask, gazchar_mask, mask, batch_label, batch_bert, bert_mask)

            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            sample_loss += loss.data
            total_loss += loss.data
            batch_loss += loss

            if end%500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(end, temp_cost, sample_loss, right_token, whole_token,(right_token+0.)/whole_token)))
                sys.stdout.flush()
                sample_loss = 0
            if end%data.HP_batch_size == 0:
                batch_loss.backward()
                optimizer.step()
                model.zero_grad()
                batch_loss = 0

        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print(("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(end, temp_cost, sample_loss, right_token, whole_token,(right_token+0.)/whole_token))       )
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"%(idx, epoch_cost, train_num/epoch_cost, total_loss)))

        speed, acc, p, r, f, pred_labels, gazs = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        if seg:
            current_score = f
            print(("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"%(dev_cost, speed, acc, p, r, f)))
        else:
            current_score = acc
            print(("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f"%(dev_cost, speed, acc)))

        if current_score > best_dev:
            if seg:
                print( "Exceed previous best f score:", best_dev)

            else:
                print( "Exceed previous best acc score:", best_dev)

            model_name = save_model_dir
            torch.save(model.state_dict(), model_name)
            #best_dev = current_score
            best_dev_p = p
            best_dev_r = r

        # ## decode test
        speed, acc, p, r, f, pred_labels, gazs = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if seg:
            current_test_score = f
            print(("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"%(test_cost, speed, acc, p, r, f)))
        else:
            current_test_score = acc
            print(("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f"%(test_cost, speed, acc)))

        if current_score > best_dev:
            best_dev = current_score
            best_test = current_test_score
            best_test_p = p
            best_test_r = r

        print("Best dev score: p:{}, r:{}, f:{}".format(best_dev_p,best_dev_r,best_dev))
        print("Test score: p:{}, r:{}, f:{}".format(best_test_p,best_test_r,best_test))
        gc.collect()

    with open(data.result_file,"a") as f:
        f.write(save_model_dir+'\n')
        f.write("Best dev score: p:{}, r:{}, f:{}\n".format(best_dev_p,best_dev_r,best_dev))
        f.write("Test score: p:{}, r:{}, f:{}\n\n".format(best_test_p,best_test_r,best_test))
        f.close()
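
# lr_decay used at the top of each epoch above is not defined in this fragment.
# A common implementation for this call signature (an assumption, not
# necessarily the project's helper) decays the initial lr by epoch and writes it
# back into every parameter group:
def lr_decay_sketch(optimizer, epoch, decay_rate, init_lr):
    lr = init_lr / (1 + decay_rate * epoch)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return optimizer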