Example 1
def main():

    par = Params(sys.argv)
    random.seed(par.seed)
    torch.manual_seed(par.seed)
    if torch.cuda.is_available(): torch.cuda.manual_seed_all(par.seed)

    if par.trn and par.val:
        chk = Checkpoint(par.dir)

        if chk.contains_model:  ####### resume training ####################################
            cfg, mod, opt = chk.load(par)  ### also moves to GPU if cfg.cuda
            #            cfg.update_par(par) ### updates par in cfg
            print_time('Learning [resume It={}]...'.format(cfg.n_iters_sofar))

        else:  ######################## training from scratch ##############################
            cfg = Config(par)  ### reads cfg and par (reads vocabularies)
            mod = Model(cfg)
            if cfg.cuda: mod.cuda()  ### moves to GPU
            opt = Optimizer(cfg, mod)  ### builds the Optimizer
            print_time('Learning [from scratch]...')

        trn = Dataset(par.trn,
                      cfg.svoc,
                      cfg.tvoc,
                      par.batch_size,
                      par.max_src_len,
                      par.max_tgt_len,
                      do_shuffle=True,
                      do_filter=True,
                      is_test=False)
        val = Dataset(par.val,
                      cfg.svoc,
                      cfg.tvoc,
                      par.batch_size,
                      par.max_src_len,
                      par.max_tgt_len,
                      do_shuffle=True,
                      do_filter=True,
                      is_test=True)
        Training(cfg, mod, opt, trn, val, chk)

    elif par.tst:  #################### inference ##########################################
        chk = Checkpoint()
        cfg, mod, opt = chk.load(par, par.chk)
        #        cfg.update_par(par) ### updates cfg options with pars
        tst = Dataset(par.tst,
                      cfg.svoc,
                      cfg.tvoc,
                      par.batch_size,
                      0,
                      0,
                      do_shuffle=False,
                      do_filter=False,
                      is_test=True)
        print_time('Inference [model It={}]...'.format(cfg.n_iters_sofar))
        Inference(cfg, mod, tst)
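
The main() above is excerpted without its module-level scaffolding. Below is a minimal sketch of the imports and entry point it appears to rely on; the project-specific classes (Params, Config, Model, Optimizer, Dataset, Checkpoint, Training, Inference) and print_time are assumed to come from the surrounding package, whose module paths are not shown in the excerpt.

# Hypothetical scaffolding for the main() excerpt above.
import random
import sys

import torch

# Project-specific imports (Params, Config, Model, Optimizer, Dataset,
# Checkpoint, Training, Inference, print_time) are omitted here because
# their module paths are not shown in the excerpt.

if __name__ == '__main__':
    main()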
Example 2
        raise ValueError
    if arguments.no_ft and arguments.pruning_iterations != 1:
        print("You can't specify a pruning_iteration value if there is no fine-tuning at all")
        raise ValueError
    get_mask = get_mask_function(arguments.pruning_type)
    _dataset = get_dataset(arguments)
    _targets = [int((n + 1) * (arguments.target / arguments.pruning_iterations)) for n in
                range(arguments.pruning_iterations)]

    # Train model
    print('Train model!')
    print(f'Regularization with t-{_targets[0]}')

    training_model = Checkpoint(arguments, 'training')
    training_model.regularization = Regularization(None, _targets[0], arguments)
    training_model.load()
    train_model(training_model, arguments, [0, arguments.epochs], _dataset, None, soft_pruning=arguments.soft_pruning)

    if arguments.lr_rewinding:
        training_model.rewind_lr()

    if arguments.no_ft:
        print('\nPruning model without fine-tuning:')
        pruned_model = training_model.clone('pruned')
        pruned_model.load()
        mask = get_mask(pruned_model.model, arguments.target)
        apply_mask(pruned_model.model, mask)
        _acc, _top5, _test_loss = test_model(_dataset, pruned_model.model, arguments)
        pruned_model.save_results({'epoch': 'before', 'acc': _acc, 'top5': _top5, 'loss': _test_loss,
                                   'norm': l2_norm(pruned_model.model),
                                   'pruned_param_count': pruned_model.model.compute_params_count(
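
The _targets list computed near the top of this example spreads the overall pruning target evenly over the pruning iterations. A quick self-contained illustration of what it produces, with made-up values:

# Illustration of the _targets computation above: a total target of 80
# reached in 4 equal pruning iterations (values chosen for illustration only).
target, pruning_iterations = 80, 4
_targets = [int((n + 1) * (target / pruning_iterations))
            for n in range(pruning_iterations)]
print(_targets)  # [20, 40, 60, 80]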
Example 3
    def train(
        self, train_loader, val_loader=None, max_epochs=1000, enable_early_stopping=True
    ):
        if val_loader is None:
            enable_early_stopping = False

        print()
        print("-" * 2, "Training Setup", "-" * 2)
        print(f"Maximum Epochs: {max_epochs}")
        print(f"Enable Early Stoping: {enable_early_stopping}")
        print("-" * 20)
        print("*Start Training.")

        # model setup
        self.model.train().to(self.device)
        if self.multi_gpus and torch.cuda.device_count() > 1:
            print(f"*Using {torch.cuda.device_count()} GPUs!")
            self.model = nn.DataParallel(self.model)

        # early stopping instance
        if enable_early_stopping:
            if self.early_stopping is None:
                self.early_stopping = EarlyStopping(patience=5)
            else:
                self.early_stopping.reset_counter()

        # training start!
        for epoch in range(1, max_epochs + 1):
            running_loss = 0.0

            for step, data in enumerate(train_loader, start=1):
                inputs, labels = data
                inputs, labels = inputs.to(self.device), labels.to(self.device)

                # Zero the parameter gradients
                self.optimizer.zero_grad()
                # forward + backward + optimize
                outputs = self.model(inputs)
                loss = self.loss_func(outputs, labels)
                loss.backward()
                self.optimizer.step()
                # print statistics
                running_loss += loss.item()

                if step % 100 == 0 or step == len(train_loader):
                    print(
                        f"[{epoch}/{max_epochs}, {step}/{len(train_loader)}] loss: {running_loss / step :.3f}"
                    )

            # train & validation loss
            train_loss = running_loss / len(train_loader)
            if val_loader is None:
                print(f"train loss: {train_loss:.3f}")
            else:
                # FIXME: the first validation result is not correct yet
                val_loss = self.validation(val_loader)
                print(f"train loss: {train_loss:.3f}, val loss: {val_loss:.3f}")

                if enable_early_stopping:
                    self.early_stopping(self.model, val_loss, self.optimizer)
                    if self.early_stopping.get_early_stop():
                        print("*Early Stopping.")
                        break

        print("*Finished Training!")
        if enable_early_stopping:
            checkpoint = self.early_stopping.get_checkpoint()
        else:
            checkpoint = Checkpoint()
            # val_loss is only defined when a validation loader was given,
            # so fall back to the training loss otherwise
            final_loss = val_loss if val_loader is not None else train_loss
            checkpoint.tmp_save(self.model, self.optimizer, epoch, final_loss)
        self.checkpoint = checkpoint
        self.model = checkpoint.load(self.model, self.optimizer)["model"]
        return self.model
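
The inner loop above follows the standard per-batch update pattern. Here is a self-contained restatement of just that pattern (zero_grad, forward, loss, backward, step) on dummy data, independent of the trainer class; the model, optimizer and loss below are stand-ins.

# Minimal, runnable restatement of the per-batch update used in train() above.
import torch
import torch.nn as nn

model = nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
loss_func = nn.CrossEntropyLoss()

inputs = torch.randn(8, 10)           # dummy mini-batch
labels = torch.randint(0, 2, (8,))    # dummy class labels

optimizer.zero_grad()                 # zero the parameter gradients
outputs = model(inputs)               # forward
loss = loss_func(outputs, labels)     # loss
loss.backward()                       # backward
optimizer.step()                      # optimize
print(f"loss: {loss.item():.3f}")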
Example 4
def train():
    args = configs.get_args()
    use_cuda = args.use_cuda and torch.cuda.is_available()

    # prepare dataset
    dataset = libs.dataset.MyDataset(min_length=args.min_length)
    voc_size = dataset.get_voc_size()
    dataloader = DataLoader(dataset, 1, True, drop_last=False)

    # prepare model
    model = models.TopModuleCNN(voc_size, output_channel=args.output_channel)
    if use_cuda:
        model = model.cuda()

    # load pretrained if asked
    if args.resume:
        checkpoint_path = Checkpoint.get_certain_checkpoint(
            "./experiment/cnn_net", "best")
        resume_checkpoint = Checkpoint.load(checkpoint_path)
        model = resume_checkpoint.model
        optimizer = resume_checkpoint.optimizer

        resume_optim = optimizer.optimizer
        defaults = resume_optim.param_groups[0]
        defaults.pop('params', None)
        optimizer.optimizer = resume_optim.__class__(model.parameters(),
                                                     **defaults)

        start_epoch = resume_checkpoint.epoch
        max_ans_acc = resume_checkpoint.max_ans_acc
    else:
        start_epoch = 1
        max_ans_acc = 0
        optimizer = NoamOpt(
            512, 1, 2000,
            optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))

    # define loss
    loss = nn.CrossEntropyLoss(weight=torch.tensor([1., 4.]))
    if use_cuda:
        loss = loss.cuda()

    # training
    for i in range(start_epoch, args.epochs):
        # test the model
        if args.resume:
            test_ans_acc = max_ans_acc
        else:
            test_ans_acc = test(DataLoader(dataset, 1, True, drop_last=False),
                                model, i)
        print('For EPOCH {}, total f1: {:.2f}'.format(i, test_ans_acc))

        # calculate loss
        j = 0
        los1 = []
        for _, data in enumerate(dataloader):
            j += 1
            x = data['que'].long()
            y = data['ans'].long()
            res = data['res'].long()
            if use_cuda:
                x, y, res = x.cuda(), y.cuda(), res.cuda()
            res_pred = model(x, y)

            los1.append(loss(res_pred, res).unsqueeze(0))

            # apply gradient
            if j % args.batch_size == 0:
                los1 = torch.cat(los1)
                los = los1.sum()
                model.zero_grad()
                los.backward()
                optimizer.step()
                los1 = []
                print('EPOCH: {}, {} / {}====> LOSS: {:.2f}'.format(
                    i, j // args.batch_size,
                    len(dataloader) // args.batch_size,
                    los.item() / args.batch_size))

        # save checkpoint
        if test_ans_acc > max_ans_acc:
            max_ans_acc = test_ans_acc
            th_checkpoint = Checkpoint(model=model,
                                       optimizer=optimizer,
                                       epoch=i,
                                       max_ans_acc=max_ans_acc)
            th_checkpoint.save_according_name("./experiment/cnn_net", 'best')
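
The NoamOpt wrapper constructed above with NoamOpt(512, 1, 2000, Adam) presumably implements the Noam learning-rate schedule from the Transformer literature: a linear warm-up followed by inverse-square-root decay. Assuming the usual (model_size, factor, warmup, optimizer) argument order, the rate at a given step would look roughly like this:

# Sketch of the Noam schedule that NoamOpt(512, 1, 2000, ...) presumably applies;
# this is an assumption based on the standard formulation, not this project's code.
def noam_rate(step, model_size=512, factor=1, warmup=2000):
    return factor * (model_size ** -0.5) * min(step ** -0.5, step * warmup ** -1.5)

for s in (100, 2000, 10000):
    print(s, round(noam_rate(s), 6))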