Example #1
    def setup_train(self, model_file_path=None):
        self.model = Model(model_file_path)

        params = list(self.model.encoder.parameters()) + list(self.model.decoder.parameters()) + \
                 list(self.model.reduce_state.parameters())
        initial_lr = config.lr_coverage if config.is_coverage else config.lr
        self.optimizer = Adagrad(
            params,
            lr=initial_lr,
            initial_accumulator_value=config.adagrad_init_acc)
        # self.optimizer = Adam(params)
        start_iter, start_loss = 0, 0

        if model_file_path is not None:
            state = torch.load(model_file_path,
                               map_location=lambda storage, location: storage)
            start_iter = state['iter']
            start_loss = state['current_loss']

            if not config.is_coverage:
                self.optimizer.load_state_dict(state['optimizer'])
                if use_cuda:
                    for state in self.optimizer.state.values():
                        for k, v in state.items():
                            if torch.is_tensor(v):
                                state[k] = v.cuda()

        return start_iter, start_loss
Example #2
    def setup_train(self, model_file_path=None):
        """模型初始化或加载、初始化迭代次数、损失、优化器"""
        # 初始化模型
        self.model = Model(model_file_path)

        # 模型参数的列表
        params = list(self.model.encoder.parameters()) + list(self.model.decoder.parameters()) + \
                 list(self.model.reduce_state.parameters())
        initial_lr = config.lr_coverage if config.is_coverage else config.lr  # lr_coverage和lr二选一
        # 定义优化器
        self.optimizer = Adagrad(params, lr=initial_lr, initial_accumulator_value=config.adagrad_init_acc)
        # 初始化迭代次数和损失
        start_iter, start_loss = 0, 0
        # 如果传入的已存在的模型路径,加载模型继续训练
        if model_file_path is not None:
            state = torch.load(model_file_path, map_location=lambda storage, location: storage)
            start_iter = state['iter']
            start_loss = state['current_loss']

            if not config.is_coverage:
                self.optimizer.load_state_dict(state['optimizer'])
                if USE_CUDA:
                    for state in self.optimizer.state.values():
                        for k, v in state.items():
                            if torch.is_tensor(v):
                                state[k] = v.to(DEVICE)

        return start_iter, start_loss
Example #3
    def __init__(self, *args, **kwargs):
        super(WeightedHolE, self).__init__()
        # self.add_hyperparam('rparam', kwargs.pop('rparam', 0.0))

        self.learning_rate = kwargs.get('lr', _DEF_LEARNING_RATE)
        entity_dim, _, relation_dim = args[0]
        embed_dim = args[1]
        self._max_epochs = kwargs.get('max_epochs', _DEF_MAX_EPOCHS)
        
        init_relations = kwargs.get('init_relations')
        if init_relations is not None:
            self.R = nn.Parameter(init_relations)
        else:
            self.R = nn.Parameter(torch.FloatTensor(relation_dim, embed_dim).uniform_(-.1,.1))
        self.R.my_name = 'R'
        self.R.grad = torch.zeros_like(self.R)
        
        pretrained_ent = kwargs.get('pretrained_entities')
        if pretrained_ent is not None:
            self.E = nn.Parameter(pretrained_ent)
        else:
            self.E = nn.Parameter(torch.FloatTensor(entity_dim, embed_dim).uniform_(-.1,.1))
        self.E.my_name = 'E'
        self.E.grad = torch.zeros_like(self.E)
        
        self.loss_function = nn.SoftMarginLoss(reduction='sum')
        self.optim = Adagrad(list(self.parameters()), lr=self.learning_rate)
Example #4
    def __init__(self, model, args):
        self.encoder = model.encoder
        self.decoder = model.decoder
        self.lr = {
            "encoder": args.encoder_learning_rate,
            "decoder": args.decoder_learning_rate
        }
        self.warmup_steps = {
            "encoder": args.encoder_warmup_steps,
            "decoder": args.decoder_warmup_steps,
        }
        if args.optimizer == "adam":
            self.optimizers = {
                "encoder": Adam(model.encoder.parameters(),
                                lr=self.lr["encoder"]),
                "decoder": Adam(model.decoder.parameters(),
                                lr=self.lr["decoder"]),
            }
        elif args.optimizer == "adagrad":
            self.optimizers = {
                "encoder":
                Adagrad(model.encoder.parameters(), lr=self.lr["encoder"]),
                "decoder":
                Adagrad(model.decoder.parameters(), lr=self.lr["decoder"]),
            }
        else:
            raise NotImplementedError

        self._step = 0
Example #5
    def setup_train(self, model_file_path=None):
        self.model = Model(model_file_path)

        params = list(self.model.encoder.parameters()) + list(self.model.decoder.parameters()) + \
                 list(self.model.reduce_state.parameters())
        #print("params : ",params)
        #print("params collection is completed....")
        initial_lr = config.lr_coverage if config.is_coverage else config.lr
        self.optimizer = Adagrad(
            params,
            lr=initial_lr,
            initial_accumulator_value=config.adagrad_init_acc)

        start_iter, start_loss = 0, 0

        #### Loading state where the training stopped earlier use that to train for future epoches ####
        if model_file_path is not None:
            state = torch.load(model_file_path,
                               map_location=lambda storage, location: storage)
            start_iter = state['iter']
            start_loss = state['current_loss']

            if not config.is_coverage:
                self.optimizer.load_state_dict(state['optimizer'])
                ###### Making into GPU/server accessable Variables #####
                if use_cuda:
                    for state in self.optimizer.state.values():
                        for k, v in state.items():
                            if torch.is_tensor(v):
                                state[k] = v.cuda()
        return start_iter, start_loss
Example #6
    def fit(self, data_loader, print_freq=1000, num_epochs=10):
        ''' fit to the data

        Parameters
        ----------
        data_loader : DataLoader
            when iterated, it yields an array-like object of shape (batch_size, length),
            where each element corresponds to a word index.
        print_freq : int
            how frequently to print the loss
        num_epochs : int
            the number of epochs
        '''

        def repackage_hidden(h):
            """Wraps hidden states in new Variables, to detach them from their history."""
            if type(h) == Variable:
                return Variable(h.data)
            else:
                return tuple(repackage_hidden(v) for v in h)

        if self.padding_idx is None:
            criterion = nn.CrossEntropyLoss()
        else:
            criterion = nn.CrossEntropyLoss(ignore_index=self.padding_idx)
        optimizer = Adagrad(self.parameters())
        i = 0
        running_loss = 0
        for epoch in range(num_epochs):
            for each_idx, each_batch in enumerate(data_loader):
                batch_var = Variable(each_batch, requires_grad=False)
                if self.use_gpu:
                    batch_var = batch_var.cuda()

                optimizer.zero_grad()
                pred_batch = self.forward(batch_var[:, :-1])

                # contiguous() is not in-place, so reassign; the target is shifted
                # by one position so each step predicts the next token
                pred_batch = pred_batch.contiguous()
                tgt = batch_var[:, 1:].contiguous()
                loss = criterion(pred_batch.view(-1, self.vocab_size),
                                 tgt.view(-1))
                loss.backward()
                optimizer.step()
                self.init_hidden()

                # print statistics
                running_loss += loss.item()
                i += 1
                if i % print_freq == print_freq-1:
                    print('epoch: {}\t total examples: {}\t loss: {}'.format(
                        epoch + 1, (i + 1) * self.batch_size, running_loss / print_freq))
                    running_loss = 0.0

        print('Finished Training')
Example #7
    def __fit(self, dataset: Tensor, model: VAEBase):
        it = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)

        dur = self.noepochs * ceil(len(dataset) / self.batch_size)
        history = History(zeros(dur), zeros(dur), zeros(dur), zeros(dur),
                          zeros(dur))

        hooks = CombinedHook()
        hooks.add(LossHook)
        hooks.add(RekonstruktHook, dataset[:10, :])
        hooks.add(LatentSamplerHook, self.nolatents)
        hooks.prehook(self, history)

        self.model = model
        self.noinputs = model.noinputs
        self.opt = Adagrad(self.model.parameters(), lr=0.01)  # See Section 5.

        for epoch in range(self.noepochs):
            for i, x in enumerate(it):
                self.opt.zero_grad()

                # Apply model in the following steps:
                # (a) encode datapoint into latent space;
                # (b) sample points from latent space;
                # (c) decode sampled points from latent space.
                mu, logsigma2 = self.model.encode(x)
                z = self.model.sample(mu, logsigma2)
                X = self.model.decode(z)

                # Estimate KL-divergence and reconstruction error (RE).
                kl = self.model.kl(mu, logsigma2)
                re = self.model.re(x, X)

                # Do error backpropagation.
                loss = kl + re
                loss.backward()
                self.opt.step()

                # Aggregate runtime statistics.
                history.append(epoch=epoch,
                               batch=i,
                               kl=float(kl / self.batch_size),
                               re=float(re / self.batch_size))

                if i % self.show_every == 0:
                    hooks.hook(self, history)

        # Print status before exit.
        hooks.posthook(self, history)

        # Return self to allow call chaining.
        return self
Example #8
 def __init__(self,
              params,
              lr=1e-2,
              lr_decay=0,
              weight_decay=0,
              initial_accumulator_value=0,
              grad_clip=0):
     Adagrad.__init__(self,
                      params,
                      lr=lr,
                      lr_decay=lr_decay,
                      weight_decay=weight_decay,
                      initial_accumulator_value=initial_accumulator_value)
     self.defaults['grad_clip'] = grad_clip
     self.param_groups[0].setdefault('grad_clip', grad_clip)
Example #9
def create_optimiser(params, args):
    name = args.optim.lower()

    lr = args.lr
    wd = args.weight_decay

    if name == 'adam':
        b = [args.beta1, args.beta2]
        if wd > 0:
            from torch.optim import AdamW
            opt = AdamW(params, lr, betas=b, weight_decay=wd)
        else:
            from torch.optim import Adam
            opt = Adam(params, lr, betas=b)

    elif name == 'sgd':
        from torch.optim import SGD
        m = args.momentum
        opt = SGD(params, lr, momentum=m, nesterov=True, weight_decay=wd)

    elif name == 'rmsprop':
        from torch.optim import RMSprop
        m = args.momentum
        a = args.alpha
        opt = RMSprop(params, lr, momentum=m, alpha=a, weight_decay=wd)

    elif name == 'adagrad':
        from torch.optim import Adagrad
        ld = args.lr_decay
        opt = Adagrad(params, lr, lr_decay=ld, weight_decay=wd)

    else:
        raise ValueError('optim must be one of adam, sgd, rmsprop, adagrad')

    return opt
Example #10
 def _set_optimizer(self, lr, opt_conf):
     """optimizerとしてself._optimizerの指示の元、インスタンスを立てるメソッド
     """
     if self._optimizer in adam:
         return Adam([{
             'params': self.model.parameters()
         }],
                     lr=lr,
                     **opt_conf)
     elif self._optimizer in sgd:
         return SGD([{
             'params': self.model.parameters()
         }],
                    lr=lr,
                    **opt_conf)
     elif self._optimizer in rmsprop:
         return RMSprop([{
             'params': self.model.parameters()
         }],
                        lr=lr,
                        **opt_conf)
     elif self._optimizer in adadelta:
         return Adadelta([{
             'params': self.model.parameters()
         }],
                         lr=lr,
                         **opt_conf)
     elif self._optimizer in adagrad:
         return Adagrad([{
             'params': self.model.parameters()
         }],
                        lr=lr,
                        **opt_conf)
     else:
         raise ValueError(f'optimizer={self._optimizer} is not supported')
Example #11
    def setup(self, config):

        model = Model(config)
        checkpoint = None
        if config.train_from != '':
            logging('Train from %s' % config.train_from)
            checkpoint = torch.load(config.train_from, map_location='cpu')
            model.load_state_dict(checkpoint['model'])
            self.step = checkpoint['step']

        self.model = model.to(device)
        self.optimizer = Adagrad(model.parameters(),
                                 lr=config.learning_rate,
                                 initial_accumulator_value=config.initial_acc)
        if checkpoint is not None:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
Example #12
    def setup_train(self,
                    model_file_path=None,
                    emb_v_path=None,
                    emb_list_path=None,
                    vocab=None,
                    log=None):
        self.model = Model(model_file_path)
        if model_file_path is None:
            set_embedding(self.model,
                          emb_v_path=emb_v_path,
                          emb_list_path=emb_list_path,
                          vocab=self.vocab,
                          use_cuda=use_cuda,
                          log=log)
        params = list(self.model.encoder.parameters()) + list(self.model.decoder.parameters()) + \
                 list(self.model.reduce_state.parameters())
        initial_lr = config.lr_coverage if config.is_coverage else config.lr
        if config.mode == 'MLE':
            self.optimizer = Adagrad(params,
                                     lr=0.15,
                                     initial_accumulator_value=0.1)
        else:
            self.optimizer = Adam(params, lr=initial_lr)

        start_iter, start_loss = 0, 0

        if model_file_path is not None:
            state = torch.load(model_file_path,
                               map_location=lambda storage, location: storage)
            start_iter = state['iter']
            start_loss = state['current_loss']
        return start_iter, start_loss
Example #13
    def setup(self, config):

        self.model = Model(config).to(config['device'])
        self.optimizer = Adagrad(self.model.parameters(),
                                 lr=config['learning_rate'],
                                 initial_accumulator_value=0.1)
        # self.optimizer = Adam(self.model.parameters(),lr = config['learning_rate'],betas = config['betas'])
        checkpoint = None

        # Note: between two calls to most_common(), Counter may output elements
        # with equal frequency in a different order...!
        if config['train_from'] != '':
            logging('Train from %s' % config['train_from'])
            checkpoint = torch.load(config['train_from'], map_location='cpu')
            self.model.load_state_dict(checkpoint['model'])
            self.step = checkpoint['step']
            self.vocab = checkpoint['vocab']
            self.optimizer.load_state_dict(checkpoint['optimizer'])
Example #14
    def train(self, train_data, tester_val, tester_tst):
        head, tail, rela = train_data
        # useful information related to cache
        n_train = len(head)

        if self.args.optim=='adam' or self.args.optim=='Adam':
            self.optimizer = Adam(self.model.parameters(), lr=self.args.lr)
        elif self.args.optim=='adagrad' or self.args.optim=='Adagrad':
            self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr)
        else:
            self.optimizer = SGD(self.model.parameters(), lr=self.args.lr)

        scheduler = ExponentialLR(self.optimizer, self.args.decay_rate)

        n_epoch = self.args.n_epoch
        n_batch = self.args.n_batch
        best_mrr = 0

        # used for counting repeated triplets for margin based loss

        for epoch in range(n_epoch):
            start = time.time()

            self.epoch = epoch
            rand_idx = torch.randperm(n_train)
            head = head[rand_idx].cuda()
            tail = tail[rand_idx].cuda()
            rela = rela[rand_idx].cuda()

            epoch_loss = 0

            for h, t, r in batch_by_size(n_batch, head, tail, rela, n_sample=n_train):
                self.model.zero_grad()

                loss = self.model.forward(h, t, r)
                loss += self.args.lamb * self.model.regul
                loss.backward()
                self.optimizer.step()
                self.prox_operator()
                epoch_loss += loss.data.cpu().numpy()

            self.time_tot += time.time() - start
            scheduler.step()

            if (epoch+1) %  self.args.epoch_per_test == 0:
                # output performance 
                valid_mrr, valid_mr, valid_10 = tester_val()
                test_mrr,  test_mr,  test_10 = tester_tst()
                out_str = '%.4f\t\t%.4f\t%.4f\t%.4f\n'%(epoch + 1, test_mr, test_mrr, test_10)

                # output the best performance info
                if valid_mrr > best_mrr:
                    best_mrr = valid_mrr
                    best_str = out_str
                if best_mrr < self.args.thres:
                    print('\tearly stopped in Epoch:{}, best_mrr:{}'.format(epoch+1, best_mrr), self.model.struct)
                    return best_str
        return best_mrr, best_str
Example #15
def fit(model, num_epochs, trainloader, valloader):
    criterion = nn.BCELoss()  # binary cross-entropy loss
    optimizer = Adagrad(model.parameters(), lr=lr, lr_decay=lr/num_epochs)
    scheduler = OneCycleLR(optimizer, lr_range=(lr, 1.), num_steps=1000)
    print("epoch\ttrain loss\tvalid loss\taccuracy")
    for epoch in range(num_epochs):
        train_loss = train(trainloader, model, criterion, optimizer, scheduler)
        valid_loss, valid_acc = validate(valloader, model, criterion)
        print(f"{epoch}\t{train_loss:.5f}\t\t{valid_loss:.5f}\t\t{valid_acc:.3f}")
Example #16
 def build(self, params):
     from torch.optim import Adagrad
     return Adagrad(
         params,
         lr=self.lr,
         lr_decay=self.lr_decay,
         weight_decay=self.weight_decay,
         initial_accumulator_value=self.initial_accumulator_value,
         eps=self.eps)
Example #17
def get_optimizer(optim_name, fixed_cnn, args):
    """Return the asked optimizer"""

    optimizers = {
        'sgd': SGD(params=adjust_weight_decay(fixed_cnn, args.l2_reg), lr=args.lr, momentum=0.9, nesterov=True),
        'adam': Adam(params=adjust_weight_decay(fixed_cnn, args.l2_reg), lr=args.lr),
        'adagrad': Adagrad(params=adjust_weight_decay(fixed_cnn, args.l2_reg), lr=args.lr),
        'rmsprop': RMSprop(params=adjust_weight_decay(fixed_cnn, args.l2_reg), lr=args.lr)
    }
    return optimizers.get(optim_name)
Example #18
def dispatch_optimizer(model, args):
    if args.optimizer == 'SGD':
        return SGD(model.parameters(), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay)
    if args.optimizer == 'Adam':
        return Adam(model.parameters(), lr=args.learning_rate)
    if args.optimizer == 'AdamW':
        return AdamW(model.parameters(), lr=args.learning_rate)
    if args.optimizer == 'RMSprop':
        return RMSprop(model.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay)
    if args.optimizer == 'Adagrad':
        return Adagrad(model.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay)
    raise ValueError('Unknown optimizer: {}'.format(args.optimizer))
Example #19
 def configure_optimizers(self):
     args = self.config.optimizer.args
     lr = args.lr
     lr_init_accum = args.lr_init_accum
     params = self.parameters()
     optimizer = Adagrad(
         params=params,
         lr=lr,
         initial_accumulator_value=lr_init_accum,
     )
     return optimizer
Example #20
def train_model(model, lr, epochs, train_loader, val_loader, patience):
    optimizer = Adagrad(model.parameters(), lr)
    criterion = nn.MSELoss()

    best_rmse = 100
    rounds_no_improve = 0
    for epoch in range(epochs):
        for users, items, x, y in train_loader:
            y_pred = model(users, items, x)
            loss = criterion(y_pred.reshape(-1), y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        logging.info('Last train loss: {0:.3f}'.format(
            loss.detach().cpu().numpy().tolist()))
        with torch.no_grad():
            errors = np.array([])
            for users, items, x, y in val_loader:
                y_pred = model(users, items, x)
                group_errors = (y_pred - y).reshape(-1).cpu().numpy()
                errors = np.concatenate([errors, group_errors])
            rmse = (errors**2).mean()**0.5
            logging.info('Test RMSE: {0:.3f}'.format(rmse))
            if rmse < best_rmse:
                best_rmse = rmse
                rounds_no_improve = 0
            else:
                rounds_no_improve += 1
            if rounds_no_improve >= patience:
                return model
    return model
Example #21
def run_train():
    datainfo, vocabs = set_up_data()
    train_sampler = RandomSampler()
    criterion = SummLoss(config=config, padding_idx=vocabs.to_index(PAD_TOKEN))
    model = CGSum(config, vocab=vocabs)
    model.to(device)

    initial_lr = config.lr
    logger.info(f"learning rate = {initial_lr}")
    optimizer = Adagrad(filter(lambda p: p.requires_grad, model.parameters()),
                        lr=initial_lr,
                        initial_accumulator_value=config.adagrad_init_acc)

    train_loader = datainfo.datasets["train"]
    valid_loader = datainfo.datasets["dev"]

    callbacks = [
        TrainCallback(config, patience=10),
        FitlogCallback(),
        LRDecayCallback(optimizer.param_groups, steps=args.weight_decay_step)
    ]
    trainer = Trainer(model=model,
                      train_data=train_loader,
                      optimizer=optimizer,
                      loss=criterion,
                      batch_size=config.batch_size,
                      check_code_level=-1,
                      sampler=train_sampler,
                      n_epochs=config.n_epochs,
                      print_every=100,
                      dev_data=valid_loader,
                      update_every=args.update_every,
                      metrics=FastRougeMetric(
                          pred='prediction',
                          art_oovs='article_oovs',
                          abstract_sentences='abstract_sentences',
                          config=config,
                          vocab=datainfo.vocabs["vocab"]),
                      metric_key="rouge-l-f",
                      validate_every=args.validate_every * args.update_every,
                      save_path=None,
                      callbacks=callbacks,
                      use_tqdm=True)

    logger.info("-" * 5 + "start training" + "-" * 5)
    traininfo = trainer.train(load_best_model=True)

    logger.info('   | end of Train | time: {:5.2f}s | '.format(
        traininfo["seconds"]))
    logger.info('[INFO] best eval model in epoch %d and iter %d',
                traininfo["best_epoch"], traininfo["best_step"])
Example #22
    def setup_train(self, model_file_path=None):
        self.model = Model(model_file_path)

        params = list(self.model.encoder.parameters()) + list(self.model.decoder.parameters()) + \
                 list(self.model.reduce_state.parameters())
        print('total number of parameters: ' +
              str(sum([p.numel() for p in params if p.requires_grad])))
        initial_lr = config.lr_coverage if config.is_coverage else config.lr
        self.optimizer = Adagrad(
            params,
            lr=initial_lr,
            initial_accumulator_value=config.adagrad_init_acc)
        if config.is_mixed_precision_training:
            [
                self.model.encoder, self.model.decoder, self.model.reduce_state
            ], self.optimizer = amp.initialize([
                self.model.encoder, self.model.decoder, self.model.reduce_state
            ],
                                               self.optimizer,
                                               loss_scale=1.0,
                                               opt_level="O2")
        start_iter, start_loss = 0, 0

        if model_file_path is not None:
            state = torch.load(model_file_path,
                               map_location=lambda storage, location: storage)
            start_iter = state['iter']
            start_loss = state['current_loss']

            if not config.is_coverage:
                self.optimizer.load_state_dict(state['optimizer'])
                if use_cuda:
                    for state in self.optimizer.state.values():
                        for k, v in state.items():
                            if torch.is_tensor(v):
                                state[k] = v.cuda()

        return start_iter, start_loss
Example #23
 def make_optimizer(params: Iterable[torch.nn.Parameter], is_emb: bool) -> Optimizer:
     params = list(params)
     if len(params) == 0:
         optimizer = DummyOptimizer()
     elif is_emb:
         optimizer = RowAdagrad(params, lr=config.lr)
     else:
         if config.relation_lr is not None:
             lr = config.relation_lr
         else:
             lr = config.lr
         optimizer = Adagrad(params, lr=lr)
     optimizer.share_memory()
     return optimizer
Example #24
    def __init__(self,
                 model,
                 args,
                 train_dataset,
                 eval_dataset,
                 test_dataset,
                 vocab,
                 is_train=True):
        self.model = model  #.to(args.device)
        self.args = args
        self.train_dataset = train_dataset
        self.eval_dataset = eval_dataset
        self.test_dataset = test_dataset
        self.is_train = is_train
        self.vocab = vocab

        self.params = list(model.encoder.parameters()) + \
            list(model.decoder.parameters()) + list(model.reduce_state.parameters())
        initial_lr = args.lr_coverage if args.is_coverage else args.lr
        self.optimizer = Adagrad(
            self.params,
            lr=initial_lr,
            initial_accumulator_value=args.adagrad_init_acc)
Example #25
 def test_train_lcwa(self) -> None:
     """Test that LCWA training does not fail."""
     loop = LCWATrainingLoop(
         model=self.model,
         optimizer=Adagrad(params=self.model.get_grad_params(), lr=0.001),
         **(self.training_loop_kwargs or {}),
     )
     losses = self._safe_train_loop(
         loop,
         num_epochs=self.train_num_epochs,
         batch_size=self.train_batch_size,
         sampler='default',
     )
     self.assertIsInstance(losses, list)
Example #26
def adagrad(parameters):
    # pick defaults
    if not ("lr_decay" in parameters["optimizer"]):
        parameters["optimizer"]["lr_decay"] = 0
    if not ("eps" in parameters["optimizer"]):
        parameters["optimizer"]["eps"] = 1e-6
    if not ("weight_decay" in parameters["optimizer"]):
        parameters["optimizer"]["weight_decay"] = 0

    return Adagrad(
        parameters["model_parameters"],
        lr=parameters["learning_rate"],
        lr_decay=parameters["optimizer"]["lr_decay"],
        eps=parameters["optimizer"]["eps"],
        weight_decay=parameters["optimizer"]["weight_decay"],
    )
Example #27
def get_optimizer(net):
	if args.optimizer == 'sgd':
		optimizer = SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
	elif args.optimizer == 'nesterov':
		optimizer = SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
						weight_decay=args.weight_decay,nesterov=True)
	elif args.optimizer == 'adagrad':
		optimizer = Adagrad(net.parameters(), weight_decay=args.weight_decay)
	elif args.optimizer == 'adadelta':
		optimizer = Adadelta(net.parameters(), weight_decay=args.weight_decay)
	elif args.optimizer == 'adam':
		optimizer = Adam(net.parameters(), weight_decay=args.weight_decay)
	else:
		raise Exception('Invalid optimizer specified.')

	return optimizer
Example #28
    def testAccuracy_AsyncAdagrad(self):
        for sparse in (True, False):
            # testing that Adagrad = AsyncAdagrad with 1 process
            NE = 10000
            golden_model = nn.Embedding(NE, 100, sparse=sparse)
            test_model = nn.Embedding(NE, 100, sparse=sparse)
            test_model.load_state_dict(golden_model.state_dict())

            golden_optimizer = Adagrad(golden_model.parameters())
            self._stress_optimizer(golden_model, golden_optimizer, num_processes=1)

            test_optimizer = AsyncAdagrad(test_model.parameters())
            self._stress_optimizer(test_model, test_optimizer, num_processes=1)

            # This fails for Adagrad because it's not stable
            self.assertTensorEqual(golden_model.weight, test_model.weight)
Example #29
def demo_pytorch_vae_mnist(hidden_sizes=[200, 200],
                           latent_dim=5,
                           distribution_type='bernoulli',
                           minibatch_size=20,
                           checkpoints=100,
                           n_epochs=20):

    cp = Checkpoints(checkpoints)

    model = VAEModel(
        encoder=make_mlp_encoder(visible_dim=784,
                                 hidden_sizes=hidden_sizes,
                                 latent_dim=latent_dim),
        decoder=make_mlp_decoder(latent_dim=latent_dim,
                                 hidden_sizes=hidden_sizes,
                                 visible_dim=784,
                                 dist_type=distribution_type),
        latent_dim=latent_dim,
    )
    # optimizer = Adam(params = model.parameters())
    # optimizer = RMSprop(params = model.parameters())
    # optimizer = Adamax(params = model.parameters())
    optimizer = Adagrad(params=model.parameters())
    # optimizer = SGD(lr=0.001, params = model.parameters())

    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../data',
        train=True,
        download=True,
        transform=transforms.Compose([transforms.ToTensor()])),
                                               batch_size=minibatch_size,
                                               shuffle=True)

    for epoch in range(n_epochs):
        for batch_idx, (x, y) in enumerate(train_loader):

            epoch_pt = epoch + batch_idx / len(train_loader)

            optimizer.zero_grad()
            loss = -model.elbo(x.flatten(1)).sum()
            loss.backward()
            optimizer.step()

            rate = measure_global_rate('training')

            if cp():

                print(f'Mean Rate at Epoch {epoch_pt:.2g}: {rate:.3g}iter/s')
                z_samples = model.prior().sample((64, ))
                x_dist = model.decode(z_samples)
                dbplot(x_dist.mean.reshape(-1, 28, 28),
                       'Sample Means',
                       title=f'Sample Means at epoch {epoch_pt:.2g}')
Example #30
def run_train(config):
    train_dir, model_dir = initial_dir('train', config)
    config.train_path = train_dir
    config.model_path = model_dir
    print_config(config, train_dir)
    datainfo = set_up_data('train', config)
    train_sampler = BucketSampler(batch_size=config.batch_size, seq_len_field_name='enc_len')
    criterion = MyLoss(config=config, padding_idx=datainfo.vocabs["train"].to_index(PAD_TOKEN))

    model = Model(vocab=datainfo.vocabs["train"], config=config)
    params = list(model.encoder.parameters()) + list(model.decoder.parameters()) + \
             list(model.reduce_state.parameters())
    initial_lr = config.lr_coverage if config.is_coverage else config.lr
    optimizer = Adagrad(params, lr=initial_lr, initial_accumulator_value=config.adagrad_init_acc)

    train_loader = datainfo.datasets["train"]
    valid_loader = datainfo.datasets["dev"]
    summary_writer = tf.compat.v1.summary.FileWriter(train_dir)
    trainer = Trainer(model=model, train_data=train_loader, optimizer=optimizer, loss=criterion,
                      batch_size=config.batch_size, check_code_level=-1,
                      n_epochs=config.n_epochs, print_every=100, dev_data=valid_loader,
                      metrics=FastRougeMetric(pred='prediction', art_oovs='article_oovs',
                                              abstract_sentences='abstract_sentences', config=config,
                                              vocab=datainfo.vocabs["train"]),
                      metric_key="rouge-l-f", validate_every=-1, save_path=model_dir,
                      callbacks=[TrainCallback(config, summary_writer, patience=10)], use_tqdm=False,
                      device=config.visible_gpu)

    logger.info("-" * 5 + "start training" + "-" * 5)

    traininfo = trainer.train(load_best_model=True)
    logger.info('   | end of Train | time: {:5.2f}s | '.format(traininfo["seconds"]))
    logger.info('[INFO] best eval model in epoch %d and iter %d', traininfo["best_epoch"], traininfo["best_step"])
    logger.info(traininfo["best_eval"])

    bestmodel_save_path = os.path.join(config.model_path,
                                       'bestmodel.pkl')  # this is where checkpoints of best models are saved
    state = {
        'encoder_state_dict': model.encoder.state_dict(),
        'decoder_state_dict': model.decoder.state_dict(),
        'reduce_state_dict': model.reduce_state.state_dict()
    }
    torch.save(state, bestmodel_save_path)
    # Wasn't the model passed into the Trainer as an argument? How do changes to it inside the Trainer affect it out here?
    logger.info('[INFO] Saving eval best model to %s', bestmodel_save_path)