Example #1
 def test_sparse_adam(self):
     self._test_rosenbrock_sparse(
         lambda params: optim.SparseAdam(params, lr=4e-2),
         True
     )
     with self.assertRaisesRegex(ValueError, "Invalid beta parameter at index 0: 1.0"):
         optim.SparseAdam(None, lr=1e-2, betas=(1.0, 0.0))
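
Note: SparseAdam only accepts parameters whose gradients arrive as sparse tensors, which in practice means embedding tables built with sparse=True. A minimal, self-contained sketch of that constraint (illustrative names, not part of the test above):

import torch
import torch.nn as nn
import torch.optim as optim

# An embedding created with sparse=True produces sparse gradients, which is
# what SparseAdam expects; dense gradients make .step() raise an error.
emb = nn.Embedding(1000, 32, sparse=True)
opt = optim.SparseAdam(emb.parameters(), lr=1e-3)

ids = torch.randint(0, 1000, (8,))
loss = emb(ids).sum()
loss.backward()   # emb.weight.grad is a sparse tensor
opt.step()
opt.zero_grad()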
Example #2
    def _init_optimizer(self):
        """
        """
        if not hasattr(self, 'embeddings_'):
            raise ValueError('You should call .fit first!')

        params = {'sparse': [], 'dense': []}
        for lyr in self.embeddings_.children():
            if lyr.sparse:
                params['sparse'].append(lyr.weight)
            else:
                params['dense'].append(lyr.weight)

        # determine if sparse optimizer needed
        if len(params['sparse']) == 0:
            self.opt = optim.Adam(params['dense'],
                                  lr=self.learn_rate,
                                  weight_decay=self.l2)
        elif len(params['dense']) == 0:
            self.opt = optim.SparseAdam(params['sparse'], lr=self.learn_rate)
        else:
            # init multi-optimizers
            # register it to the instance
            self.opt = MultipleOptimizer(
                optim.SparseAdam(params['sparse'], lr=self.learn_rate),
                optim.Adam(params['dense'],
                           lr=self.learn_rate,
                           weight_decay=self.l2))
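
MultipleOptimizer is not defined in this snippet; a minimal wrapper consistent with the positional call above (an assumed sketch, not the project's actual class) simply fans zero_grad() and step() out to every wrapped optimizer:

class MultipleOptimizer:
    """Drive several optimizers as one (assumed sketch, not the original class)."""

    def __init__(self, *optimizers):
        self.optimizers = optimizers

    def zero_grad(self):
        for opt in self.optimizers:
            opt.zero_grad()

    def step(self):
        for opt in self.optimizers:
            opt.step()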
Example #3
    def __init__(self,
                 logger,
                 ntm_params=None,
                 tmn_params=None,
                 kl_growing_epoch=0):
        # avoid mutable default arguments
        self.ntm_params = ntm_params if ntm_params is not None else {}
        self.ntm = NeuralTopicModel(**self.ntm_params)
        self.ntm.cuda()

        self.tmn_params = tmn_params if tmn_params is not None else {}
        self.tmn = TopicMemoryNetwork(**self.tmn_params)
        self.tmn.cuda()

        self.ntm_optimizer = optim.SparseAdam(self.ntm.parameters())
        self.tmn_optimizer = optim.SparseAdam(self.tmn.parameters())

        self.kl_strength = 1.0
        self.ntm_loss_fn = NTMLoss(self.kl_strength)

        self.logger = logger

        self.optimize_ntm = True
        self.first_optimize_ntm = True
        self.min_bound_ntm = np.inf
        self.min_bound_cls = -np.inf
        self.epoch_since_improvement = 0
        self.epoch_since_improvement_global = 0

        self.psudo_indices = np.expand_dims(np.arange(
            self.ntm_params.get("n_topics")),
                                            axis=0)
        self.kl_growing_epoch = kl_growing_epoch

        self.max_epochs = 0
        self.current_epoch = 0
Example #4
def optimizer(params, mode, *args, **kwargs):

    if mode == 'SGD':
        opt = optim.SGD(params, *args, momentum=0., **kwargs)
    elif mode.startswith('nesterov'):
        momentum = float(mode[len('nesterov'):])
        opt = optim.SGD(params, *args, momentum=momentum, nesterov=True, **kwargs)
    elif mode.lower() == 'adam':
        betas = kwargs.pop('betas', (.9, .999))
        opt = optim.Adam(params, *args, betas=betas, amsgrad=True, **kwargs)
    elif mode.lower() == 'adam_hyp2':
        betas = kwargs.pop('betas', (.5, .99))
        opt = optim.Adam(params, *args, betas=betas, amsgrad=True, **kwargs)
    elif mode.lower() == 'adam_hyp3':
        betas = kwargs.pop('betas', (0., .99))
        opt = optim.Adam(params, *args, betas=betas, amsgrad=True, **kwargs)
    elif mode.lower() == 'adam_sparse':
        betas = kwargs.pop('betas', (.9, .999))
        opt = optim.SparseAdam(params, *args, betas=betas)
    elif mode.lower() == 'adam_sparse_hyp2':
        betas = kwargs.pop('betas', (.5, .99))
        opt = optim.SparseAdam(params, *args, betas=betas)
    elif mode.lower() == 'adam_sparse_hyp3':
        betas = kwargs.pop('betas', (.0, .99))
        opt = optim.SparseAdam(params, *args, betas=betas)
    else:
        raise NotImplementedError()
    return opt
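
A usage sketch for the factory above (model objects and learning rates are placeholders, and torch.optim is assumed to be imported as optim as in the snippet): the dense 'adam*' modes forward extra keyword arguments, while the 'adam_sparse*' branches forward only positional arguments, so the learning rate is passed positionally there.

import torch.nn as nn

dense_model = nn.Linear(16, 4)
opt_dense = optimizer(dense_model.parameters(), 'adam', lr=1e-3)

emb = nn.Embedding(100, 16, sparse=True)
opt_sparse = optimizer(emb.parameters(), 'adam_sparse', 1e-3)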
Example #5
 def train(self,
           iterations,
           lr=None,
           negative_targets=5,
           discard_probability=0.05):
     for iteration in range(iterations):
         print("Epoch: {}".format(iteration + 1))
         if not lr:
             lr = self.initial_lr
         optimizer = optim.SparseAdam(list(
             self.skip_gram_model.parameters()),
                                      lr=lr)
         self.dataset.update_discard_probability(discard_probability)
         self.dataset.update_negative_targets(negative_targets)
         self.dataloader = self.create_dataloader(self.dataset)
         running_loss = 0.0
         for i, sample_batched in enumerate(tqdm(self.dataloader)):
             if len(sample_batched[0]) > 1:
                 pos_u = sample_batched[0].to(self.device)
                 pos_v = sample_batched[1].to(self.device)
                 neg_v = sample_batched[2].to(self.device)
                 optimizer.zero_grad()
                 loss = self.skip_gram_model.forward(pos_u, pos_v, neg_v)
                 loss.backward()
                 optimizer.step()
                 running_loss += loss.item()
         print("Loss:", running_loss)
         self.skip_gram_model.save_embedding(self.dataset.data.id2gene,
                                             self.output_file_name)
Example #6
    def fit(self, raw_inputs):
        """ Learn a vocabulary """
        flat_items = raw_inputs if self.reduce is None else list(it.chain.from_iterable(raw_inputs))
        if self.vocab_size is None:
            # if vocab size is None, use all items
            cutoff = len(flat_items)
        elif isinstance(self.vocab_size, float):
            # if vocab size is a float, interpret it as the fraction of top items (authors) to keep
            cutoff = int(self.vocab_size * len(flat_items))
        else:
            # otherwise assume a fixed integer vocab size
            cutoff = int(self.vocab_size)
        print("Using top {:.2f}% authors ({})".format(cutoff / len(flat_items) * 100, cutoff))

        item_cnt = Counter(flat_items).most_common(cutoff)
        # index 0 is reserved for unk idx
        self.vocab = {value: idx + 1 for idx, (value, __) in enumerate(item_cnt)}
        num_embeddings = len(self.vocab) + 1
        self.embedding = nn.Embedding(num_embeddings,
                                      self.embedding_dim,
                                      padding_idx=self.padding_idx,
                                      **self.embedding_params,
                                      sparse=self.sparse)
        if self.use_cuda and self.embedding_on_gpu:
            # Put the embedding on GPU only when wanted
            self.embedding = self.embedding.cuda()
        print("Embedding before creating optimizer:", self.embedding, sep='\n')
        if self.sparse:
            self.optimizer = optim.SparseAdam(self.embedding.parameters(), lr=self.lr)
        else:
            self.optimizer = optim.Adam(self.embedding.parameters(), lr=self.lr)
        return self
Example #7
def get_optimizer(optimizer, lr, params):

	if optimizer == 'adagrad':
		optimizer = torch.optim.Adagrad(params, lr=lr*5, lr_decay=0, weight_decay=0, initial_accumulator_value=0, eps=1e-10)
	elif optimizer == 'adadelta':
		optimizer = optim.Adadelta(params, lr=lr*100*5, rho=0.9, eps=1e-06, weight_decay=0)
	elif optimizer == 'adam':
		optimizer = optim.Adam(params, lr=lr/10*5, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
	elif optimizer == 'adaw':
		optimizer = optim.AdamW(params, lr=lr/10*5, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False)
	elif optimizer == 'sparseadam':
		optimizer = optim.SparseAdam(params, lr=lr/10*5, betas=(0.9, 0.999), eps=1e-08)
	elif optimizer == 'ASGD':
		optimizer = optim.ASGD(params, lr=lr*5, lambd=0.0001, alpha=0.75, t0=1000000.0, weight_decay=0)
	elif optimizer == 'LBFGS':
		optimizer = optim.LBFGS(params, lr=lr*100*5) 
	elif optimizer == 'RMSprop':
		optimizer = optim.RMSprop(params, lr=lr*5)
	elif optimizer == 'rprop':
		optimizer = optim.Rprop(params, lr=lr*5)
	elif optimizer == 'SGD':
		optimizer = optim.SGD(params, lr=lr*5, momentum=0, dampening=0, weight_decay=0, nesterov=False)
	elif optimizer == 'adamax': #standard: adamax
		optimizer = optim.Adamax(params, lr=lr) # best lr=0.01, standard is lr=0.002, multiply every other by factor 5 as well
	else:
		raise Exception("Optimizer not supported. Please change it!")

	return optimizer
Example #8
def get_optim(config, model):
    if config.optimizer == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'RMSprop':
        optimizer = optim.RMSprop(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'Adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'Adadelta':
        optimizer = optim.Adadelta(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'AdamW':
        optimizer = optim.AdamW(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'SparseAdam':
        optimizer = optim.SparseAdam(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'Adamax':
        optimizer = optim.Adamax(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'ASGD':
        optimizer = optim.ASGD(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'LBFGS':
        optimizer = optim.LBFGS(model.parameters(), lr=float(config.lr))
    elif config.optimizer == 'Rprop':
        optimizer = optim.Rprop(model.parameters(), lr=float(config.lr))
    else:
        raise ValueError('unsupported optimizer: {}'.format(config.optimizer))
    print('\noptimizer :', optimizer, '\n')
    return optimizer
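
Since every branch above differs only in the optimizer class, a lookup table is a common alternative; a sketch under the same assumed config interface (not the original code):

import torch.optim as optim

OPTIMIZERS = {
    'Adam': optim.Adam,
    'RMSprop': optim.RMSprop,
    'Adagrad': optim.Adagrad,
    'SGD': optim.SGD,
    'Adadelta': optim.Adadelta,
    'AdamW': optim.AdamW,
    'SparseAdam': optim.SparseAdam,
    'Adamax': optim.Adamax,
    'ASGD': optim.ASGD,
    'LBFGS': optim.LBFGS,
    'Rprop': optim.Rprop,
}


def get_optim_mapped(config, model):
    # Table-driven equivalent of get_optim above (sketch).
    try:
        cls = OPTIMIZERS[config.optimizer]
    except KeyError:
        raise ValueError('unsupported optimizer: {}'.format(config.optimizer))
    return cls(model.parameters(), lr=float(config.lr))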
Example #9
    def __init__(self,
                 epochs,
                 training_file,
                 name="model/skipgram",
                 embedding_dim=100,
                 batch_size=256,
                 window_size=2,
                 negative_sample=5):
        self.epochs = epochs
        self.training_file = training_file
        self.embedding_dim = embedding_dim
        self.batch_size = batch_size
        self.win_size = window_size
        self.neg_samples = negative_sample
        corpus = TokenizedCorpus(self.training_file)
        self.sentences = corpus.get_words()
        self.skip_data = VsGram(self.sentences, self.win_size)
        self.vocab = self.skip_data.w2i
        self.model = SkipGram(self.vocab, self.embedding_dim)
        self.optimizer = optim.SparseAdam(self.model.parameters(), lr=0.001)
        self.name = "model/skipgram"

        #save w2i and i2w as json
        with open(os.path.join(self.name + "_i2w.txt"), "w") as out:
            json.dump(self.skip_data.i2w, out, indent=4)
        with open(os.path.join(self.name + "_w2i.txt"), "w") as out:
            json.dump(self.skip_data.w2i, out, indent=4)

        self.training()
Example #10
def get_optimiser(name, net_params, optim_params):
    lr = optim_params['learning_rate']
    momentum = optim_params['momentum']
    weight_decay = optim_params['weight_decay']
    if(name == "SGD"):
        return optim.SGD(net_params, lr, 
            momentum = momentum, weight_decay = weight_decay)
    elif(name == "Adam"):
        return optim.Adam(net_params, lr, weight_decay = 1e-5)
    elif(name == "SparseAdam"):
        return optim.SparseAdam(net_params, lr)
    elif(name == "Adadelta"):
        return optim.Adadelta(net_params, lr, weight_decay = weight_decay)
    elif(name == "Adagrad"):
        return optim.Adagrad(net_params, lr, weight_decay = weight_decay)
    elif(name == "Adamax"):
        return optim.Adamax(net_params, lr, weight_decay = weight_decay)
    elif(name == "ASGD"):
        return optim.ASGD(net_params, lr, weight_decay = weight_decay)
    elif(name == "LBFGS"):
        return optim.LBFGS(net_params, lr)
    elif(name == "RMSprop"):
        return optim.RMSprop(net_params, lr, momentum = momentum,
            weight_decay = weight_decay)
    elif(name == "Rprop"):
        return optim.Rprop(net_params, lr)
    else:
        raise ValueError("unsupported optimizer {0:}".format(name))
Example #11
 def set_parameters(self, params):
     """ ? """
     self.params = []
     self.sparse_params = []
     for k, p in params:
         if p.requires_grad:
             if self.method != 'sparseadam' or "embed" not in k:
                 self.params.append(p)
             else:
                 self.sparse_params.append(p)
     if self.method == 'sgd':
         self.optimizer = optim.SGD(self.params, lr=self.learning_rate)
     elif self.method == 'adagrad':
         self.optimizer = optim.Adagrad(self.params, lr=self.learning_rate)
         for group in self.optimizer.param_groups:
             for p in group['params']:
                 self.optimizer.state[p]['sum'] = self.optimizer\
                     .state[p]['sum'].fill_(self.adagrad_accum)
     elif self.method == 'adadelta':
         self.optimizer = optim.Adadelta(self.params, lr=self.learning_rate)
     elif self.method == 'adam':
         self.optimizer = optim.Adam(self.params, lr=self.learning_rate,
                                     betas=self.betas, eps=1e-9)
     elif self.method == 'sparseadam':
         self.optimizer = MultipleOptimizer(
             [optim.Adam(self.params, lr=self.learning_rate,
                         betas=self.betas, eps=1e-8),
              optim.SparseAdam(self.sparse_params, lr=self.learning_rate,
                               betas=self.betas, eps=1e-8)])
     else:
         raise RuntimeError("Invalid optim method: " + self.method)
Example #12
    def __init__(self,
                 root_dir='./VolSegData/',
                 checkpoint_dir='./checkpoints/',
                 batch_size=10,
                 shuffle=True,
                 num_workers=0,
                 num_epochs=2,
                 load_from=None):

        self.model = sc.Net.create()

        if load_from:
            self.model.load_state_dict(torch.load(load_from), strict=False)

        self.dataloader = dl.LitsDataSet.create(root_dir=root_dir,
                                                batch_size=batch_size,
                                                shuffle=shuffle,
                                                num_workers=num_workers)
        self.criterion = nn.MSELoss()
        self.optimizer = optim.SparseAdam(self.model.parameters(),
                                          lr=0.001,
                                          betas=(0.9, 0.999),
                                          eps=1e-08)
        self.checkpoint = checkpoint_dir
        self.num_epochs = num_epochs
Example #13
    def _initialize(self, interactions):

        (self._num_users, self._num_items) = (interactions.num_users,
                                              interactions.num_items)

        if self._representation is not None:
            self._net = gpu(self._representation, self._use_cuda)
        else:
            self._net = gpu(
                BilinearNet(self._num_users,
                            self._num_items,
                            self._embedding_dim,
                            sparse=self._sparse), self._use_cuda)

        if self._optimizer_func is None:
            if self._sparse:
                self._optimizer = optim.SparseAdam(self._net.parameters())
            else:
                self._optimizer = optim.Adam(self._net.parameters(),
                                             weight_decay=self._l2,
                                             lr=self._learning_rate)
        else:
            self._optimizer = self._optimizer_func(self._net.parameters())

        if self._loss == 'pointwise':
            self._loss_func = pointwise_loss
        elif self._loss == 'bpr':
            self._loss_func = bpr_loss
        elif self._loss == 'hinge':
            self._loss_func = hinge_loss
        else:
            self._loss_func = adaptive_hinge_loss
Example #14
    def train(self):

        optimizer = optim.SparseAdam(self.skip_gram_model.parameters(),
                                     lr=self.initial_lr)
        path_to_save = os.path.join(self.args["output_folder"],
                                    "word_vectors.npy")

        for iteration in range(self.iterations):

            running_loss = 0.0
            for i, sample_batched in enumerate(self.dataloader):

                if len(sample_batched[0]) > 1:
                    pos_u = sample_batched[0].to(self.device)
                    pos_v = sample_batched[1].to(self.device)
                    neg_v = sample_batched[2].to(self.device)

                    optimizer.zero_grad()

                    loss = self.skip_gram_model.forward(pos_u, pos_v, neg_v)
                    loss.backward()
                    optimizer.step()

                    running_loss = running_loss * 0.9 + loss.item() * 0.1

            if (iteration + 1) % self.args["print_every"] == 0:
                print("Iter {}: Loss: {}".format(iteration, running_loss))

            self.skip_gram_model.save_embedding(path_to_save)

        json.dump(
            self.id2token,
            open(os.path.join(self.args["output_folder"], "index2word.json"),
                 'w'))
Example #15
def get_optim(lr):
    if args.optim == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=lr,
                               eps=1e-9,
                               betas=[0.9, 0.98])
    elif args.optim == 'sparseadam':
        optimizer = optim.SparseAdam(model.parameters(),
                                     lr=lr,
                                     eps=1e-9,
                                     betas=[0.9, 0.98])
    elif args.optim == 'adamax':
        optimizer = optim.Adamax(model.parameters(),
                                 lr=lr,
                                 eps=1e-9,
                                 betas=[0.9, 0.98])
    elif args.optim == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=lr,
                                  eps=1e-9,
                                  momentum=0.9)
    elif args.optim == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=lr)  # 0.01
    elif args.optim == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=lr)
    elif args.optim == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(), lr=lr)
    else:
        raise ValueError('unknown optimizer: {}'.format(args.optim))

    return optimizer
Example #16
    def train(self):

        for iteration in range(self.iterations):
            print("\n\n\nIteration: " + str(iteration + 1))
            optimizer = optim.SparseAdam(self.skip_gram_model.parameters(),
                                         lr=self.initial_lr)
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer, len(self.dataloader))

            running_loss = 0.0
            for i, sample_batched in enumerate(tqdm(self.dataloader)):

                if len(sample_batched[0]) > 1:
                    pos_u = sample_batched[0].to(self.device)
                    pos_v = sample_batched[1].to(self.device)
                    neg_v = sample_batched[2].to(self.device)

                    optimizer.zero_grad()
                    loss = self.skip_gram_model.forward(pos_u, pos_v, neg_v)
                    loss.backward()
                    optimizer.step()
                    # step the LR scheduler after the optimizer update
                    scheduler.step()

                    running_loss = running_loss * 0.9 + loss.item() * 0.1
                    if i > 0 and i % 500 == 0:
                        print(" Loss: " + str(running_loss))

            self.skip_gram_model.save_embedding(self.data.id2word,
                                                self.output_file_name)
Example #17
    def _build_optimizer(self):
        r"""Init the Optimizer

        Returns:
            torch.optim: the optimizer
        """
        if self.learner.lower() == 'adam':
            optimizer = optim.Adam(self.model.parameters(),
                                   lr=self.learning_rate)
        elif self.learner.lower() == 'sgd':
            optimizer = optim.SGD(self.model.parameters(),
                                  lr=self.learning_rate)
        elif self.learner.lower() == 'adagrad':
            optimizer = optim.Adagrad(self.model.parameters(),
                                      lr=self.learning_rate)
        elif self.learner.lower() == 'rmsprop':
            optimizer = optim.RMSprop(self.model.parameters(),
                                      lr=self.learning_rate)
        elif self.learner.lower() == 'sparse_adam':
            optimizer = optim.SparseAdam(self.model.parameters(),
                                         lr=self.learning_rate)
        else:
            self.logger.warning(
                'Received unrecognized optimizer, set default Adam optimizer')
            optimizer = optim.Adam(self.model.parameters(),
                                   lr=self.learning_rate)
        return optimizer
Example #18
def main():
    user = os.path.expanduser("~")
    user = os.path.join(user, 'PycharmProjects/Parametric_GT')

    current_dataset = 'caltech'
    max_epochs = 20
    batch_size = 8

    dataset, stats, number_of_classes = misc(user, current_dataset)
    dataset_train = os.path.join(dataset, 'train_labelled0.1')
    dataset_test = os.path.join(dataset, 'test')

    nets_and_features = create_dict_nets_and_features()
    net_types = ['resnet18']
    out_dir = os.path.join(user, 'Results', current_dataset, 'nets')

    for net_type in net_types:
        inception = net_type == 'inception'
        train_loader = prepare_loader_train(dataset_train, stats, batch_size)
        test_loader = prepare_loader_val(dataset_test, stats, batch_size)

        net, feature_size = create_net(number_of_classes, nets_and_features, net_type=net_type)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SparseAdam(net.parameters(), lr=1e-4)

        best_net = train(net, net_type, train_loader, test_loader, optimizer, criterion, max_epochs, out_dir)

        net.load_state_dict(torch.load(best_net))
        net_accuracy = evaluate(net, test_loader)
        print('Accuracy: ' + str(net_accuracy))
Example #19
    def __init__(
                    self,
                    root_dir = ["/data/keshav/CT/trainbatch/trainbatch1/","/data/keshav/CT/trainbatch/trainbatch2/"],
                    checkpoint_dir = './checkpoints/',
                    batch_size = 10,
                    shuffle = True,
                    num_workers = 0,
                    num_epochs = 2,
                    load_from = None
                ):

        self.model = sc.Net.create()
        self.batch_size = batch_size

        if load_from:
            self.model.load_state_dict(torch.load(load_from),strict=False)

        self.dataloader = LITS.LitsDataLoader.create(root_dir = root_dir, batch_size = batch_size,
                                            shuffle = False, num_workers = 0)
        self.criterion = nn.MSELoss()
        self.optimizer = optim.SparseAdam(self.model.parameters(), lr=0.001,
                                            betas=(0.9, 0.999), eps=1e-08)
        self.checkpoint = checkpoint_dir
        self.num_epochs = num_epochs
        self.N = 0
        self.epoch_loss = []
        self.batch_loss = []
        self.scan_loss = []
        self.test_loss = 0.0
        self.train_loss = 0.0
        self.N_test = 0
Example #20
    def optimizer_reset(self, learning_rate):
        self.learning_rate = learning_rate

        if self.optimizer_type == "Adam":
            self.optimizer = optim.Adam(self.dense_parameters,
                                        lr=learning_rate,
                                        weight_decay=self.l2_lambda)

            if len(self.sparse_parameters) > 0:
                self.sparse_optimizer = optim.SparseAdam(
                    self.sparse_parameters, lr=learning_rate)
            else:
                self.sparse_optimizer = None
        elif self.optimizer_type == "SGD":
            self.optimizer = optim.SGD(self.dense_parameters,
                                       lr=learning_rate,
                                       weight_decay=self.l2_lambda)
            if len(self.sparse_parameters) > 0:
                self.sparse_optimizer = optim.SGD(self.sparse_parameters,
                                                  lr=learning_rate)
            else:
                self.sparse_optimizer = None

        if the_gpu() >= 0:
            recursively_set_device(self.optimizer.state_dict(), the_gpu())
            if self.sparse_optimizer is not None:
                recursively_set_device(self.sparse_optimizer.state_dict(),
                                       the_gpu())
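
When dense and sparse parameters live in separate optimizers as above, both have to be zeroed and stepped on every iteration; a minimal training-step sketch under that assumption (all names are illustrative):

def training_step(model, inputs, targets, loss_fn, optimizer, sparse_optimizer=None):
    # Zero both optimizers, backpropagate once, then step both.
    optimizer.zero_grad()
    if sparse_optimizer is not None:
        sparse_optimizer.zero_grad()

    loss = loss_fn(model(inputs), targets)
    loss.backward()

    optimizer.step()
    if sparse_optimizer is not None:
        sparse_optimizer.step()
    return loss.item()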
Example #21
    def _build_optimizer(self, params):
        r"""Init the Optimizer

        Returns:
            torch.optim: the optimizer
        """
        if self.config['reg_weight'] and self.weight_decay and self.weight_decay * self.config['reg_weight'] > 0:
            self.logger.warning(
                'The parameters [weight_decay] and [reg_weight] are specified simultaneously, '
                'which may lead to double regularization.'
            )

        if self.learner.lower() == 'adam':
            optimizer = optim.Adam(params, lr=self.learning_rate, weight_decay=self.weight_decay)
        elif self.learner.lower() == 'sgd':
            optimizer = optim.SGD(params, lr=self.learning_rate, weight_decay=self.weight_decay)
        elif self.learner.lower() == 'adagrad':
            optimizer = optim.Adagrad(params, lr=self.learning_rate, weight_decay=self.weight_decay)
        elif self.learner.lower() == 'rmsprop':
            optimizer = optim.RMSprop(params, lr=self.learning_rate, weight_decay=self.weight_decay)
        elif self.learner.lower() == 'sparse_adam':
            optimizer = optim.SparseAdam(params, lr=self.learning_rate)
            if self.weight_decay > 0:
                self.logger.warning('SparseAdam does not support the argument [weight_decay]')
        else:
            self.logger.warning('Received unrecognized optimizer, set default Adam optimizer')
            optimizer = optim.Adam(params, lr=self.learning_rate)
        return optimizer
Example #22
    def _build_optimizer(self, params):
        r"""Init the Optimizer

        Returns:
            torch.optim: the optimizer
        """
        if self.learner.lower() == 'adam':
            optimizer = optim.Adam(params,
                                   lr=self.learning_rate,
                                   weight_decay=self.weight_decay)
        elif self.learner.lower() == 'sgd':
            optimizer = optim.SGD(params,
                                  lr=self.learning_rate,
                                  weight_decay=self.weight_decay)
        elif self.learner.lower() == 'adagrad':
            optimizer = optim.Adagrad(params,
                                      lr=self.learning_rate,
                                      weight_decay=self.weight_decay)
        elif self.learner.lower() == 'rmsprop':
            optimizer = optim.RMSprop(params,
                                      lr=self.learning_rate,
                                      weight_decay=self.weight_decay)
        elif self.learner.lower() == 'sparse_adam':
            optimizer = optim.SparseAdam(params, lr=self.learning_rate)
            if self.weight_decay > 0:
                self.logger.warning(
                    'SparseAdam does not support the argument [weight_decay]'
                )
        else:
            self.logger.warning(
                'Received unrecognized optimizer, set default Adam optimizer')
            optimizer = optim.Adam(params, lr=self.learning_rate)
        return optimizer
Example #23
    def train(self):
        """Train the network with the settings used to initialise the Trainer
		
		"""
        for epoch in range(self.epochs):
            print("### Epoch: " + str(epoch))
            optimizer = optim.SparseAdam(self.skipgram.parameters(),
                                         lr=self.initial_lr)
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer, len(self.dataloader))

            running_loss = 0.0
            for sample_batched in tqdm(self.dataloader):

                if len(sample_batched[0]) > 1:
                    pos_target = sample_batched[0].to(self.device)
                    pos_context = sample_batched[1].to(self.device)
                    neg_context = sample_batched[2].to(self.device)

                    optimizer.zero_grad()
                    loss = self.skipgram.forward(
                        pos_target, pos_context, neg_context
                    )  # the loss is integrated into the forward function
                    loss.backward()
                    optimizer.step()
                    scheduler.step()

                    running_loss = running_loss * 0.9 + loss.item() * 0.1
            print(" Loss: " + str(running_loss))

        final_embeddings = self.skipgram.target_embeddings.weight.cpu(
        ).data.numpy()
        save_graph_embeddings(self.corpus, final_embeddings, self.output_fh)
Example #24
def get_optimizer(model, name, hyper_parameters):
    """
        name can be:
            'adam',
            'sparseadam',
            'adamax',
            'sgd',
    """
    optimizer = None
    if name == "adam":
        optimizer = optim.Adam(model.parameters(), lr=hyper_parameters["lr"])

    elif name == "sparseadam":
        optimizer = optim.SparseAdam(model.parameters(),
                                     lr=hyper_parameters["lr"],
                                     betas=eval(hyper_parameters["betas"]),
                                     eps=hyper_parameters["eps"])

    elif name == "adamax":
        optimizer = optim.Adamax(model.parameters(),
                                 lr=hyper_parameters["lr"],
                                 betas=hyper_parameters["betas"],
                                 eps=hyper_parameters["eps"],
                                 weight_decay=hyper_parameters["weight_decay"])

    elif name == "sgd":
        optimizer = optim.SGD(model.parameters(),
                              lr=hyper_parameters["lr"],
                              momentum=hyper_parameters["momentum"],
                              weight_decay=hyper_parameters["weight_decay"])

    return optimizer
Example #25
    def get_optimizers(self, args, checkpoint):

        optimizers = list()

        if args.encoder_lr > 0:
            optimizer_encoder = optim.Adam(
                list(self.bert.parameters()) +
                list(self.fc.parameters() if args.project else list()),
                lr=args.encoder_lr,
            )
            if args.resume_from_checkpoint is not None:
                optimizer_encoder.load_state_dict(
                    checkpoint["optimizer_dense"])
                optimizer_encoder.param_groups[0]["lr"] = args.encoder_lr
                optimizer_encoder.param_groups[0][
                    "weight_decay"] = args.encoder_weight_decay
            optimizers.append(optimizer_encoder)
        else:
            optimizers.append(
                DummyOptimizer(self.out.parameters(), defaults={}))

        if args.decoder_lr > 0:
            if args.sparse:
                optimizer_decoder = optim.SparseAdam(self.out.parameters(),
                                                     lr=args.decoder_lr)
            else:
                optimizer_decoder = optim.Adam(self.out.parameters(),
                                               lr=args.decoder_lr)
            if args.resume_from_checkpoint is not None:
                optimizer_decoder.load_state_dict(
                    checkpoint["optimizer_sparse"])
                if "weight_decay" not in optimizer_decoder.param_groups[0]:
                    optimizer_decoder.param_groups[0]["weight_decay"] = 0
                optimizer_decoder.param_groups[0]["lr"] = args.decoder_lr
                if not args.sparse:
                    optimizer_decoder.param_groups[0][
                        "weight_decay"] = args.decoder_weight_decay
            optimizers.append(optimizer_decoder)
        else:
            optimizers.append(
                DummyOptimizer(self.out.parameters(), defaults={}))

        lr_schedulers = [
            getattr(LRSchedulers, lr_scheduler)(optimizer=optimizer,
                                                **lr_scheduler_config)
            for optimizer, (lr_scheduler, lr_scheduler_config) in zip(
                optimizers,
                [
                    (args.encoder_lr_scheduler,
                     args.encoder_lr_scheduler_config),
                    (args.decoder_lr_scheduler,
                     args.decoder_lr_scheduler_config),
                ],
            )
            # and not isinstance(optimizer, DummyOptimizer)
            if lr_scheduler is not None
        ]

        return tuple(optimizers), tuple(lr_schedulers)
Example #26
    def train(self):
        if self.optimizer == 'adam':
            optimizer = optim.Adam(self.skip_gram_model.parameters(),
                                   lr=self.initial_lr,
                                   **self.optimizer_kwargs)
        elif self.optimizer == 'sparse_adam':
            optimizer = optim.SparseAdam(self.skip_gram_model.parameters(),
                                         lr=self.initial_lr,
                                         **self.optimizer_kwargs)
        elif self.optimizer == 'sgd':
            optimizer = optim.SGD(self.skip_gram_model.parameters(),
                                  lr=self.initial_lr,
                                  **self.optimizer_kwargs)
        elif self.optimizer == 'asgd':
            optimizer = optim.ASGD(self.skip_gram_model.parameters(),
                                   lr=self.initial_lr,
                                   **self.optimizer_kwargs)
        elif self.optimizer == 'adagrad':
            optimizer = optim.Adagrad(self.skip_gram_model.parameters(),
                                      lr=self.initial_lr,
                                      **self.optimizer_kwargs)
        else:
            raise Exception('Unknown optimizer!')

        for iteration in range(self.iterations):

            print("\n\n\nIteration: " + str(iteration + 1))

            if self.lr_schedule:
                scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                    optimizer, len(self.dataloader))
            running_loss = 0.0
            iprint = len(self.dataloader) // 20
            for i, sample_batched in enumerate(tqdm(self.dataloader)):

                if len(sample_batched[0]) > 1:
                    pos_u = sample_batched[0].to(self.device)
                    pos_v = sample_batched[1].to(self.device)
                    neg_v = sample_batched[2].to(self.device)

                    optimizer.zero_grad()
                    loss = self.skip_gram_model.forward(pos_u, pos_v, neg_v)
                    loss.backward()
                    optimizer.step()
                    if self.lr_schedule:
                        scheduler.step()

                    running_loss = running_loss * (
                        1 - 5 / iprint) + loss.item() * (5 / iprint)
                    if i > 0 and i % iprint == 0:
                        print(" Loss: " + str(running_loss) + ' lr: ' + str([
                            param_group['lr']
                            for param_group in optimizer.param_groups
                        ]))
            print(" Loss: " + str(running_loss))

            self.skip_gram_model.save_embedding(self.data.id2word,
                                                self.output_file_name)
Example #27
def train_skipgram(model, docs):
    ''' train skipgram model'''

    torch.manual_seed(42)
    np.random.seed(42)

    # set optimizer HYPERPARAMS?
    optimizer = optim.SparseAdam(model.parameters())

    # batch size
    batch_size = 512

    # get pdf for negative sampling
    pdf = model.get_neg_sample_pdf(model.counter)

    # epoch
    for epoch in range(model.nr_epochs):
        print(f"Epoch: {epoch}")

        # get batch of positive and negative examples
        for step, (pos_batch, neg_batch) in enumerate(
                get_batches(model, docs, batch_size, pdf)):
            optimizer.zero_grad()

            # extract words
            pos_u = [x[0].item() for x in pos_batch]
            pos_v = [x[1].item() for x in pos_batch]
            neg_v = neg_batch

            # forward pass
            loss = model.forward(pos_u, pos_v, neg_v)

            if step % 50 == 0:
                print(f'at step {step}: loss: {loss.item()}')

            # save every 1000 steps
            if step % 1000 == 0:
                if not os.path.exists('./models'):
                    os.mkdir('./models')
                torch.save(model.state_dict(),
                           './models/trained_w2v_bs_256_thr_120.pt')

            # backprop
            loss.backward()
            optimizer.step()

        # save model
        if not os.path.exists('./models'):
            os.mkdir('./models')
        torch.save(model.state_dict(),
                   f'./models/trained_w2v_bs_256_thr_120.pt')

    # aggregate all docs to embeddings for retrieval
    print('Done with training. \nConverting all documents to embeddings.',
          'This may take a while.')
    model.aggregate_all_docs()
    print('Done with converting all docs')
Example #28
    def train(self, model: nn.Module, train_interactions: np.ndarray,
              test_interactions: np.ndarray, is_sparse: bool):

        optimizer: optim.Optimizer

        if is_sparse:
            optimizer = optim.SparseAdam(model.parameters(), lr=self.LR)
        else:
            optimizer = optim.Adam(model.parameters(),
                                   lr=self.LR,
                                   weight_decay=self.WEIGHT_DECAY)

        train_loss_history = []
        test_loss_history = []

        train_dataset = get_dataset(train_interactions)
        test_dataset = get_dataset(test_interactions)
        test_users, test_movies, test_ratings = test_dataset.tensors

        data_loader = DataLoader(train_dataset, batch_size=self.BATCH_SIZE)

        model.to(DEVICE)

        for epoch in tqdm(range(0, self.EPOCHS), desc='Training'):
            train_loss = 0

            for users_batch, movies_batch, ratings_batch in data_loader:
                optimizer.zero_grad()

                prediction = model(users_batch, movies_batch)
                loss = self.loss(prediction, ratings_batch)

                for regularizer in self.regularizers:
                    loss += regularizer(prediction)

                loss.backward()
                optimizer.step()

                train_loss += loss.item()

            test_prediction = model(test_users, test_movies)
            test_loss = self.loss(test_prediction, test_ratings).item()
            for regularizer in self.regularizers:
                test_loss += regularizer(test_prediction).item()

            train_loss /= len(data_loader)

            train_loss_history.append(train_loss)
            test_loss_history.append(test_loss)

            if self.VERBOSE:
                msg = f'Train loss: {train_loss:.3f}, '
                msg += f'Test loss: {test_loss:.3f}'
                tqdm.write(msg)

        return train_loss_history, test_loss_history
Example #29
def build_optimizer(model, args, reload=False):
    optimizer_sparse = None
    if args.optim.lower() == 'sgd':
        if args.sample_softmax > 0:
            dense_params, sparse_params = [], []
            for param in model.parameters():
                if param.size() == model.word_emb.weight.size():
                    sparse_params.append(param)
                else:
                    dense_params.append(param)
            optimizer_sparse = optim.SGD(sparse_params, lr=args.lr * 2)
            optimizer = optim.SGD(dense_params, lr=args.lr, momentum=args.mom)
        else:
            optimizer = optim.SGD(model.parameters(), lr=args.lr,
                                  momentum=args.mom)
    elif args.optim.lower() == 'adam':
        if args.sample_softmax > 0:
            dense_params, sparse_params = [], []
            for param in model.parameters():
                if param.size() == model.word_emb.weight.size():
                    sparse_params.append(param)
                else:
                    dense_params.append(param)
            optimizer_sparse = optim.SparseAdam(sparse_params, lr=args.lr)
            optimizer = optim.Adam(dense_params, lr=args.lr)
        else:
            optimizer = optim.Adam(model.parameters(), lr=args.lr)
    elif args.optim.lower() == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=args.lr)
    else:
        raise ValueError(f"optimizer type {args.optim} not recognized")

    if reload:
        if args.restart_from is not None:
            optim_name = f'optimizer_{args.restart_from}.pt'
        else:
            optim_name = 'optimizer.pt'
        optim_file_name = os.path.join(args.restart_dir, optim_name)
        logging(f"reloading {optim_file_name}")
        if os.path.exists(os.path.join(args.restart_dir, optim_name)):
            with open(os.path.join(args.restart_dir, optim_name), 'rb') as optim_file:
                opt_state_dict = torch.load(optim_file)
                try:
                    optimizer.load_state_dict(opt_state_dict)
                # in case the optimizer param groups aren't the same shape, merge them
                except:
                    logging("merging optimizer param groups")
                    opt_state_dict["param_groups"][0]["params"] \
                        = [param for param_group in opt_state_dict["param_groups"] for param in param_group["params"]]
                    opt_state_dict["param_groups"] = [opt_state_dict["param_groups"][0]]
                    optimizer.load_state_dict(opt_state_dict)
        else:
            logging('Optimizer was not saved. Start from scratch.')

    return optimizer, optimizer_sparse
Example #30
    def train(self,
              training_files,
              output_file,
              num_epochs=100,
              init=weightInit.fromScratch,
              model_path=None):

        losses = list()
        dataloader = self.initDataLoader(training_files)

        self.weightInitialisation(init, saved_model_path=model_path)
        self.initDevice()

        for iteration in tqdm(range(num_epochs)):

            optimizer = optim.SparseAdam(self.skip_gram_model.parameters(),
                                         lr=self.initial_lr)
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer, len(dataloader))

            count = 0.0
            running_loss = 0.0
            cumulative_loss = 0.0

            for i, sample_batched in enumerate(dataloader):

                if len(sample_batched[0]) > 1:
                    pos_u = sample_batched[0].to(self.device)
                    pos_v = sample_batched[1].to(self.device)
                    neg_v = sample_batched[2].to(self.device)

                    optimizer.zero_grad()
                    loss = self.skip_gram_model.forward(pos_u, pos_v, neg_v)
                    loss.backward()
                    optimizer.step()
                    # step the LR scheduler after the optimizer update
                    scheduler.step()

                    running_loss = running_loss * 0.9 + loss.item() * 0.1
                    cumulative_loss += loss.item()

                    count += 1.0

            losses.append(cumulative_loss / count)
            self.iter_per_epoch = int(count * self.batch_size)

        # write to vectors
        #if torch.cuda.device_count() > 1:
        #self.skip_gram_model.module.save_embedding(self.data.id2word, output_file, self.data.max_num_words_file)
        #else:
        #self.skip_gram_model.save_embedding(self.data.id2word, output_file, self.data.max_num_words_file)

        self.skip_gram_model.save_embedding(self.data.id2word, output_file,
                                            self.data.max_num_words_file)
        return losses