Example #1
    def _init_training(self, model):
        model = self._parallel_to_device(model)

        # define the params to optimize.
        params = [
            {
                "params": [value],
                "name": key,
                "weight_decay": self.conf.weight_decay,
                "param_size": value.size(),
                "nelement": value.nelement(),
                "lr": self.conf.lr,
            }
            for key, value in model.named_parameters()
            if value.requires_grad
        ]

        # create the optimizer.
        if self.conf.optimizer == "adam":
            opt = optim.Adam(
                params,
                lr=self.conf.lr,
                betas=(0.9, 0.999),
                eps=1e-8,
                weight_decay=self.conf.weight_decay,
            )
        else:
            raise NotImplementedError("this optimizer is not supported yet.")
        opt.zero_grad()
        model.zero_grad()
        self.log_fn(f"Initialize the optimizer: {self.conf.optimizer}")
        return opt, model
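For reference, a minimal standalone sketch (illustrative module and names, not part of the example) of how torch.optim.Adam consumes such per-parameter group dicts: extra keys like "name" or "param_size" are simply stored on the group, while "lr" and "weight_decay" override the constructor defaults for that group.

import torch.nn as nn
import torch.optim as optim

model = nn.Linear(4, 2)
param_groups = [
    {"params": [p], "name": n, "lr": 1e-3, "weight_decay": 1e-4}
    for n, p in model.named_parameters()
    if p.requires_grad
]
opt = optim.Adam(param_groups, lr=1e-3, betas=(0.9, 0.999), eps=1e-8)
opt.zero_grad()                      # same reset as in _init_training above
print(opt.param_groups[0]["name"])   # extra keys are kept on the group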
Example #2
    def _init_training(self, model):
        model = self._parallel_to_device(model)

        # define the params to optimize.
        params = [
            {
                "params": [value],
                "name": key,
                "weight_decay": self.conf.weight_decay,
                "param_size": value.size(),
                "nelement": value.nelement(),
                "lr": self.conf.lr_for_mask
                if self.conf.lr_for_mask is not None and "mask" in key
                else self.conf.lr,
            }
            for key, value in model.named_parameters()
            if value.requires_grad
        ]

        # create the optimizer.
        if self.conf.optimizer == "adam":
            opt = optim.Adam(
                params,
                lr=self.conf.lr,
                betas=(self.conf.adam_beta_1, self.conf.adam_beta_2),
                eps=self.conf.adam_eps,
                weight_decay=self.conf.weight_decay,
            )
        elif self.conf.optimizer == "sgd":
            opt = torch.optim.SGD(
                params,
                lr=self.conf.lr,
                momentum=self.conf.momentum_factor,
                weight_decay=self.conf.weight_decay,
                nesterov=self.conf.use_nesterov,
            )
        elif self.conf.optimizer == "signsgd":
            opt = optim.SignSGD(
                params,
                lr=self.conf.lr,
                momentum=self.conf.momentum_factor,
                weight_decay=self.conf.weight_decay,
                nesterov=self.conf.use_nesterov,
            )
        else:
            raise NotImplementedError("this optimizer is not supported yet.")
        opt.zero_grad()
        model.zero_grad()
        self.log_fn(f"Initialize the optimizer: {self.conf.optimizer}")
        return opt, model
Example #3
        def test_sample(filename):
            # Each sample file stores: lr, the shapes (p, q, r), the initial W and b,
            # n flattened gradients, and the expected W, b, m, and v after n updates.
            with open(filename, 'r') as o:
                lr = float(o.readline().strip())
                p, q, r = list(map(int, o.readline().split()))
                W = np.zeros([p, q])
                b = np.zeros([r])

                for i in range(p):
                    line = list(map(float, o.readline().split()))
                    W[i, :] = line
                line = list(map(float, o.readline().split()))
                b[:] = line

                n = int(o.readline().strip())
                grads_flat = np.zeros([n, p * q + r])
                W_ans = np.zeros([p, q])
                b_ans = np.zeros([r])
                m_ans = np.zeros([p * q + r])
                v_ans = np.zeros([p * q + r])

                for i in range(n):
                    line = list(map(float, o.readline().split()))
                    grads_flat[i, :] = line
                for i in range(p):
                    line = list(map(float, o.readline().split()))
                    W_ans[i, :] = line
                line = list(map(float, o.readline().split()))
                b_ans[:] = line
                line = list(map(float, o.readline().split()))
                m_ans[:] = line
                line = list(map(float, o.readline().split()))
                v_ans[:] = line

            model = TestModel(W, b)
            optimizer = optim.Adam(model, lr)
            for i in range(n):
                optimizer.update(grads_flat[i])

            self.assertTrue(
                np.isclose(model.params()[0], W_ans).all(), filename)
            self.assertTrue(
                np.isclose(model.params()[1], b_ans).all(), filename)
            self.assertTrue(np.isclose(optimizer.m, m_ans).all(), filename)
            self.assertTrue(np.isclose(optimizer.v, v_ans).all(), filename)
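The test above exercises a custom optim.Adam that keeps flat moment vectors m and v. For orientation, a minimal NumPy sketch of a textbook Adam step (assuming beta1=0.9, beta2=0.999, eps=1e-8 and bias correction; the implementation under test may differ in these details):

import numpy as np

def adam_step(theta, grad, m, v, t, lr, beta1=0.9, beta2=0.999, eps=1e-8):
    # update biased first- and second-moment estimates
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad ** 2
    # bias-correct the moments (t is the 1-based step count)
    m_hat = m / (1 - beta1 ** t)
    v_hat = v / (1 - beta2 ** t)
    # apply the update
    theta = theta - lr * m_hat / (np.sqrt(v_hat) + eps)
    return theta, m, v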
Example #4
    def __init__(self, config_file, config):
        super().__init__(cache=False)

        self.train_language_list = config.train_language_list
        self.n_train_lang = len(config.train_language_list)

        self.main_dev_list = config.main_dev_language_list
        self.n_main_dev_lang = len(config.main_dev_language_list)

        self.add_dev_list = config.add_dev_language_list
        self.n_add_dev_lang = len(config.add_dev_language_list)

        self.subwords_to_vectors = data.get_subwords_to_vectors(
            config.feature_fn)

        self.data = data.MultilangDataset(
            feats_fns=config.train_feats,
            align_fns=config.train_align,
            vocab_fns=config.train_vocab,
            subwords=config.subwords,
            subwords_to_vectors=self.subwords_to_vectors,
            min_occ_count=config.train_min_occ_count,
            min_seg_dur=config.train_min_seg_dur,
            stack_frames=config.stack_frames,
            batch_size=config.train_batch_size,
            shuffle=config.shuffle,
            cache=self.cache)

        # statistics
        train_subwords = set(
            data.combine_subwords_to_ids(config.train_vocab,
                                         config.subwords).keys())
        log.info(f"Using {len(train_subwords)} subwords in training")

        # dev sets for all training languages
        self.dev_datasets = []

        for i in range(self.n_main_dev_lang + self.n_add_dev_lang):
            data_dev = data.DevDataset(
                feats=config.dev_feats[i],
                align=config.dev_align[i],
                vocab=config.dev_vocab[i],
                subwords=config.subwords,
                min_occ_count=config.dev_min_occ_count,
                min_seg_dur=config.dev_min_seg_dur,
                stack_frames=config.stack_frames,
                batch_size=config.dev_batch_size,
                cache=self.cache,
                subwords_to_vectors=self.subwords_to_vectors)

            self.dev_datasets.append(data_dev)

            # statistics
            if i < self.n_main_dev_lang:
                this_lang = self.main_dev_list[i]
            else:
                this_lang = self.add_dev_list[i - self.n_main_dev_lang]

            this_subwords = set(
                data.combine_subwords_to_ids([config.dev_vocab[i]],
                                             config.subwords))
            log.info(
                f"language {this_lang} has {len(this_subwords)} subwords, "
                f"intersect {len(train_subwords.intersection(this_subwords))} subwords"
            )

        loss_fun = loss.Obj02(margin=config.loss_margin, k=config.loss_k)

        self.net = net.MultiViewRNN_Phonetic(
            config=config,
            feat_dim=self.data.feat_dim,
            phone_feat_dim=self.data.phone_feat_dim,
            loss_fun=loss_fun,
            use_gpu=True)

        self.optim = optim.Adam(params=self.net.parameters(),
                                lr=config.adam_lr)

        self.sched = sched.RevertOnPlateau(network=self.net,
                                           optimizer=self.optim,
                                           mode=config.mode,
                                           factor=config.factor,
                                           patience=config.patience,
                                           min_lr=config.min_lr)

        save_dir = os.path.join(expt_dir, "save")
        os.makedirs(save_dir, exist_ok=True)
        self.set_savepaths(save_dir=save_dir)
        self.save_dir = save_dir

        self.config_file = config_file
        self.config = config
Example #5
# load embedding
model.embed.lut.weight = nn.Parameter(TEXT.vocab.vectors)

device = th.device('cuda:0')
model = model.to(device)

embed_params, other_params, wd_params = unpack_params(model.named_parameters())

optimizer = get_wrapper(config['opt_wrapper'])(
    optim.Adam([
        {'params': embed_params, 'lr': 0},
        {'params': other_params, 'lr': config.get('lr', 1e-3)},
        {'params': wd_params, 'lr': config.get('lr', 1e-3), 'weight_decay': 5e-5},
    ]),
    **config.get('opt_attrs', {}))

best_val, test_acc = 1e9, 0

import time

for epoch in range(config['n_epochs']):
    tic = time.time()
    print('epoch {}'.format(epoch))
    print('training...')
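A standalone sketch (illustrative modules, not taken from the snippet above) of why the lr=0 group leaves the loaded embedding untouched: with Adam's default weight_decay of 0, a zero learning rate yields a zero update for that group, so the embedding stays frozen while still being registered with the optimizer.

import torch
import torch.nn as nn
import torch.optim as optim

emb = nn.Embedding(10, 4)
lin = nn.Linear(4, 2)
opt = optim.Adam([
    {'params': emb.parameters(), 'lr': 0},
    {'params': lin.parameters(), 'lr': 1e-3},
])
before = emb.weight.detach().clone()
lin(emb(torch.tensor([1, 2, 3]))).sum().backward()
opt.step()
assert torch.equal(emb.weight.detach(), before)  # embedding group unchanged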
Example #6
                            n_layers, m_layers,
                            dropouti=config['dropouti'], dropouth=config['dropouth'],
                            dropouta=config['dropouta'], dropoutc=config['dropoutc'],
                            rel_pos=config['rel_pos'])

# load embedding
model.embed.lut.weight = nn.Parameter(TEXT.vocab.vectors)

device = th.device('cuda:0')
model = model.to(device)

embed_params, other_params, wd_params = unpack_params(model.named_parameters())

optimizer = get_wrapper(config['opt_wrapper'])(
    optim.Adam([
        {'params': embed_params, 'lr': 0},
        {'params': other_params, 'lr': config.get('lr', 1e-3)},
        {'params': wd_params, 'lr': config.get('lr', 1e-3), 'weight_decay': 5e-5}]))

best_val, test_acc = 1e9, 0

for epoch in range(config['n_epochs']):
    print('epoch {}'.format(epoch))
    print('training...')
    model.train()
    n_tokens = 0
    sum_loss = 0
    hit = 0
    for i, batch in enumerate(train_loader):
        batch.y = batch.y.to(device)
        batch.g.edata['etype'] = batch.g.edata['etype'].to(device)
        batch.g.ndata['x'] = batch.g.ndata['x'].to(device)
Example #7
if __name__ == '__main__':
    trainloader = Cifar10('./data/cifar-10-batches-py/',
                          batch_size=32,
                          phase='train',
                          shuffle=True)
    testloader = Cifar10('./data/cifar-10-batches-py/',
                         batch_size=100,
                         phase='test',
                         shuffle=False)

    model = Model()
    model.initialize()
    criterion = nn.CrossEntropyLoss()
    # optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9, weight_decay=5e-4)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    epoch_list, train_loss_list, train_acc_list, test_loss_list, test_acc_list = [], [], [], [], []
    early_stop = 3
    for i in range(100):
        train_loss, train_acc = train(model, trainloader, criterion, optimizer)
        test_loss, test_acc = test(model, testloader, criterion)

        epoch_list.append(i)
        train_loss_list.append(train_loss)
        train_acc_list.append(train_acc)
        test_loss_list.append(test_loss)
        test_acc_list.append(test_acc)

        if len(test_acc_list) > 1 and test_acc < test_acc_list[-2]:
            early_stop -= 1