Example #1
 def __init__(self,
              corpus_data_0,
              corpus_data_1,
              *,
              params,
              n_samples=10000000):
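     # one skip-gram embedding model per corpus (both moved to the GPU)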
     self.skip_gram = [
         SkipGram(corpus_data_0.vocab_size + 1, params.emb_dim).to(GPU),
         SkipGram(corpus_data_1.vocab_size + 1, params.emb_dim).to(GPU)
     ]
     self.perm = Permutation(params.emb_dim,
                             params.p_sample_top,
                             n_units=params.p_n_units,
                             batch_norm=params.p_bn).to(GPU)
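     # one WordSampler per corpus, sharing the same alpha / top-word settings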
     self.sampler = [
         WordSampler(corpus_data_0.dic,
                     n_urns=n_samples,
                     alpha=params.p_sample_factor,
                     top=params.p_sample_top),
         WordSampler(corpus_data_1.dic,
                     n_urns=n_samples,
                     alpha=params.p_sample_factor,
                     top=params.p_sample_top)
     ]
     self.p_bs = params.p_bs
     self.p_sample_top = params.p_sample_top
     self.emb_dim = params.emb_dim
     self.vocab_size_0, self.vocab_size_1 = corpus_data_0.vocab_size, corpus_data_1.vocab_size
     self.perm_optimizer, self.perm_scheduler = optimizers.get_sgd_find_lr(
         self.perm.parameters(),
         lr=params.p_lr,
         wd=params.p_wd,
         momentum=params.p_momentum)
     self.entropy_loss = EntropyLoss()
Example #2
    def __init__(self, corpus_data_0, corpus_data_1, *, params, n_samples=10000000):
        self.skip_gram = [SkipGram(corpus_data_0.vocab_size + 1, params.emb_dim).to(GPU),
                          SkipGram(corpus_data_1.vocab_size + 1, params.emb_dim).to(GPU)]
        self.discriminator = Discriminator(params.emb_dim, n_layers=params.d_n_layers, n_units=params.d_n_units,
                                           drop_prob=params.d_drop_prob, drop_prob_input=params.d_drop_prob_input,
                                           leaky=params.d_leaky, batch_norm=params.d_bn).to(GPU)
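        # square linear mapping (no bias) between the two embedding spaces, initialised to the identity matrix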
        self.mapping = nn.Linear(params.emb_dim, params.emb_dim, bias=False)
        self.mapping.weight.data.copy_(torch.diag(torch.ones(params.emb_dim)))
        self.mapping = self.mapping.to(GPU)
        self.sg_optimizer, self.sg_scheduler = [], []
        for id in [0, 1]:
            optimizer, scheduler = optimizers.get_sgd_adapt(self.skip_gram[id].parameters(),
                                                            lr=params.sg_lr, mode="max")
            self.sg_optimizer.append(optimizer)
            self.sg_scheduler.append(scheduler)
        self.a_optimizer, self.a_scheduler = [], []
        for id in [0, 1]:
            optimizer, scheduler = optimizers.get_sgd_adapt(
                [{"params": self.skip_gram[id].u.parameters()}, {"params": self.skip_gram[id].v.parameters()}],
                lr=params.a_lr, mode="max")
            self.a_optimizer.append(optimizer)
            self.a_scheduler.append(scheduler)
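        # pick the discriminator optimiser according to params.d_optimizer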
        if params.d_optimizer == "SGD":
            self.d_optimizer, self.d_scheduler = optimizers.get_sgd_adapt(self.discriminator.parameters(),
                                                                          lr=params.d_lr, mode="max", wd=params.d_wd)

        elif params.d_optimizer == "RMSProp":
            self.d_optimizer, self.d_scheduler = optimizers.get_rmsprop_linear(self.discriminator.parameters(),
                                                                               params.n_steps,
                                                                               lr=params.d_lr, wd=params.d_wd)
        else:
            raise Exception(f"Optimizer {params.d_optimizer} not found.")
        if params.m_optimizer == "SGD":
            self.m_optimizer, self.m_scheduler = optimizers.get_sgd_adapt(self.mapping.parameters(),
                                                                          lr=params.m_lr, mode="max", wd=params.m_wd)
        elif params.m_optimizer == "RMSProp":
            self.m_optimizer, self.m_scheduler = optimizers.get_rmsprop_linear(self.mapping.parameters(),
                                                                               params.n_steps,
                                                                               lr=params.m_lr, wd=params.m_wd)
        else:
            raise Exception(f"Optimizer {params.m_optimizer} not found")
        self.m_beta = params.m_beta
        self.smooth = params.smooth
        self.loss_fn = nn.BCEWithLogitsLoss(reduction="elementwise_mean")
        self.corpus_data_queue = [
            _data_queue(corpus_data_0, n_threads=(params.n_threads + 1) // 2, n_sentences=params.n_sentences,
                        batch_size=params.sg_bs),
            _data_queue(corpus_data_1, n_threads=(params.n_threads + 1) // 2, n_sentences=params.n_sentences,
                        batch_size=params.sg_bs)
        ]
        self.sampler = [
            WordSampler(corpus_data_0.dic, n_urns=n_samples, alpha=params.a_sample_factor, top=params.a_sample_top),
            WordSampler(corpus_data_1.dic, n_urns=n_samples, alpha=params.a_sample_factor, top=params.a_sample_top)]
        self.d_bs = params.d_bs
Example #3
 def __init__(self,
              corpus_data_0,
              corpus_data_1,
              *,
              params,
              n_samples=10000000):
     self.skip_gram = [
         SkipGram(corpus_data_0.vocab_size + 1, params.emb_dim).to(GPU),
         SkipGram(corpus_data_1.vocab_size + 1, params.emb_dim).to(GPU)
     ]
     self.perm = Permutation(params.emb_dim,
                             params.p_sample_top,
                             n_units=params.p_n_units,
                             batch_norm=params.p_bn).to(GPU)
     self.sampler = [
         WordSampler(corpus_data_0.dic,
                     n_urns=n_samples,
                     alpha=params.p_sample_factor,
                     top=params.p_sample_top),
         WordSampler(corpus_data_1.dic,
                     n_urns=n_samples,
                     alpha=params.p_sample_factor,
                     top=params.p_sample_top)
     ]
     self.p_bs = params.p_bs
     self.i_bs = params.i_bs
     self.p_sample_top = params.p_sample_top
     self.emb_dim = params.emb_dim
     self.vocab_size_0, self.vocab_size_1 = corpus_data_0.vocab_size, corpus_data_1.vocab_size
     self.perm_optimizer, self.perm_scheduler = optimizers.get_sgd_adapt(
         self.perm.parameters(),
         lr=params.p_lr,
         mode="min",
         wd=params.p_wd,
         momentum=params.p_momentum,
         factor=params.p_lr_factor,
         patience=params.p_lr_patience)
     self.entropy_loss = EntropyLoss()
     self.init_target = None
     self.init_loss_fn = nn.CrossEntropyLoss(reduction="elementwise_mean")
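     # extra per-corpus samplers limited to the top i_n_init words, used for the initialisation step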
     self.i_sampler = [
         WordSampler(corpus_data_0.dic,
                     n_urns=n_samples,
                     alpha=params.p_sample_factor,
                     top=params.i_n_init),
         WordSampler(corpus_data_1.dic,
                     n_urns=n_samples,
                     alpha=params.p_sample_factor,
                     top=params.i_n_init)
     ]
Example #4
 def skip_gram_step(self):
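     # one skip-gram update per corpus: fetch a batch, compute the loss, and step the optimiser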
     losses = []
     for id in [0, 1]:
         self.sg_optimizer[id].zero_grad()
         pos_u_b, pos_v_b, neg_v_b = self.corpus_data_queue[id].__next__()
         pos_s, neg_s = self.skip_gram[id](pos_u_b, pos_v_b, neg_v_b)
         loss = SkipGram.loss_fn(pos_s, neg_s)
         loss.backward()
         self.sg_optimizer[id].step()
         losses.append(loss.item())
     return losses[0], losses[1]
Example #5
 def build_model(self):
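     # build the vocabulary if it does not exist yet, then construct a skip-gram or CBOW-style model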
     if not self.data_processor.vocab:
         self.data_processor.get_vocab()
     if self.use_skip_gram:
         self.model = SkipGram(self.embedding_dim,
                               len(self.data_processor.vocab),
                               self.neg_model)
     else:
         self.model = COBW(self.embedding_dim,
                           len(self.data_processor.vocab), self.neg_model)
     if self.use_cuda:
         self.model.cuda()
Example #6
import pickle

if __name__ == '__main__':

    window_size = 5
    hidden_size = 100
    batch_size = 100
    max_epoch = 10

    corpus, word_to_id, id_to_word = ptb.load_data('train')
    vocab_size = len(word_to_id)
    contexts, target = create_contexts_target(corpus, window_size=window_size)

    # Model
    #model = CBOW(vocab_size, hidden_size, window_size, corpus)
    model = SkipGram(vocab_size, hidden_size, window_size, corpus)
    optimizer = Adam()

    # Training
    trainer = Trainer(model, optimizer)
    trainer.fit(contexts, target, max_epoch=max_epoch, batch_size=batch_size)

    # plot
    trainer.plot('chap4_ptb.png')

    # Save the distributed word representations
    params = {}
    params['word_vecs'] = model.word_vecs.astype(np.float16)
    params['word_to_id'] = word_to_id
    params['id_to_word'] = id_to_word
    #fname = 'cbow_params.pkl'
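    # assumed continuation (not shown in the excerpt): serialise the params dict with pickle
    pkl_file = 'skipgram_params.pkl'  # hypothetical filename, mirroring the commented-out CBOW name
    with open(pkl_file, 'wb') as f:
        pickle.dump(params, f, -1)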
Example #7
# Training 
########################

if False:
    print('\n-\tPre-training the embedding layer\n')
    
    print(type(train_x))
    print(type(train_x[0]))
    raise TypeError('Billy not bob')

    # Save train_y
    print(vocab_size)
    np.save('/home/carter/src/TDS-LSTM-Tutorial/train_x.npy', train_x)
    
    from skip_gram import SkipGram
    e = SkipGram(vocab_size)
    e.train(train_x, verbose=True)

print('\n-\tTraining the model\n')
# Loss and optimization functions
lr = 0.001 # Learning rate
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(net.parameters(), lr=lr)

# Training params
epochs = 4 # TODO: Play with this and look validation loss
counter = 0
print_every = 100
clip = 5 # gradient clipping TODO:What?

if True:
Example #8
import numpy as np

import torch

import os, sys
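# make the parent directory importable so the local skip_gram module can be found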
currentdir = os.path.dirname(os.path.realpath(__file__))
parentdir = os.path.dirname(currentdir)
sys.path.append(parentdir)
from skip_gram import SkipGram, train_skip_gram

print('\n-\tPre-training the embedding layer\n')

# Load train_y
train_x = np.load('/home/carter/src/TDS-LSTM-Tutorial/train_x.npy')

e = SkipGram(181686)
print(e)
'''
from estimator import SizeEstimator

se = SizeEstimator(e, input_size=(181686,))
print(se.estimate_size())

# Returns
# (size in megabytes, size in bits)
# (408.2833251953125, 3424928768)

print(se.param_bits) # bits taken up by parameters
print(se.forward_backward_bits) # bits stored for forward and backward
print(se.input_bits) # bits for input
'''
Example #9
    out_path = os.path.join(params.modelDir, params.out_path)
    if not os.path.exists(out_path):
        os.mkdir(out_path)

    corpus_data = CorpusData(os.path.join(params.dataDir, params.corpus_path),
                             os.path.join(params.dataDir, params.dic_path),
                             max_ws=params.max_ws,
                             n_ns=params.n_ns,
                             threshold=params.threshold)
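    # DataLoader yielding params.n_sentences sentences per batch, collated with concat_collate and sampled block-wise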
    data_loader = DataLoader(corpus_data,
                             collate_fn=concat_collate,
                             batch_size=params.n_sentences,
                             num_workers=params.n_threads,
                             pin_memory=True,
                             sampler=BlockRandomSampler(corpus_data))
    model = SkipGram(corpus_data.vocab_size + 1, params.emb_dim).to(GPU)
    optimizer, scheduler = optimizers.get_sgd_linear(model.parameters(),
                                                     params.n_epochs *
                                                     len(data_loader),
                                                     lr=params.lr)
    vis = visdom.Visdom(server=f'http://{params.vis_host}',
                        port=params.vis_port,
                        log_to_filename=os.path.join(out_path, "log.txt"))
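    # report/log roughly 100 times per epoch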
    out_freq = (len(data_loader) + 99) // 100
    loss0, loss1, step, mini_step = 0, 0.0, 0, 0
    for epoch in trange(params.n_epochs, desc="epoch"):
        print(f"epoch {epoch} ; out_path = {out_path}")
        for pos_u, pos_v, neg_v in tqdm(data_loader, desc=f"epoch {epoch}"):
            scheduler.step()
            for i in range(pos_u.shape[0] // params.bs):
                optimizer.zero_grad()