Example #1
    def __init__(self,
                 training_data_ids_path,
                 validation_data_ids_path,
                 language_model_model_dir,
                 data_itos_path,
                 cuda_device_id=0,
                 batch_size=32,
                 dropout_multiplier=0.7):
        # A negative cuda_device_id selects CPU-only mode
        # (torch.cuda.set_device is a no-op for negative ids).
        torch.cuda.set_device(cuda_device_id)
        self.use_cuda = cuda_device_id >= 0

        with open(data_itos_path, 'rb') as itos_file:
            self.inspire_data_itos = pickle.load(itos_file)
        self.vocabulary_size = len(self.inspire_data_itos)

        number_of_backpropagation_through_time_steps = 70
        number_of_hidden_units = 1150
        number_of_layers = 3
        self.embedding_size = 400
        optimization_function = partial(optim.Adam, betas=(0.8, 0.99))

        training_token_ids = np.load(training_data_ids_path)
        training_token_ids = np.concatenate(training_token_ids)
        validation_token_ids = np.load(validation_data_ids_path)
        validation_token_ids = np.concatenate(validation_token_ids)

        training_dataloader = LanguageModelLoader(
            nums=training_token_ids,
            bs=batch_size,
            bptt=number_of_backpropagation_through_time_steps)
        validation_dataloader = LanguageModelLoader(
            nums=validation_token_ids,
            bs=batch_size,
            bptt=number_of_backpropagation_through_time_steps)
        model = LanguageModelData(
            path=language_model_model_dir,
            pad_idx=1,
            n_tok=self.vocabulary_size,
            trn_dl=training_dataloader,
            val_dl=validation_dataloader,
            bs=batch_size,
            bptt=number_of_backpropagation_through_time_steps)

        # Per-layer dropout rates (input, output, weight-drop, embedding,
        # hidden), scaled by a single multiplier.
        dropouts = np.array([0.25, 0.1, 0.2, 0.02, 0.15]) * dropout_multiplier

        self.learner = model.get_model(opt_fn=optimization_function,
                                       emb_sz=self.embedding_size,
                                       n_hid=number_of_hidden_units,
                                       n_layers=number_of_layers,
                                       dropouti=dropouts[0],
                                       dropout=dropouts[1],
                                       wdrop=dropouts[2],
                                       dropoute=dropouts[3],
                                       dropouth=dropouts[4])
        self.learner.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)
        self.learner.clip = 0.3
        self.learner.metrics = [accuracy]
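The snippet above only shows the constructor, so the enclosing class name is not visible. A minimal usage sketch, assuming a hypothetical class name LanguageModelTrainer and hypothetical data paths; the fit call mirrors the schedule used in Example #2:

    # Hypothetical names and paths: the original snippet shows only __init__.
    trainer = LanguageModelTrainer(
        training_data_ids_path='data/train_ids.npy',
        validation_data_ids_path='data/valid_ids.npy',
        language_model_model_dir='models/',
        data_itos_path='data/itos.pkl',
        cuda_device_id=0)
    # One training cycle in the old fastai 0.7 API (illustrative values).
    trainer.learner.fit(1e-3, 1, wds=1e-7, use_clr=(32, 2), cycle_len=1)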
Example #2
    def _train_lm(self, train_ids, batch_size=4, val_ids=None):
        # val_ids defaults to None but is effectively required:
        # np.concatenate(None) below would raise otherwise.
        if val_ids is None:
            raise ValueError("val_ids is required")
        train_dataloader = LanguageModelLoader(np.concatenate(train_ids),
                                               batch_size, self._bptt)
        val_dataloader = LanguageModelLoader(np.concatenate(val_ids),
                                             batch_size, self._bptt)

        # "tmp" is the working directory; 1 is the padding token index.
        md = LanguageModelData("tmp",
                               1,
                               self._vocab.size,
                               train_dataloader,
                               val_dataloader,
                               bs=batch_size,
                               bptt=self._bptt)

        self._language_model = md.get_model(self.OPT_FN,
                                            self._embedding_size,
                                            self._n_hidden_activations,
                                            self._n_layers,
                                            dropouti=self._dropouts_lm[0],
                                            dropout=self._dropouts_lm[1],
                                            wdrop=self._dropouts_lm[2],
                                            dropoute=self._dropouts_lm[3],
                                            dropouth=self._dropouts_lm[4])

        self._language_model.metrics = [accuracy]
        self._language_model.unfreeze()

        lr = 1e-3
        # lr_find only records the loss-vs-lr sweep; the fits below still use
        # the hand-picked lr.
        self._language_model.lr_find(start_lr=lr / 10,
                                     end_lr=lr * 50,
                                     linear=True)
        self._language_model.fit(
            lr / 2,
            1,
            wds=self._wd,
            use_clr=(32, 2),
            cycle_len=1,
            callbacks=[LoggingCallback(save_path="./tmp/log")])

        self._language_model.lr_find(start_lr=lr / 10,
                                     end_lr=lr * 10,
                                     linear=True)

        self._language_model.fit(
            lr,
            1,
            wds=self._wd,
            use_clr=(32, 2),
            cycle_len=20,
            callbacks=[LoggingCallback(save_path="./tmp/log")])

        self._language_model.save_encoder("enc_weights")
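In the old fastai 0.7 API, load_encoder is the counterpart of the save_encoder call above, so the saved weights can warm-start a later learner. A minimal sketch, assuming new_lm is a learner rebuilt via md.get_model with the same architecture hyperparameters:

    # Hypothetical follow-up: warm-start a fresh learner from the saved encoder.
    new_lm.load_encoder("enc_weights")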
Example #3
# (truncated cell: the loop header is missing; this inverts a token-to-id
# mapping into itos)
    itos[v] = k


# In[12]:


itos[4]  # spot-check the string for token id 4


# In[13]:


path = Path("../data/cache/lm_word/")
path.mkdir(parents=True, exist_ok=True)
model_data = LanguageModelData(
    path, pad_idx=0, n_tok=n_tok, trn_dl=trn_loader, val_dl=val_loader, test_dl=tst_loader
)


# ### QRNN Model

# In[ ]:


drops = np.array([0.05, 0.1, 0.05, 0, 0.1])  # dropouti, dropout, wdrop, dropoute, dropouth
learner = model_data.get_model(
    partial(Adam, betas=(0.8, 0.999)),
    emb_sz=300, n_hid=500, n_layers=4,
    dropouti=drops[0], dropout=drops[1], wdrop=drops[2],
    dropoute=drops[3], dropouth=drops[4], qrnn=True
)
bs = 64
bptt = 50
trn_dl = LanguageModelLoader(np.concatenate(tokens_train), bs, bptt)
val_dl = LanguageModelLoader(np.concatenate(tokens_val), bs, bptt)
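# A hedged sketch of a first training cycle for the QRNN learner above; the
# learning rate and weight decay are illustrative, not from this notebook.

# In[ ]:


learner.clip = 0.3
learner.fit(1e-3, 1, wds=1e-6, use_clr=(32, 2), cycle_len=1)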

# In[21]:

# Sanity check: the largest token id must be smaller than the vocabulary size.
np.max(np.array(list(itertools.chain.from_iterable(tokens_train))))

# In[23]:

model_data = LanguageModelData(path,
                               2,  # pad_idx
                               n_toks,
                               trn_dl,
                               val_dl,
                               bs=bs,
                               bptt=bptt)

# In[24]:

drops = np.array([0.25, 0.1, 0.2, 0.02, 0.15]) * 0.7
opt_fn = partial(torch.optim.Adam, betas=(0.8, 0.99))

# In[25]:

learner = model_data.get_model(opt_fn,
                               EMB_DIM,
                               500,
                               3,
                               dropouti=drops[0], dropout=drops[1],
                               wdrop=drops[2], dropoute=drops[3],
                               dropouth=drops[4])
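# A hedged sketch of the training step that typically follows in this API;
# the hyperparameters are illustrative, mirroring Example #2.

# In[ ]:


learner.metrics = [accuracy]
learner.clip = 0.3
learner.fit(1e-3, 1, wds=1e-7, use_clr=(32, 2), cycle_len=1)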