Example #1
    def initialize_learner(self):
        # Adam with a lower-than-default beta1 (the momentum term)
        optimization_function = partial(optim.Adam, betas=(0.8, 0.99))

        self.learner = RNN_Learner(data=self.model_data, models=TextModel(to_gpu(self.model)),
                                   opt_fn=optimization_function)
        # AR/TAR activation regularization from the AWD-LSTM paper
        self.learner.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)
        self.learner.clip = 25.  # gradient clipping threshold
        self.learner.metrics = [accuracy]
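
seq2seq_reg is fastai 0.7's AR/TAR regularizer from the AWD-LSTM paper: alpha penalizes large activations of the last RNN layer, beta penalizes large changes between consecutive time steps. A minimal sketch of the idea (not fastai's exact implementation), assuming xtra carries the raw and dropped-out hidden states as in fastai:

def seq2seq_reg_sketch(output, xtra, loss, alpha=0, beta=0):
    hs, dropped_hs = xtra  # raw / dropped-out hidden states per layer
    if alpha:  # AR: keep activations small
        loss = loss + alpha * dropped_hs[-1].pow(2).mean()
    if beta:   # TAR: keep consecutive time steps close ("slowness")
        h = hs[-1]
        if len(h) > 1:
            loss = loss + beta * (h[1:] - h[:-1]).pow(2).mean()
    return loss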
Example #2
File: rnn.py  Project: dotannn/nlp-final
    def _train_classifier(self,
                          train_ids,
                          train_labels,
                          batch_size=4,
                          val_ids=None,
                          val_labels=None):
        # Encode each example's list of label indices as a multi-hot vector
        # (multi-label binary targets):

        def one_hot_idxs(idxs, n_classes):
            res = np.zeros(n_classes)
            res[idxs] = 1.
            return res
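        # Example: one_hot_idxs([0, 2], 4) -> array([1., 0., 1., 0.])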

        # val_ids/val_labels are used unconditionally below, so the None
        # defaults are not actually usable
        assert val_ids is not None and val_labels is not None

        onehot_train_labels = np.array(
            [one_hot_idxs(l, self._n_classes) for l in train_labels])
        onehot_val_labels = np.array(
            [one_hot_idxs(l, self._n_classes) for l in val_labels])

        train_ds = TextDataset(train_ids, onehot_train_labels)
        val_ds = TextDataset(val_ids, onehot_val_labels)

        # Sort by sequence length (approximately, for training) so batches
        # need minimal padding
        train_sampler = SortishSampler(train_ids,
                                       key=lambda x: len(train_ids[x]),
                                       bs=batch_size)
        val_sampler = SortSampler(val_ids, key=lambda x: len(val_ids[x]))

        train_dl = DataLoader(train_ds,
                              batch_size,
                              num_workers=1,
                              transpose=True,
                              pad_idx=1,
                              sampler=train_sampler)
        val_dl = DataLoader(val_ds,
                            batch_size,
                            num_workers=1,
                            transpose=True,
                            pad_idx=1,
                            sampler=val_sampler)

        md = ModelData("tmp", train_dl, val_dl)

        m = get_rnn_classifier(
            self._bptt,
            20 * 70,  # max_seq: maximum number of tokens used per document
            self._n_classes,
            self._vocab.size,
            emb_sz=self._embedding_size,
            n_hid=self._n_hidden_activations,
            n_layers=self._n_layers,
            pad_token=1,
            layers=[self._embedding_size * 3, 128, self._n_classes],  # *3: concat pooling (last, max, mean)
            drops=[self._dropouts_classifier[4], 0.1],
            dropouti=self._dropouts_classifier[0],
            wdrop=self._dropouts_classifier[1],
            dropoute=self._dropouts_classifier[2],
            dropouth=self._dropouts_classifier[3])

        self._classifier_model = RNN_Learner(md,
                                             TextModel(to_gpu(m)),
                                             opt_fn=self.OPT_FN)
        self._classifier_model.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)
        self._classifier_model.clip = 25.  # gradient clipping threshold (0.3 is another common choice)

        def binary_ce_wrapper(predicted, gt):
            # multi-label loss: sigmoid per class, then binary cross-entropy
            out = F.sigmoid(predicted)
            return binary_cross_entropy(out, gt)

        self._classifier_model.crit = binary_ce_wrapper
        jaccard_0_5 = partial(self.func_metric, func=jaccard_index)
        jaccard_0_5.__name__ = "jaccard_0_5"
        precision_0_5 = partial(self.func_metric, func=precision)
        precision_0_5.__name__ = "precision_0_5"
        recall_0_5 = partial(self.func_metric, func=recall)
        recall_0_5.__name__ = "recall_0_5"
        f1_0_5 = partial(self.func_metric, func=f1)
        f1_0_5.__name__ = "f1_0_5"

        self._classifier_model.metrics = [
            jaccard_0_5, precision_0_5, recall_0_5, f1_0_5
        ]

        lr = 3e-3
        lrm = 2.6
        lrs = np.array(
            [lr / (lrm**4), lr / (lrm**3), lr / (lrm**2), lr / lrm, lr])
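        # Discriminative fine-tuning: each earlier layer group gets a rate
        # lrm = 2.6 times smaller, i.e. roughly
        # [6.6e-5, 1.7e-4, 4.4e-4, 1.2e-3, 3.0e-3] from first to last group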

        # ULMFiT: load the fine-tuned language-model encoder, then gradually
        # unfreeze (head first, then the last two layer groups, then all)
        self._classifier_model.load_encoder('enc_weights')

        self._classifier_model.freeze_to(-1)
        self._classifier_model.fit(
            lrs,
            1,
            cycle_len=1,
            use_clr=(8, 3),
            callbacks=[LoggingCallback(save_path="./tmp/log")])
        self._classifier_model.freeze_to(-2)
        self._classifier_model.fit(
            lrs,
            1,
            cycle_len=1,
            use_clr=(8, 3),
            callbacks=[LoggingCallback(save_path="./tmp/log")])
        self._classifier_model.unfreeze()
        self._classifier_model.fit(
            lrs,
            1,
            cycle_len=24,
            use_clr=(32, 10),
            callbacks=[LoggingCallback(save_path="./tmp/log")])

        self._classifier_model.save('classifier_weights')
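
func_metric is defined elsewhere in the project; the "_0_5" suffixes suggest it thresholds the sigmoid outputs at 0.5 before applying the wrapped metric. A hypothetical reconstruction (the signature and threshold handling are assumptions, and torch is assumed to be imported):

    def func_metric(self, predicted, gt, func=None, threshold=0.5):
        # assumed behavior: binarize the sigmoid outputs at the threshold,
        # then apply the wrapped metric (jaccard_index, precision, ...) to
        # the resulting 0/1 predictions
        preds = (torch.sigmoid(predicted) > threshold).float()
        return func(preds, gt)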
Example #3

# Trailing keyword arguments of a GPT-style transformer classifier
# configuration; the opening of the call is not included in this excerpt.
    clf_layers=[EMB_DIM, 50, 3],
    pad_token=2,
    embd_pdrop=0.1,   # embedding dropout
    attn_pdrop=0.1,   # attention dropout
    resid_pdrop=0.1,  # residual dropout
    clf_pdrop=[0.5, 0.1],
    afn="gelu"        # activation function
)
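
afn="gelu" selects the GELU activation used in GPT-style transformers. For reference, the tanh approximation from the original GPT code (a standalone sketch, not part of this project):

import numpy as np

def gelu(x):
    # GELU, tanh approximation: 0.5*x*(1 + tanh(sqrt(2/pi)*(x + 0.044715*x^3)))
    return 0.5 * x * (1 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x ** 3)))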


# In[21]:


learn = TransformerLearner(
    model_data, 
    TransformerTextModel(to_gpu(model)), 
    opt_fn=partial(torch.optim.Adam, betas=(0.9, 0.999)))
learn.clip = 25
learn.metrics = [accuracy]
learn.load_encoder('lm1_enc')


# In[22]:


lrs = np.array([5e-5, 1e-4, 2e-4, 5e-4, 2e-3])  # per-layer-group rates, smallest for the earliest layers
learn.freeze_to(-1)        # train only the classifier head
learn.lr_find(lrs / 1000)  # LR range test, starting well below the target rates
learn.sched.plot()

Example #4

# Trailing arguments of a get_rnn_classifier(...) call; the opening of the
# call is not included in this excerpt (the result is the model variable
# used in the next cell).
                           3,        # n_class
                           n_toks,
                           emb_sz=EMB_DIM,
                           n_hid=500,
                           n_layers=3,
                           pad_token=2,
                           layers=[EMB_DIM * 3, 50, 3],
                           drops=[dps[4], 0.1],
                           dropouti=dps[0],
                           wdrop=dps[1],
                           dropoute=dps[2],
                           dropouth=dps[3])

# In[42]:

learn = RNN_Learner(model_data, TextModel(to_gpu(model)), opt_fn=opt_fn)
learn.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)
learn.clip = 25.
learn.metrics = [accuracy]
learn.load_encoder('lm1_enc')

# In[43]:

learn.freeze_to(-1)
learn.lr_find(lrs / 1000)
learn.sched.plot()

# In[44]:

lr = 2e-4
lrm = 2.6
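
The excerpt ends here; following the pattern of the earlier examples, the next step would presumably build the discriminative rate array from lr and lrm:

lrs = np.array([lr / (lrm**4), lr / (lrm**3), lr / (lrm**2), lr / lrm, lr])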