def initialize_learner(self):
    # Adam with a lowered beta1 (0.8), as used in the ULMFiT notebooks
    # for training AWD-LSTM language models.
    optimization_function = partial(optim.Adam, betas=(0.8, 0.99))
    self.learner = RNN_Learner(data=self.model_data,
                               models=TextModel(to_gpu(self.model)),
                               opt_fn=optimization_function)
    # Activation regularization (alpha) and temporal activation
    # regularization (beta) on the RNN outputs.
    self.learner.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)
    self.learner.clip = 25.  # gradient clipping threshold
    self.learner.metrics = [accuracy]
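# Assumed imports for the method above (fastai 0.7-era API; a sketch,
# not part of the original listing):
from functools import partial

import torch.optim as optim
from fastai.text import *  # RNN_Learner, TextModel, to_gpu, seq2seq_reg, accuracy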
def _train_classifier(self, train_ids, train_labels, batch_size=4,
                      val_ids=None, val_labels=None):
    # Change from multi-label to multi-class: encode each list of label
    # indices as a multi-hot vector of length n_classes.
    def one_hot_idxs(idxs, n_classes):
        res = np.zeros(n_classes)
        res[idxs] = 1.
        return res

    onehot_train_labels = np.array(
        [one_hot_idxs(l, self._n_classes) for l in train_labels])
    onehot_val_labels = np.array(
        [one_hot_idxs(l, self._n_classes) for l in val_labels])

    train_ds = TextDataset(train_ids, onehot_train_labels)
    val_ds = TextDataset(val_ids, onehot_val_labels)
    # Sort by sequence length (approximately, for the training set) so
    # that batches contain similarly sized documents and need little padding.
    train_sampler = SortishSampler(
        train_ids, key=lambda x: len(train_ids[x]), bs=batch_size)
    val_sampler = SortSampler(val_ids, key=lambda x: len(val_ids[x]))
    train_dl = DataLoader(train_ds, batch_size, num_workers=1,
                          transpose=True, pad_idx=1, sampler=train_sampler)
    val_dl = DataLoader(val_ds, batch_size, num_workers=1,
                        transpose=True, pad_idx=1, sampler=val_sampler)
    md = ModelData("tmp", train_dl, val_dl)

    m = get_rnn_classifier(
        self._bptt, 20 * 70, self._n_classes, self._vocab.size,  # max_seq = 20 * 70
        emb_sz=self._embedding_size, n_hid=self._n_hidden_activations,
        n_layers=self._n_layers, pad_token=1,
        layers=[self._embedding_size * 3, 128, self._n_classes],
        drops=[self._dropouts_classifier[4], 0.1],
        dropouti=self._dropouts_classifier[0],
        wdrop=self._dropouts_classifier[1],
        dropoute=self._dropouts_classifier[2],
        dropouth=self._dropouts_classifier[3])

    self._classifier_model = RNN_Learner(
        md, TextModel(to_gpu(m)), opt_fn=self.OPT_FN)
    self._classifier_model.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)
    self._classifier_model.clip = 25.  # or 0.3 ?!

    # Multi-label loss: sigmoid per class, then binary cross-entropy.
    def binary_ce_wrapper(predicted, gt):
        out = F.sigmoid(predicted)
        return F.binary_cross_entropy(out, gt)

    self._classifier_model.crit = binary_ce_wrapper

    # Metrics evaluated at a 0.5 decision threshold.
    jaccard_0_5 = partial(self.func_metric, func=jaccard_index)
    jaccard_0_5.__name__ = "jaccard_0_5"
    precision_0_5 = partial(self.func_metric, func=precision)
    precision_0_5.__name__ = "precision_0_5"
    recall_0_5 = partial(self.func_metric, func=recall)
    recall_0_5.__name__ = "recall_0_5"
    f1_0_5 = partial(self.func_metric, func=f1)
    f1_0_5.__name__ = "f1_0_5"
    self._classifier_model.metrics = [
        jaccard_0_5, precision_0_5, recall_0_5, f1_0_5
    ]

    # Discriminative learning rates: each earlier layer group gets the
    # base rate divided by another factor of 2.6.
    lr = 3e-3
    lrm = 2.6
    lrs = np.array(
        [lr / (lrm**4), lr / (lrm**3), lr / (lrm**2), lr / lrm, lr])

    # Gradual unfreezing: head only, then the last two layer groups,
    # then the whole network.
    self._classifier_model.load_encoder('enc_weights')
    self._classifier_model.freeze_to(-1)
    self._classifier_model.fit(
        lrs, 1, cycle_len=1, use_clr=(8, 3),
        callbacks=[LoggingCallback(save_path="./tmp/log")])
    self._classifier_model.freeze_to(-2)
    self._classifier_model.fit(
        lrs, 1, cycle_len=1, use_clr=(8, 3),
        callbacks=[LoggingCallback(save_path="./tmp/log")])
    self._classifier_model.unfreeze()
    self._classifier_model.fit(
        lrs, 1, cycle_len=24, use_clr=(32, 10),
        callbacks=[LoggingCallback(save_path="./tmp/log")])
    self._classifier_model.save('classifier_weights')
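# `self.func_metric` is referenced above but not defined in this listing.
# Given the "_0_5" metric names, it presumably binarizes the sigmoid
# outputs at a 0.5 threshold before applying the wrapped metric. A
# hypothetical sketch (the name, signature, and argument order are
# assumptions, not taken from the original class):
def func_metric(self, predicted, gt, func, threshold=0.5):
    predictions = (F.sigmoid(predicted) > threshold).float()
    return func(gt, predictions)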
    clf_layers=[EMB_DIM, 50, 3], pad_token=2,
    embd_pdrop=0.1, attn_pdrop=0.1, resid_pdrop=0.1,
    clf_pdrop=[0.5, 0.1], afn="gelu")


# In[21]:

learn = TransformerLearner(
    model_data, TransformerTextModel(to_gpu(model)),
    opt_fn=partial(torch.optim.Adam, betas=(0.9, 0.999)))
learn.clip = 25
learn.metrics = [accuracy]
learn.load_encoder('lm1_enc')


# In[22]:

lrs = np.array([5e-5, 1e-4, 2e-4, 5e-4, 2e-3])
learn.freeze_to(-1)
learn.lr_find(lrs / 1000)
learn.sched.plot()
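# `lr_find` sweeps the learning rates and `sched.plot()` shows loss
# against rate; the base rate is normally picked where the curve still
# descends steeply. A hedged sketch of the head-only fit that would
# follow (the cycle settings are assumptions, mirroring the RNN recipe
# elsewhere in this document, not taken from the original notebook):
learn.fit(lrs, 1, cycle_len=1, use_clr=(8, 3))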
    3, n_toks, emb_sz=EMB_DIM, n_hid=500, n_layers=3,
    pad_token=2, layers=[EMB_DIM * 3, 50, 3],
    drops=[dps[4], 0.1], dropouti=dps[0], wdrop=dps[1],
    dropoute=dps[2], dropouth=dps[3])


# In[42]:

learn = RNN_Learner(model_data, TextModel(to_gpu(model)), opt_fn=opt_fn)
learn.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)
learn.clip = 25.
learn.metrics = [accuracy]
learn.load_encoder('lm1_enc')


# In[43]:

learn.freeze_to(-1)
learn.lr_find(lrs / 1000)
learn.sched.plot()


# In[44]:

lr = 2e-4
lrm = 2.6
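# These two constants feed the same discriminative-learning-rate pattern
# used in _train_classifier above; the per-layer-group rates would be
# built like this (a sketch assuming five layer groups, not part of the
# original cell):
lrs = np.array([lr / (lrm**4), lr / (lrm**3), lr / (lrm**2), lr / lrm, lr])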