def load_training_and_validation_data(self, training_data_ids_path, training_data_labels_path, validation_data_ids_path, validation_data_labels_path, classifier_data_dir, batch_size=10):
    """Load tokenized texts and labels from .npy files and build ModelData.

    Labels are flattened and shifted so the smallest class id becomes 0.
    Training batches use half of `batch_size` (sortish sampling doubles the
    effective throughput variance), validation uses the full `batch_size`.
    Stores the result on `self.model_data`.
    """
    trn_ids = np.load(training_data_ids_path)
    val_ids = np.load(validation_data_ids_path)

    # Flatten to 1-D and rebase so class ids start at zero.
    trn_y = np.load(training_data_labels_path).flatten()
    val_y = np.load(validation_data_labels_path).flatten()
    trn_y -= trn_y.min()
    val_y -= val_y.min()

    trn_ds = TextDataset(trn_ids, trn_y)
    val_ds = TextDataset(val_ids, val_y)

    # Sortish sampler batches similarly-sized texts together (less padding)
    # while keeping some shuffling; validation is fully sorted by length.
    trn_sampler = SortishSampler(data_source=trn_ids,
                                 key=lambda i: len(trn_ids[i]),
                                 bs=batch_size // 2)
    val_sampler = SortSampler(data_source=val_ids,
                              key=lambda i: len(val_ids[i]))

    trn_dl = DataLoader(dataset=trn_ds, batch_size=batch_size // 2,
                        transpose=True, num_workers=1, pad_idx=1,
                        sampler=trn_sampler)
    val_dl = DataLoader(dataset=val_ds, batch_size=batch_size,
                        transpose=True, num_workers=1, pad_idx=1,
                        sampler=val_sampler)

    self.model_data = ModelData(path=classifier_data_dir,
                                trn_dl=trn_dl, val_dl=val_dl)
def _train_classifier(self, train_ids, train_labels, batch_size=4, val_ids=None, val_labels=None):
    """Build and train the RNN text classifier with gradual unfreezing.

    Labels are multi-hot encoded (multi-label setup), the loss is
    sigmoid + binary cross-entropy, and training follows the ULMFiT
    recipe: load the pretrained encoder, fine-tune the head, then the
    last two layer groups, then the whole network.

    Parameters:
        train_ids: sequence of token-id arrays for training texts.
        train_labels: per-example iterables of class indices.
        batch_size: batch size for both loaders.
        val_ids, val_labels: validation counterparts. The None defaults
            are kept for signature compatibility only — a validation set
            is mandatory because ModelData needs a validation DataLoader.

    Raises:
        ValueError: if val_ids or val_labels is not provided.
    """
    # Bug fix: the original dereferenced val_labels/val_ids unconditionally,
    # so calling with the defaults crashed with an opaque TypeError.
    if val_ids is None or val_labels is None:
        raise ValueError(
            "val_ids and val_labels must be provided: a validation set "
            "is required to build ModelData.")

    # change from multi-label to multi-class:
    def one_hot_idxs(idxs, n_classes):
        # Multi-hot encode: 1.0 at every index in `idxs`, 0.0 elsewhere.
        res = np.zeros(n_classes)
        res[idxs] = 1.
        return res

    onehot_train_labels = np.array(
        [one_hot_idxs(l, self._n_classes) for l in train_labels])
    onehot_val_labels = np.array(
        [one_hot_idxs(l, self._n_classes) for l in val_labels])

    train_ds = TextDataset(train_ids, onehot_train_labels)
    val_ds = TextDataset(val_ids, onehot_val_labels)

    # Length-aware sampling reduces padding inside each batch.
    train_sampler = SortishSampler(train_ids,
                                   key=lambda x: len(train_ids[x]),
                                   bs=batch_size)
    val_sampler = SortSampler(val_ids, key=lambda x: len(val_ids[x]))
    train_dl = DataLoader(train_ds, batch_size, num_workers=1,
                          transpose=True, pad_idx=1, sampler=train_sampler)
    val_dl = DataLoader(val_ds, batch_size, num_workers=1,
                        transpose=True, pad_idx=1, sampler=val_sampler)
    md = ModelData("tmp", train_dl, val_dl)

    m = get_rnn_classifier(
        self._bptt,
        20 * 70,  # max_seq: longest sequence the classifier backbone sees
        self._n_classes,
        self._vocab.size,
        emb_sz=self._embedding_size,
        n_hid=self._n_hidden_activations,
        n_layers=self._n_layers,
        pad_token=1,
        # Head: concat-pooled encoder output (3x emb) -> 128 -> n_classes.
        layers=[self._embedding_size * 3, 128, self._n_classes],
        drops=[self._dropouts_classifier[4], 0.1],
        dropouti=self._dropouts_classifier[0],
        wdrop=self._dropouts_classifier[1],
        dropoute=self._dropouts_classifier[2],
        dropouth=self._dropouts_classifier[3])

    self._classifier_model = RNN_Learner(md, TextModel(to_gpu(m)),
                                         opt_fn=self.OPT_FN)
    # AR/TAR regularization from the ULMFiT paper.
    self._classifier_model.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)
    self._classifier_model.clip = 25.  # or 0.3 ?!

    def binary_ce_wrapper(predicted, gt):
        # Sigmoid + BCE: each class is an independent binary decision.
        out = F.sigmoid(predicted)
        return binary_cross_entropy(out, gt)

    self._classifier_model.crit = binary_ce_wrapper

    # Metrics thresholded at 0.5; __name__ is set so fastai's logging
    # prints a readable column header for each partial.
    jaccard_0_5 = partial(self.func_metric, func=jaccard_index)
    jaccard_0_5.__name__ = "jaccard_0_5"
    precision_0_5 = partial(self.func_metric, func=precision)
    precision_0_5.__name__ = "precision_0_5"
    recall_0_5 = partial(self.func_metric, func=recall)
    recall_0_5.__name__ = "recall_0_5"
    f1_0_5 = partial(self.func_metric, func=f1)
    f1_0_5.__name__ = "f1_0_5"
    self._classifier_model.metrics = [
        jaccard_0_5, precision_0_5, recall_0_5, f1_0_5
    ]

    # Discriminative learning rates: earlier layer groups get smaller LRs.
    lr = 3e-3
    lrm = 2.6
    lrs = np.array(
        [lr / (lrm**4), lr / (lrm**3), lr / (lrm**2), lr / lrm, lr])

    self._classifier_model.load_encoder('enc_weights')

    # Gradual unfreezing: head only, then last two groups, then everything.
    self._classifier_model.freeze_to(-1)
    self._classifier_model.fit(
        lrs, 1, cycle_len=1, use_clr=(8, 3),
        callbacks=[LoggingCallback(save_path="./tmp/log")])
    self._classifier_model.freeze_to(-2)
    self._classifier_model.fit(
        lrs, 1, cycle_len=1, use_clr=(8, 3),
        callbacks=[LoggingCallback(save_path="./tmp/log")])
    self._classifier_model.unfreeze()
    self._classifier_model.fit(
        lrs, 1, cycle_len=24, use_clr=(32, 10),
        callbacks=[LoggingCallback(save_path="./tmp/log")])
    self._classifier_model.save('classifier_weights')
df_train.label.value_counts() # In[19]: bs = 64 trn_ds = TextDataset(tokens_train, df_train.label.values) val_ds = TextDataset(tokens_val, df_val.label.values) trn_samp = SortishSampler(tokens_train, key=lambda x: len(tokens_train[x]), bs=bs//2) val_samp = SortSampler(tokens_val, key=lambda x: len(tokens_val[x])) trn_dl = DataLoader(trn_ds, bs//2, transpose=False, num_workers=1, pad_idx=2, sampler=trn_samp) val_dl = DataLoader(val_ds, bs, transpose=False, num_workers=1, pad_idx=2, sampler=val_samp) model_data = ModelData(path, trn_dl, val_dl) # In[20]: model= get_transformer_classifier( n_tok=n_toks, emb_sz=EMB_DIM, n_head=12, n_layer=3, n_ctx=200, max_seq_len=100, clf_layers=[EMB_DIM, 50, 3], pad_token=2, embd_pdrop=0.1,