def train(self, seed, super_category, sub_category, round_id,
          oversampling_ratio):
    # generate the dataset manager
    dataset = DatasetManager(self.doc_data, super_category, sub_category,
                             round_id, oversampling_ratio)
    current_save_dir = os.path.join(
        self.save_dir, 'SVM_BOW_Results', 'seed%d' % seed, sub_category,
        'oversampling_ratio' + str(oversampling_ratio),
        'round' + str(round_id))
    if not os.path.exists(current_save_dir):
        os.makedirs(current_save_dir)
    file = open(os.path.join(current_save_dir, 'results.txt'), 'w')
    if oversampling_ratio == 0:
        # no oversampling
        kernel_svm = svm.SVC(kernel=self.kernel, gamma=self.gamma)
    elif dataset.ratio > oversampling_ratio:
        # oversampling ratio 1:1, 1:3, 1:5 or 1:7: re-weight class 0 so the
        # effective class balance matches the requested ratio
        kernel_svm = svm.SVC(kernel=self.kernel,
                             gamma=self.gamma,
                             class_weight={
                                 0: dataset.ratio / oversampling_ratio,
                                 1: 1
                             })
    else:
        # the data is already at least as balanced as requested
        kernel_svm = svm.SVC(kernel=self.kernel, gamma=self.gamma)
    # train with the off-the-shelf toolkit, no early stopping
    batch_input, batch_output = (dataset.train_valid_set_input,
                                 dataset.train_valid_set_output)
    train_inputs, train_outputs = self.input_normalize(
        batch_input, batch_output)
    print('SVM is training, it may take a few minutes ... ')
    kernel_svm.fit(train_inputs, train_outputs)
    test_inputs, test_outputs = self.input_normalize(
        dataset.testset_input, dataset.testset_output)
    kernel_svm_prediction = kernel_svm.predict(test_inputs)
    f1_score, test_metric = F1_score(kernel_svm_prediction, test_outputs)
    precision, recall, accu, TP, FP, TN, FN = test_metric
    print_result = '=== test F1 score: %0.4f=== \n' \
                   '=== other metrics: pre=%0.4f recall=%0.4f accu=%0.4f TP=%0.4f FP=%0.4f TN=%0.4f FN=%0.4f===' \
                   % (f1_score, precision, recall, accu, TP, FP, TN, FN)
    print(print_result)
    file.writelines(print_result + '\n')
    file.writelines('predictions (1 means positive, 0 means negative):' + '\n')
    # write the prediction for each test document (1/10 of the labelled data);
    # the internal label is flipped so that 1 denotes the positive class.
    # Bug fix: the original wrote the flipped gold label (test_outputs)
    # instead of the SVM's prediction.
    for i, ID in enumerate(dataset.testset_ids):
        print_result = '%s %d' % (ID, 1 - kernel_svm_prediction[i])
        file.writelines(print_result + '\n')
    file.close()
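# A minimal sketch of the class_weight arithmetic above, assuming
# `dataset.ratio` is the majority:minority class ratio of the training
# documents (an assumption inferred from how it is compared against
# oversampling_ratio, not confirmed by DatasetManager here). Re-weighting
# class 0 by ratio / oversampling_ratio leaves an effective imbalance of
# oversampling_ratio:1, e.g. ratio 21 and oversampling_ratio 3 give weight
# 21 / 3 = 7, i.e. an effective balance of 21:7 = 3:1.
def _class_weight_sketch(dataset_ratio, oversampling_ratio):
    # mirrors the branches in train(): re-weight only when the data is more
    # imbalanced than the requested ratio
    if oversampling_ratio == 0 or dataset_ratio <= oversampling_ratio:
        return None  # scikit-learn's default: all classes weighted equally
    return {0: dataset_ratio / oversampling_ratio, 1: 1}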
def test_each(config, models):
    dev_data = pickle.load(
        open(os.path.join(config.data_path, config.dev_name), "rb"))
    f1 = F1_score(pred='output', target='target')
    # for model_name, model in zip(config.ensemble_models[:-1], models[:-1]):
    #     print(model_name)
    #     tester = Tester(dev_data, model, metrics=f1, device=config.device,
    #                     batch_size=config.batch_size)
    #     tester.test()
    dev_data = pickle.load(
        open(os.path.join(config.bert_data_path, config.dev_name), "rb"))
    print(config.ensemble_models[-1])
    tester = Tester(dev_data,
                    models[-1],
                    metrics=f1,
                    device=config.device,
                    batch_size=config.batch_size)
    tester.test()
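# Hypothetical usage sketch (load_ensemble_models and its checkpoint layout
# are assumptions for illustration, not part of this repo's confirmed API):
# `models` is expected to line up with config.ensemble_models, with the BERT
# model last, since test_each currently only evaluates models[-1] on the
# BERT-encoded dev set.
#
#     models = load_ensemble_models(config)  # hypothetical helper
#     test_each(config, models)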
def train(self, seed, super_category, sub_category, round_id,
          oversampling_ratio):
    # generate the dataset manager
    dataset = DatasetManager(self.doc_data, super_category, sub_category,
                             round_id, oversampling_ratio)
    current_save_dir = os.path.join(
        self.save_dir, 'FNN_PVDM_%dd_Results' % self.doc_emb_size,
        'seed%d' % seed, sub_category,
        'oversampling_ratio' + str(oversampling_ratio),
        'round' + str(round_id))
    if not os.path.exists(current_save_dir):
        os.makedirs(current_save_dir)
    best_valid_F1 = 0
    tolerance_count = 0
    average_loss = 0
    lr = self.init_learning_rate
    model = FNN_DocVec(
        name="FNN_PVDM%dd_label_%s_round_%d_ratio_%d" %
        (self.doc_emb_size, sub_category, round_id, oversampling_ratio),
        doc_emb=self.doc_emb_size,
        max_grad_norm=self.max_grad_norm,
        FNN_hidden_size=self.fnn_hidden_size,
        learning_rate=lr,
        l2_normalisation=self.l2_normalisation)
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.group(tf.global_variables_initializer()))
        saver = tf.train.Saver()
        file = open(os.path.join(current_save_dir, 'results.txt'), 'w')
        best_test_F1 = 0
        for step in range(1, self.total_steps + 1):
            batch_input_list, batch_output_list = dataset.next_batch()
            inputs, outputs = self.input_normalize(batch_input_list,
                                                   batch_output_list)
            _, training_loss, train_prob = sess.run(
                [model.train_op, model.final_loss, model.prob],
                feed_dict={
                    model.input: inputs,
                    model.output: outputs,
                    model.keep_prob: 0.8,
                })
            # print(train_prob)
            average_loss += training_loss / self.plot_every_steps
            if step % self.plot_every_steps == 0:
                # decay the learning rate, then evaluate on valid and test
                lr = max(self.min_learning_rate, lr * self.decay_rate)
                sess.run(model.update_lr, feed_dict={model.new_lr: lr})
                valid_input, valid_output = self.input_normalize(
                    dataset.validset_input, dataset.validset_output)
                valid_prob = sess.run(model.prob,
                                      feed_dict={
                                          model.input: valid_input,
                                          model.output: valid_output,
                                          model.keep_prob: 1.0,
                                      })
                valid_F1, _ = F1_score(np.squeeze(valid_prob), valid_output)
                test_input, test_output = self.input_normalize(
                    dataset.testset_input, dataset.testset_output)
                test_prob = sess.run(model.prob,
                                     feed_dict={
                                         model.input: test_input,
                                         model.output: test_output,
                                         model.keep_prob: 1.0,
                                     })
                test_F1, test_metrics = F1_score(np.squeeze(test_prob),
                                                 test_output)
                precision, recall, accu, TP, FP, TN, FN = test_metrics
                print_result = "label %s round %2d step %5d, loss=%0.4f valid_F1=%0.4f test_F1=%0.4f\n" \
                               " other test_metrics: pre=%0.4f recall=%0.4f accu=%0.4f TP=%0.4f FP=%0.4f TN=%0.4f FN=%0.4f" % \
                               (sub_category, round_id, step, average_loss, valid_F1, test_F1,
                                precision, recall, accu, TP, FP, TN, FN)
                print(print_result)
                print()
                file.writelines(print_result + '\n')
                average_loss = 0
                # early stopping on validation F1
                if valid_F1 >= best_valid_F1:
                    best_valid_F1 = valid_F1
                    best_test_F1 = test_F1
                    best_test_metric = test_metrics
                    best_test_prob = test_prob
                    tolerance_count = 0
                    if self.is_model_save == 'True':
                        saver.save(
                            sess,
                            os.path.join(current_save_dir, 'model',
                                         'model.ckpt'))
                else:
                    tolerance_count += 1
                if tolerance_count > self.early_stopping_tolerance:
                    break
                # stop training early if the model is clearly not learning
                if best_valid_F1 == 0 and step > 1200:
                    break
        precision, recall, accu, TP, FP, TN, FN = best_test_metric
        print_result = '=== best valid F1 score: %0.4f, test F1 score: %0.4f=== \n' \
                       '=== other test_metrics: pre=%0.4f recall=%0.4f accu=%0.4f TP=%0.4f FP=%0.4f TN=%0.4f FN=%0.4f===' \
                       % (best_valid_F1, best_test_F1, precision, recall, accu, TP, FP, TN, FN)
        print(print_result)
        file.writelines(print_result + '\n')
        file.writelines(
            'predictions (1 means positive, 0 means negative):' + '\n')
        # write the prediction for each test document (1/10 of the labelled data)
        for i, ID in enumerate(dataset.testset_ids):
            print_result = '%s %d' % (ID, best_test_prob[i] < 0.5)
            file.writelines(print_result + '\n')
        file.close()
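# A minimal sketch of the prediction write-out convention above (the document
# IDs and probabilities are made up): a probability below 0.5 is written as
# 1, i.e. positive, which implies model.prob scores the internal label 0.
def _decode_sketch(ids, probs):
    return ['%s %d' % (doc_id, prob < 0.5) for doc_id, prob in zip(ids, probs)]

# _decode_sketch(['doc_a', 'doc_b'], [0.2, 0.9]) == ['doc_a 1', 'doc_b 0']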
new_rnn = reduced_rnn_net(old_rnn, int(row['row']), int(row['col']),
                          hidden_num)
print("\n======= RNN hidden size: {}==========\n".format(hidden_num))

start_time = time.time()
# Unsqueeze from 2 dimensions to 3 to match the RNN model.
acc, pred = test_model(new_rnn, flat_input_test, y_test, test_seq_lens)
stop_time = time.time()
print("Execution time: %s ms" % ((stop_time - start_time) * 1000))
times.append((stop_time - start_time) * 1000)

mat = confusion(x_test.size(0), 3, pred, y_test)
F1_score(mat)

# Save the new network and evaluate its vector angle.
rnns.append(new_rnn)
old_rnn = new_rnn
saveNNParas(new_rnn, x_test, hidden_num)

vectors = pd.read_excel('vector_angle.xls', header=None)
if vectors.empty:
    cnt = 10
    print("\n Finished: Vectors are empty! \n")
    break
df = pd.DataFrame({
    'row': vectors.iloc[:, 0],
    'col': vectors.iloc[:, 1],
def train(config, task_name):
    train_data = pickle.load(
        open(os.path.join(config.bert_data_path, config.train_name), "rb"))
    print(train_data[0])
    # debug: train on a small subset
    if config.debug:
        train_data = train_data[0:30]
    dev_data = pickle.load(
        open(os.path.join(config.bert_data_path, config.dev_name), "rb"))
    print(dev_data[0])
    # test_data = pickle.load(open(os.path.join(config.bert_data_path, config.test_name), "rb"))
    schemas = get_schemas(config.source_path)

    state_dict = torch.load(config.bert_path)
    # print(state_dict)
    text_model = BertForMultiLabelSequenceClassification.from_pretrained(
        config.bert_folder, state_dict=state_dict, num_labels=len(schemas))

    # optimizer: exclude biases and LayerNorm parameters from weight decay
    param_optimizer = list(text_model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]
    # cast epoch to int here as well, matching the n_epochs argument below
    num_train_optimization_steps = int(
        len(train_data) / config.batch_size / config.update_every) * int(
            config.epoch)
    if config.local_rank != -1:
        num_train_optimization_steps = (num_train_optimization_steps //
                                        torch.distributed.get_world_size())
    optimizer = BertAdam(
        lr=config.lr,
        warmup=config.warmup_proportion,
        t_total=num_train_optimization_steps).construct_from_pytorch(
            optimizer_grouped_parameters)

    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)
    logs = FitlogCallback(dev_data)
    f1 = F1_score(pred='output', target='label_id')

    trainer = Trainer(train_data=train_data,
                      model=text_model,
                      loss=BCEWithLogitsLoss(),
                      batch_size=config.batch_size,
                      check_code_level=-1,
                      metrics=f1,
                      metric_key='f1',
                      n_epochs=int(config.epoch),
                      dev_data=dev_data,
                      save_path=config.save_path,
                      print_every=config.print_every,
                      validate_every=config.validate_every,
                      update_every=config.update_every,
                      optimizer=optimizer,
                      use_tqdm=False,
                      device=config.device,
                      callbacks=[timing, early_stop, logs])
    trainer.train()

    # final evaluation on the dev set
    tester = Tester(dev_data,
                    text_model,
                    metrics=f1,
                    device=config.device,
                    batch_size=config.batch_size)
    tester.test()
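# Worked example of the t_total arithmetic above (all numbers are made up):
# with 30000 training examples, batch_size 32, update_every 4 (gradient
# accumulation) and 3 epochs, BertAdam's warmup horizon is
# int(30000 / 32 / 4) * 3 = 234 * 3 = 702 optimizer steps.
def _t_total_sketch(n_examples, batch_size, update_every, epochs):
    # one optimizer step per update_every batches
    return int(n_examples / batch_size / update_every) * epochs

# _t_total_sketch(30000, 32, 4, 3) == 702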
def train(config, task_name):
    train_data = pickle.load(
        open(os.path.join(config.data_path, config.train_name), "rb"))
    # debug: train on a small subset
    if config.debug:
        train_data = train_data[0:30]
    dev_data = pickle.load(
        open(os.path.join(config.data_path, config.dev_name), "rb"))
    # test_data = pickle.load(open(os.path.join(config.data_path, config.test_name), "rb"))
    vocabulary = pickle.load(
        open(os.path.join(config.data_path, config.vocabulary_name), "rb"))
    # load w2v data
    # weight = pickle.load(open(os.path.join(config.data_path, config.weight_name), "rb"))

    # select the model by task name (see the dispatch sketch after this function)
    if task_name == "lstm":
        text_model = LSTM(vocab_size=len(vocabulary),
                          embed_dim=config.embed_dim,
                          output_dim=config.class_num,
                          hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers,
                          dropout=config.dropout)
    elif task_name == "lstm_maxpool":
        text_model = LSTM_maxpool(vocab_size=len(vocabulary),
                                  embed_dim=config.embed_dim,
                                  output_dim=config.class_num,
                                  hidden_dim=config.hidden_dim,
                                  num_layers=config.num_layers,
                                  dropout=config.dropout)
    elif task_name == "cnn":
        text_model = CNN(vocab_size=len(vocabulary),
                         embed_dim=config.embed_dim,
                         class_num=config.class_num,
                         kernel_num=config.kernel_num,
                         kernel_sizes=config.kernel_sizes,
                         dropout=config.dropout,
                         static=config.static,
                         in_channels=config.in_channels)
    elif task_name == "rnn":
        text_model = RNN(vocab_size=len(vocabulary),
                         embed_dim=config.embed_dim,
                         output_dim=config.class_num,
                         hidden_dim=config.hidden_dim,
                         num_layers=config.num_layers,
                         dropout=config.dropout)
    # elif task_name == "cnn_w2v":
    #     text_model = CNN_w2v(vocab_size=len(vocabulary),
    #                          embed_dim=config.embed_dim,
    #                          class_num=config.class_num,
    #                          kernel_num=config.kernel_num,
    #                          kernel_sizes=config.kernel_sizes,
    #                          dropout=config.dropout,
    #                          static=config.static,
    #                          in_channels=config.in_channels,
    #                          weight=weight)
    elif task_name == "rcnn":
        text_model = RCNN(vocab_size=len(vocabulary),
                          embed_dim=config.embed_dim,
                          output_dim=config.class_num,
                          hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers,
                          dropout=config.dropout)
    # elif task_name == "bert":
    #     text_model = BertModel.from_pretrained(config.bert_path)

    optimizer = Adam(lr=config.lr, weight_decay=config.weight_decay)
    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)
    logs = FitlogCallback(dev_data)
    f1 = F1_score(pred='output', target='target')

    trainer = Trainer(train_data=train_data,
                      model=text_model,
                      loss=BCEWithLogitsLoss(),
                      batch_size=config.batch_size,
                      check_code_level=-1,
                      metrics=f1,
                      metric_key='f1',
                      n_epochs=config.epoch,
                      dev_data=dev_data,
                      save_path=config.save_path,
                      print_every=config.print_every,
                      validate_every=config.validate_every,
                      optimizer=optimizer,
                      use_tqdm=False,
                      device=config.device,
                      callbacks=[timing, early_stop, logs])
    trainer.train()

    # final evaluation on the dev set
    tester = Tester(dev_data,
                    text_model,
                    metrics=f1,
                    device=config.device,
                    batch_size=config.batch_size)
    tester.test()
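# The four RNN-family branches above share one constructor signature, so they
# can also be selected through a dict dispatch; a sketch under that
# assumption (CNN is left out because its constructor takes different
# arguments):
RNN_FAMILY = {'lstm': LSTM, 'lstm_maxpool': LSTM_maxpool,
              'rnn': RNN, 'rcnn': RCNN}

def build_rnn_family_model(task_name, config, vocabulary):
    return RNN_FAMILY[task_name](vocab_size=len(vocabulary),
                                 embed_dim=config.embed_dim,
                                 output_dim=config.class_num,
                                 hidden_dim=config.hidden_dim,
                                 num_layers=config.num_layers,
                                 dropout=config.dropout)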
def ensemble(config, models, sum_prob=False, weight=[1, 1, 1, 1, 1]):
    f1 = F1_score(pred='output', target='target')
    f1.tp.cuda()
    f1.fp.cuda()
    f1.fn.cuda()
    dev_data = pickle.load(
        open(os.path.join(config.data_path, config.dev_name), "rb"))
    bert_dev_data = pickle.load(
        open(os.path.join(config.bert_data_path, config.dev_name), "rb"))
    data_iterator = Batch(dev_data,
                          config.ensemble_batch,
                          sampler=SequentialSampler(),
                          as_numpy=False)
    bert_data_iterator = Batch(bert_dev_data,
                               config.ensemble_batch,
                               sampler=SequentialSampler(),
                               as_numpy=False)
    for model in models:
        model.cuda()
    eval_results = {}
    # tensor.cuda() is not in-place, so rebind the result
    weight = torch.tensor(weight).cuda()
    weight_sum = torch.sum(weight).float()
    with torch.no_grad():
        for i, ((batch_x, batch_y),
                (bert_batch_x, bert_batch_y)) in enumerate(
                    zip(data_iterator, bert_data_iterator)):
            print('batch', i)
            # if i > 10:
            #     break
            # batch for the non-BERT models
            text = batch_x['text'].cuda()
            target = batch_y['target'].cuda()
            # batch for the BERT model
            input_ids = bert_batch_x['input_ids'].cuda()
            token_type_ids = bert_batch_x['token_type_ids'].cuda()
            attention_mask = bert_batch_x['attention_mask'].cuda()
            label_id = bert_batch_y['label_id'].cuda()
            # assert torch.equal(target, label_id)
            pred = models[-1](input_ids, token_type_ids, attention_mask)
            pred['output'] *= weight[-1]
            # if not sum_prob:
            #     # hard voting: binarise each model's output before summing
            #     pred['output'][pred['output'] >= 0.5] = 1.0 * weight[-1]
            #     pred['output'][pred['output'] < 0.5] = 0.0
            #     for j, model in enumerate(models[:-1]):
            #         temp = model(text)['output']
            #         temp[temp >= 0.5] = 1.0 * weight[j]
            #         temp[temp < 0.5] = 0.0
            #         pred['output'] += temp
            # else:
            # weighted average of the models' probabilities; the loop
            # variable is `j` to avoid shadowing the batch index `i`
            for j, model in enumerate(models[:-1]):
                pred['output'] += model(text)['output'] * weight[j]
            pred['output'] /= weight_sum
            f1({'output': pred['output'].cuda()},
               {'label_id': bert_batch_y['label_id'].cuda()})
    eval_result = f1.get_metric()
    metric_name = f1.__class__.__name__
    eval_results[metric_name] = eval_result
    print("[ensemble] \n{}".format(_format_eval_results(eval_results)))
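# Worked example of the weighted probability averaging above (the numbers are
# made up, and the 0.5 decision threshold inside F1_score is an assumption):
import torch

def weighted_average(probs, weight):
    # probs: one probability per model for the same label, ordered like
    # `models`, with the BERT model last
    w = torch.tensor(weight, dtype=torch.float)
    return (torch.tensor(probs) * w).sum() / w.sum()

# weighted_average([0.9, 0.8, 0.2, 0.6, 0.7], [1, 1, 1, 1, 1])  -> 0.6400
# up-weighting the last (BERT) entry shifts the blend towards its prediction:
# weighted_average([0.9, 0.8, 0.2, 0.6, 0.7], [1, 1, 1, 1, 3])  -> 0.6571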