def get_oof(clfs, raw_texts, raw_labels, test_data, word2index, attr_dict):
    """Compute out-of-fold (OOF) predictions for model stacking.

    For each fold, loads the checkpointed classifier, predicts on that
    fold's held-out slice of the training data, and predicts on the full
    test set; test-set predictions are averaged across folds.

    Args:
        clfs: list of checkpoint file paths, one per fold (len(clfs) == n_folds).
        raw_texts: full list of training texts.
        raw_labels: full list of training labels, parallel to raw_texts.
        test_data: Data object wrapping the test set.
        word2index: vocabulary mapping used to build per-fold Data objects.
        attr_dict: attribute dictionary (class name -> index).

    Returns:
        Tuple (oof_train, oof_train_y, oof_test) of numpy arrays with
        shapes (n_train, 10), (n_train, 10) and (n_test, 10). The arrays
        are also saved as .npy files in an 'npy' directory next to the
        first checkpoint.
    """
    nfolds = len(clfs)
    n_train = len(raw_texts)
    n_test = len(test_data.sentences)
    class_num = 10  # number of attribute classes — hard-coded in original; TODO confirm
    oof_train = np.zeros((n_train, class_num))
    oof_train_y = np.zeros((n_train, class_num))
    oof_test = np.zeros((n_test, class_num))
    oof_test_skf = np.zeros((nfolds, n_test, class_num))

    # enumerate replaces the original manual `kf` counter
    folds = zip(kfold_split(n_train, nfolds), clfs)
    for kf, ((train_index, test_index), checkpoint) in enumerate(folds, start=1):
        print(checkpoint)
        clf = torch.load(checkpoint)
        print("FOLD:", kf)
        print("TRAIN:", str(len(train_index)), "TEST:", str(len(test_index)))
        dev_texts = [raw_texts[i] for i in test_index]
        dev_labels = [raw_labels[i] for i in test_index]
        dev_data = Data((dev_texts, dev_labels), word2index, attr_dict, args)
        if args.use_elmo != 0:
            dev_elmo = load_elmo(dev_texts)
            dev_data.add_feature(dev_elmo)
        with torch.no_grad():
            dev_predict, oof_dev = train.predict_with_logit(clf, dev_data, args)
        pred_acc_p = score(dev_predict, dev_data.labels)
        print("[p:%.4f, r:%.4f, f:%.4f] acc:%.4f" %
              (pred_acc_p[0], pred_acc_p[1], pred_acc_p[2], pred_acc_p[3]))
        # store this fold's held-out predictions and the gold labels
        oof_train[test_index] = oof_dev
        dev_y = [l[0].detach().numpy() for l in dev_data.labels]
        oof_train_y[test_index] = dev_y
        # each fold's model also predicts the full test set
        _, oof_test_skf[kf - 1, :, :] = train.predict_with_logit(clf, test_data, args)

    oof_test[:] = oof_test_skf.mean(axis=0)

    out_dir = os.path.dirname(clfs[0])  # renamed: `dir` shadowed the builtin
    npy_dir = os.path.join(out_dir, 'npy')
    os.makedirs(npy_dir, exist_ok=True)  # race-safe vs. exists()+mkdir()
    print(out_dir)
    np.save(os.path.join(npy_dir, "oof_train"), oof_train)
    np.save(os.path.join(npy_dir, "oof_train_y"), oof_train_y)
    np.save(os.path.join(npy_dir, "oof_test"), oof_test)
    return oof_train, oof_train_y, oof_test
def dev():
    """Evaluate a saved AttributeClassifier checkpoint on one held-out fold.

    Loads a fixed checkpoint, rebuilds fold index 2 of the training data as
    a test split, runs prediction and prints the (p, r, f, acc) score tuple.
    All file paths are hard-coded to the project layout.
    """
    model = AttributeClassifier()
    check_point = "checkpoints5/checkpoint_AttA3_0.8666.pt"
    model.load_model(check_point)
    f_train = "data/attribute_data.txt"
    f_w2v = "../embedding/embedding_all_merge_300.txt"
    f_dict = "../dataset/attribute.json"
    print(f_w2v)
    raw_texts, raw_labels = load_attr_data(filename=f_train)
    W, word2index = load_w2v(f_w2v)  # W (embedding matrix) is unused here; only the vocab is needed
    attr_list, attr_dict = parse_json(f_dict)
    # fold index 2 is hard-coded — presumably matches the fold this
    # checkpoint was validated on; TODO confirm
    _, test_index = kfold_split(len(raw_texts), args.folds)[2]
    test_texts = [raw_texts[i] for i in test_index]
    test_labels = [raw_labels[i] for i in test_index]
    test_data = Data((test_texts, test_labels), word2index, attr_dict, args)
    test_predict = train.predict(model.classifier, test_data, args)
    pred_acc_t = score(test_predict, test_data.labels)
    print(pred_acc_t)
def train(rnn, train_data, dev_data, test_data, attr_dict, W, args):
    """Train `rnn`, evaluating on dev once per epoch and tracking the best model.

    Args:
        rnn: model exposing `word_rep.word_embed` and `optimize_step(...)`.
        train_data: Data object; iterated one example at a time via `.get(i, cuda)`.
        dev_data: Data object scored after each epoch.
        test_data: Data object; its labels are used for the final per-label report.
        attr_dict: attribute dictionary; keys name the per-label report rows.
        W: optional numpy embedding matrix copied into the model's embedding layer.
        args: namespace with EPOCHS, lr, optimizer, freeze.

    Returns:
        (best_dict, max_acc): a deep copy of the model at its best dev
        F-score (None if the score never exceeded 0) and that best F-score.

    Side effects: prints progress, saves a loss curve under fig/.
    """
    n_epochs = args.EPOCHS
    learning_rate = args.lr  # too high may explode; too low may not learn

    # load pretrained embeddings into the model if provided
    if W is not None:
        W = torch.from_numpy(W)
        rnn.word_rep.word_embed.weight = nn.Parameter(W)

    print("CUDA: " + str(cuda_flag))
    if cuda_flag:
        rnn = rnn.cuda()

    if args.optimizer == "SGD":
        optimizer = torch.optim.SGD(rnn.parameters(), lr=learning_rate)
    if args.optimizer == "Adam":
        optimizer = torch.optim.Adam(rnn.parameters())
    if args.optimizer == "Adadelta":
        optimizer = torch.optim.Adadelta(rnn.parameters())

    if args.freeze:
        # freeze the embedding layer so pretrained vectors are not updated
        for param in rnn.word_rep.word_embed.parameters():
            param.requires_grad = False

    plot_every = 30  # average the loss over this many steps per plot point
    current_loss = []
    all_losses = []
    max_acc = 0
    # initialized up-front: the originals were only assigned inside the
    # improvement branch and raised UnboundLocalError if dev never improved
    max_print = ""
    best_predict = None
    best_dict = None

    np.random.seed([3, 1415])
    for epoch in range(1, n_epochs + 1):
        iterations = 0
        loss_sum = 0
        index_list = np.arange(len(train_data.sentences))
        np.random.shuffle(index_list)
        for index in index_list:
            iterations += 1
            input_tensors, category_tensor = train_data.get(index, cuda_flag)
            loss = rnn.optimize_step(input_tensors, category_tensor, optimizer)
            current_loss.append(loss)
            loss_sum += loss
            if (index + 1) % plot_every == 0:
                all_losses.append(sum(current_loss) / len(current_loss))
                current_loss = []
            # evaluate on dev once per epoch, after the last training example
            # (original condition `iterations % (len(index_list) // 1) == 0`
            # is equivalent since iterations never exceeds len(index_list))
            if iterations == len(index_list):
                with torch.no_grad():
                    dev_predict = predict(rnn, dev_data, args)
                pred_acc_p = score(dev_predict, dev_data.labels)
                print("Epoch:%d" % epoch)
                print("[p:%.4f, r:%.4f, f:%.4f] acc:%.4f" %
                      (pred_acc_p[0], pred_acc_p[1], pred_acc_p[2], pred_acc_p[3]))
                if pred_acc_p[2] > max_acc:  # best model by dev F-score
                    best_predict = dev_predict
                    max_acc = pred_acc_p[2]
                    max_print = ("Epoch%d\n" % epoch +
                                 "[p:%.4f, r:%.4f, f:%.4f] acc:%.4f\n" %
                                 (pred_acc_p[0], pred_acc_p[1],
                                  pred_acc_p[2], pred_acc_p[3]))
                    best_dict = copy.deepcopy(rnn)
        print("Epoch: %d, loss: %.4f" % (epoch, loss_sum))

    print(max_acc)
    print(max_print)
    if best_predict is not None:  # guard: dev score may never have improved
        # NOTE(review): this scores the best DEV predictions against the
        # TEST labels, as in the original — looks like a dev/test mixup;
        # confirm with the data-split logic before relying on this report.
        label_prf = label_analysis(best_predict, test_data.labels)
        for i in range(len(label_prf)):
            print("%s : [%.4f, %.4f, %.4f] %.4f" %
                  (list(attr_dict.keys())[i], label_prf[i][0],
                   label_prf[i][1], label_prf[i][2], label_prf[i][3]))

    plt.plot(all_losses)
    time_stamp = time.asctime().replace(':', '_').split()
    print(time_stamp)
    plt.savefig("fig/foor_%s.png" % '_'.join(time_stamp))
    return best_dict, max_acc