def main(): opt = Options() print('Use {}'.format(opt.pooling_type_str_dict[opt.pooling_type])) train_sents, train_labels = pickle.load(open(opt.train_path, 'rb')) valid_sents, valid_labels = pickle.load(open(opt.valid_path, 'rb')) test_sents, test_labels = pickle.load(open(opt.test_path, 'rb')) # np.set_printoptions(precision=3) np.set_printoptions(threshold=np.inf) # emb = Embedding(opt.vocab_size, 200, padding_idx=0, trainable=False) cnn = ML_CNN.CNN_Module(n_classes=opt.classifier_output_size) if opt.use_cuda: emb.cuda() cnn.cuda() param = [] param.extend(emb.parameters()) param.extend(cnn.parameters()) # optimizer = torch.optim.Adam(param, lr=opt.lr, weight_decay=0.01) # optimizer = torch.optim.Adam(param, lr=opt.lr, weight_decay=0.00001) optimizer = torch.optim.Adam(param, lr=opt.lr) criteron = torch.nn.CrossEntropyLoss() if opt.restore: if os.path.exists(opt.feature_net_path): print("Load pretrained embedding") emb.load_state_dict(torch.load(opt.feature_net_path)) else: print("No pretrained embedding") if os.path.exists(opt.classifier_net_path): print("Load pretrained cnn classifier") cnn.load_state_dict(torch.load(opt.classifier_net_path)) else: print("No pretrained cnn classifier") best_acc = -1 for epoch in range(opt.max_epochs): print("Starting epoch %d" % epoch) kf = get_minibatches_idx(len(train_sents), opt.batch_size, shuffle=True) epoch_losses = [] cnn.train() emb.train() for iteridx, train_index in kf: if len(train_index) <= 1: continue sents = [train_sents[t] for t in train_index] labels = [train_labels[t] for t in train_index] # X_batch, X_lengths, X_labels = prepare_data_for_rnn(sents, labels) X_batch, X_labels = prepare_data_for_cnn(sents, labels) X_batch = Variable(X_batch) X_labels = Variable(X_labels) if opt.use_cuda: X_batch = X_batch.cuda() X_labels = X_labels.cuda() optimizer.zero_grad() features = emb(X_batch) output = cnn(features) loss = criteron(output, X_labels) local_loss = loss.data[0] epoch_losses.append(local_loss) loss.backward() optimizer.step() if iteridx % opt.print_freq == 0: count = output.size(0) topK_correct = test_result(output.cpu().data, X_labels.cpu().data, topK=topK) topK_acc = [float(tmp) / count for tmp in topK_correct] topK_str = " , ".join(["acc@{}: {}".format(k, tmp_acc) for k, tmp_acc in zip(topK, topK_acc)]) print("Epoch {} Iteration {} loss: {} , {}".format(epoch + 1, iteridx + 1, local_loss, topK_str)) ave_loss = sum(epoch_losses) / len(epoch_losses) kf = get_minibatches_idx(len(valid_sents), opt.batch_size, shuffle=True) count = 0 all_topK_correct = np.zeros(len(topK), dtype=int) for _, valid_index in kf: emb.eval() cnn.eval() sents = [valid_sents[t] for t in valid_index] labels = [valid_labels[t] for t in valid_index] X_batch, X_labels = prepare_data_for_cnn(sents, labels) X_batch = Variable(X_batch) X_labels = Variable(X_labels) if opt.use_cuda: X_batch = X_batch.cuda() X_labels = X_labels.cuda() features = emb(X_batch) output = cnn(features) topK_correct = test_result(output.cpu().data, X_labels.cpu().data, topK=topK) topK_correct = np.array(topK_correct) all_topK_correct += topK_correct bsize = output.size(0) count += bsize all_topK_acc = all_topK_correct / float(count) all_topK_acc = all_topK_acc.tolist() all_topK_str = " , ".join(["val_acc@{}: {}".format(k, tmp_acc) for k, tmp_acc in zip(topK, all_topK_acc)]) print("Epoch {} Avg_loss: {}, {}".format(epoch+1, ave_loss, all_topK_str)) acc = all_topK_acc[important_K] if acc > best_acc: print('Dump current model due to current acc {} > past best acc {}'.format(acc, best_acc)) torch.save(cnn.state_dict(), opt.classifier_net_path) best_acc = acc fscore_records = [{k:FScore() for k in topK} for i in range(opt.classifier_output_size)] kf = get_minibatches_idx(len(test_sents), opt.batch_size, shuffle=True) emb.eval() cnn.eval() for _, test_index in kf: sents = [test_sents[t] for t in test_index] labels = [test_labels[t] for t in test_index] X_batch, X_labels = prepare_data_for_cnn(sents, labels) X_batch = Variable(X_batch) X_labels = Variable(X_labels) if opt.use_cuda: X_batch = X_batch.cuda() X_labels = X_labels.cuda() features = emb(X_batch) output = cnn(features) update_F1(output.cpu().data, X_labels.cpu().data, opt.classifier_output_size, topK, fscore_records) with open('F_score_dir/{}.pkl'.format(epoch+1),'w') as f: print('dumping fscore in epoch {}'.format(epoch+1)) pickle.dump(fscore_records, f) print('Loading best model') cnn.load_state_dict(torch.load(opt.classifier_net_path)) print('Testing Data') kf = get_minibatches_idx(len(test_sents), opt.batch_size, shuffle=True) count = 0 all_topK_correct = np.zeros(len(topK), dtype=int) fscore_records = [{k:FScore() for k in topK} for i in range(opt.classifier_output_size)] for _, test_index in kf: emb.eval() cnn.eval() sents = [test_sents[t] for t in test_index] labels = [test_labels[t] for t in test_index] X_batch, X_labels = prepare_data_for_cnn(sents, labels) X_batch = Variable(X_batch) X_labels = Variable(X_labels) if opt.use_cuda: X_batch = X_batch.cuda() X_labels = X_labels.cuda() features = emb(X_batch) output = cnn(features) update_F1(output.cpu().data, X_labels.cpu().data, opt.classifier_output_size, topK, fscore_records) topK_correct = test_result(output.cpu().data, X_labels.cpu().data, topK=topK) topK_correct = np.array(topK_correct) all_topK_correct += topK_correct bsize = output.size(0) count += bsize all_topK_acc = all_topK_correct / float(count) all_topK_acc = all_topK_acc.tolist() all_topK_str = " , ".join(["test_acc@{}: {}".format(k, tmp_acc) for k, tmp_acc in zip(topK, all_topK_acc)]) print("Training end {}".format(all_topK_str)) with open('F_score_dir/best.pkl','w') as f: print('dumping fscore in') pickle.dump(fscore_records, f)
# vocab.append_sents(valid_sents, fixed_vocab_set=fixed_vocab_set) vocab.append_sents(test_sents, fixed_vocab_set=fixed_vocab_set) # print('vocab size {} before shrink'.format(vocab.vocab_len)) vocab.shrink_vocab(2) print('vocab size {} after shrink'.format(vocab.vocab_len)) print('read vec') word_list = [vocab.idx2word[i] for i in range(len(vocab.idx2word))] vec = read_vec(pubmed_w2v_path, word_list) assert vec.shape[0] == vocab.vocab_len print('build emb layer') emb = Embedding(vocab.vocab_len, vec.shape[1], padding_idx=0, trainable=False) emb.initialize_embedding(vec) emb.cuda() torch.save(emb.state_dict(), emb_path) print('dump data') train_sents = convert_sents_to_idx(train_sents, vocab) test_sents = convert_sents_to_idx(test_sents, vocab) valid_sents = convert_sents_to_idx(valid_sents, vocab) dump_preprocessed_data(opt.train_path, train_sents, train_labels) dump_preprocessed_data(opt.test_path, test_sents, test_labels) dump_preprocessed_data(opt.valid_path, valid_sents, valid_labels) dump_vocab(opt.vocab_path, vocab)