def load(cls, config_path):
    """Build a model from a JSON config and restore its trained weights.

    NOTE(review): the first parameter is ``cls``, so this is presumably
    decorated as a classmethod outside the visible source -- confirm.

    Args:
        config_path: Path to a JSON file with a ``Path`` section
            (``vocab_dir``, ``model_dir``) and a ``Model`` section
            (``embed_dir``, ``out_channels``, ``conv_widths``,
            ``hidden_size``, ``cuda_device``).

    Returns:
        An instance of ``cls`` whose weights were read from
        ``<model_dir>/net.pt`` and which has been put in eval mode.

    Raises:
        KeyError: If a required config key is missing.
        OSError: If the config, vocab or checkpoint file cannot be opened.
    """
    with open(config_path, 'r') as fread:
        config_dict = json.load(fread)
    path_config = config_dict['Path']
    model_config = config_dict['Model']

    # NOTE(review): pickle.load can execute arbitrary code; vocab_dir must
    # only ever point at trusted files.
    category_vocab_path = os.path.join(path_config['vocab_dir'], 'category')
    with open(category_vocab_path, 'rb') as fread:
        category_vocab = pickle.load(fread)
    # The output layer has one unit per entry of the category vocab.
    out_size = len(category_vocab)

    # Only the embedding matrix is needed to rebuild the network.
    words_embed, _ = load_full_embedding_with_vocab(
        model_config['embed_dir'])

    model = cls(words_embed=words_embed,
                out_channels=model_config['out_channels'],
                conv_widths=model_config['conv_widths'],
                hidden_size=model_config['hidden_size'],
                out_size=out_size,
                cuda_device=model_config['cuda_device'])

    # Deserialize onto the CPU first: a checkpoint written from a GPU model
    # would otherwise fail to load on a host without CUDA.
    # load_state_dict() then copies the values into the model's own
    # parameters, whichever device they live on.
    state_dict = torch.load(
        os.path.join(path_config['model_dir'], 'net.pt'),
        map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()
    return model
def load(cls, config_path):
    """Build a model from a JSON config and restore its trained weights.

    NOTE(review): the first parameter is ``cls``, so this is presumably
    decorated as a classmethod outside the visible source -- confirm.

    Args:
        config_path: Path to a JSON file with a ``Path`` section
            (``embed_dir``, ``model_dir``) and a ``Model`` section
            (``out_channels``, ``hidden_size``, ``cuda_device``,
            ``dropout``, ``h``, ``num_mhas``).

    Returns:
        An instance of ``cls`` whose weights were read from
        ``<model_dir>/net.pt`` and which has been put in eval mode.

    Raises:
        KeyError: If a required config key is missing.
        OSError: If the config or checkpoint file cannot be opened.
    """
    with open(config_path, 'r') as fread:
        config_dict = json.load(fread)
    path_config = config_dict['Path']
    model_config = config_dict['Model']

    # Only the embedding matrix is needed to rebuild the network.
    words_embed, _ = load_full_embedding_with_vocab(path_config['embed_dir'])

    model = cls(words_embed=words_embed,
                out_channels=model_config['out_channels'],
                hidden_size=model_config['hidden_size'],
                cuda_device=model_config['cuda_device'],
                dropout=model_config['dropout'],
                h=model_config['h'],
                num_mhas=model_config['num_mhas'])

    # Deserialize onto the CPU first: a checkpoint written from a GPU model
    # would otherwise fail to load on a host without CUDA.
    # load_state_dict() then copies the values into the model's own
    # parameters, whichever device they live on.
    state_dict = torch.load(
        os.path.join(path_config['model_dir'], 'net.pt'),
        map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()
    return model
def main(config_path):
    """Train the baseline answer-selection classifier and report test metrics.

    Reads a JSON config, loads the pretrained category and focus models and
    the word embeddings, trains ``BaselineAnswerSelectionClassifier`` on the
    train split while evaluating on train/dev after each callback, saves the
    resulting weights to ``<model_dir>/./net.pt`` and finally prints
    accuracy, precision, average precision, MAP and MRR on the test split.

    Args:
        config_path: Path to a JSON file with ``Path``, ``Dataset``,
            ``Model`` and ``Train`` sections.

    Returns:
        None. Results are printed and the model is written to disk.
    """
    with open(config_path, 'r') as fread:
        config_dict = json.load(fread)

    # path
    path_config = config_dict['Path']
    model_dir = path_config['model_dir']
    train = path_config['train']
    dev = path_config['dev']
    dev_ref = path_config['dev_ref']
    test = path_config['test']
    test_ref = path_config['test_ref']

    print('Loading question analysis models...')
    category_model = BaselineCategoryClassifier.load(
        path_config['category_model_config'])
    focus_model = BaselineFocusClassifier.load(
        path_config['focus_model_config'])
    words_embed, words_vocab = load_full_embedding_with_vocab(
        path_config['embed_dir'])
    # NOTE(review): pickle.load can execute arbitrary code; the vocab file
    # must come from a trusted source.
    with open(path_config['category_vocab'], 'rb') as fread:
        category_vocab = pickle.load(fread)

    # dataset
    dataset_config = config_dict['Dataset']
    pad_size = dataset_config['pad_size']
    batch_size = dataset_config['batch_size']

    def make_reader(path):
        """Build a reader over ``path`` sharing the models and vocabularies."""
        return WikiqaBaselineReader(path,
                                    category_model,
                                    focus_model,
                                    words_vocab.stoi,
                                    category_vocab.itos,
                                    PAD_TOKEN='<pad>',
                                    pad_size=pad_size)

    print('Loading train data...')
    train_reader = make_reader(train)
    dev_reader = make_reader(dev)
    # Questions and answers share the same pretrained word vocabulary.
    vocabs = {'q_words': words_vocab, 'a_words': words_vocab}
    train_reader.set_vocabs(vocabs)
    dev_reader.set_vocabs(vocabs)
    train_iterator = train_reader.get_dataset_iterator(batch_size, train=True)
    dev_iterator = dev_reader.get_dataset_iterator(batch_size,
                                                   train=False,
                                                   sort=False)

    # model
    model_config = config_dict['Model']
    conv_width = model_config['conv_width']
    out_channels = model_config['out_channels']
    hidden_size = model_config['hidden_size']
    cuda_device = model_config['cuda_device']
    clf = BaselineAnswerSelectionClassifier(words_embed=words_embed,
                                            out_channels=out_channels,
                                            conv_width=conv_width,
                                            hidden_size=hidden_size,
                                            cuda_device=cuda_device)

    # train
    train_config = config_dict['Train']
    num_epoch = train_config['epoch']
    weight_decay = train_config['weight_decay']
    lr = train_config['lr']
    early_stopping = train_config['early_stopping']
    input_names = [
        'q_words', 'a_words', 'q_word_over', 'a_word_over', 'q_sem_over',
        'a_sem_over'
    ]
    optimizer = optim.Adam(clf.parameters(),
                           lr=lr,
                           weight_decay=weight_decay,
                           eps=1e-5)
    if cuda_device is not None:
        clf.cuda(device=cuda_device)

    def ranking_metrics(scores, ref_path):
        """Return ``(MAP, MRR)`` for ``scores`` grouped into queries.

        ``filtered_ref_generator(ref_path)`` yields one label sequence per
        query; ``scores`` is consumed in the same order, slice by slice.
        """
        start = 0
        aps = []  # per-query average precision
        rrs = []  # per-query reciprocal rank
        for query_labels in filtered_ref_generator(ref_path):
            stop = start + len(query_labels)
            query_scores = scores[start:stop]
            start = stop
            aps.append(
                sklearn.metrics.average_precision_score(
                    query_labels, query_scores))
            # Position of the highest-scoring relevant candidate.
            # NOTE(review): this only holds when the scores are
            # non-negative (e.g. probabilities); with log-probabilities the
            # argmin would land on an irrelevant candidate -- confirm what
            # get_label_score returns.
            query_rel_best = np.argmin(-query_scores * query_labels)
            # argsort(argsort(-s)) gives each candidate's 0-based rank when
            # sorted by descending score.
            rank = np.argsort(np.argsort(-query_scores))[query_rel_best] + 1
            rrs.append(1 / rank)
        return np.mean(aps), np.mean(rrs)

    def evaluate(split, iterator, ref_path=None, verbose=True):
        """Score ``iterator`` with ``clf`` and optionally print metrics.

        Returns the MAP when ``ref_path`` is given, otherwise ``None``.
        """
        labels, scores = get_label_score(clf,
                                         iterator,
                                         cuda_device,
                                         'label',
                                         input_names=input_names)
        predicts = scores.argmax(axis=-1)
        # Column 1 is used as the positive-class score for ranking.
        scores = scores[:, 1]
        if verbose:
            print('%s_acc: %.2f' %
                  (split, sklearn.metrics.accuracy_score(labels, predicts)))
            print('%s_precision: %.2f' %
                  (split, sklearn.metrics.precision_score(labels, predicts)))
            print('%s_average_precision: %.2f' %
                  (split,
                   sklearn.metrics.average_precision_score(labels, scores)))
        if ref_path is None:
            return None
        mean_ap, mean_rr = ranking_metrics(scores, ref_path)
        if verbose:
            print('%s_MAP: %.2f' % (split, mean_ap))
            print('%s_MRR: %.2f' % (split, mean_rr))
        return mean_ap

    def callback(verbose=True):
        """Evaluate on train and dev; return the dev MAP.

        NOTE(review): train_model presumably uses the returned value for
        model selection / early stopping -- confirm.
        """
        evaluate('train', train_iterator, verbose=verbose)
        return evaluate('dev', dev_iterator, dev_ref, verbose=verbose)

    print('Training...')
    best_state_dict = train_model(clf,
                                  optimizer,
                                  train_iterator,
                                  label_name='label',
                                  num_epoch=num_epoch,
                                  cuda_device=cuda_device,
                                  early_stopping=early_stopping,
                                  input_names=input_names,
                                  callback=callback)
    print()
    if best_state_dict is not None:
        clf.load_state_dict(best_state_dict)
    torch.save(clf.state_dict(), os.path.join(model_dir, './net.pt'))

    # test
    print('Loading test data...')
    test_reader = make_reader(test)
    test_reader.set_vocabs(vocabs)
    test_iterator = test_reader.get_dataset_iterator(batch_size,
                                                     train=False,
                                                     sort=False)

    print('Testing...')
    evaluate('test', test_iterator, test_ref)
def main(config_path):
    """Train the self-attention CNN answer selector and report test metrics.

    Reads a JSON config, builds readers for the train/dev/test splits with a
    shared pretrained word vocabulary, trains ``SelfAttentionCnnClassifier``
    with an Adam optimizer wrapped in ``NoamOpt``, saves the resulting
    weights to ``<model_dir>/./net.pt`` and prints accuracy, precision,
    average precision, MAP and MRR on the test split.

    Args:
        config_path: Path to a JSON file with ``Path``, ``Dataset``,
            ``Model`` and ``Train`` sections.

    Returns:
        None. Results are printed and the model is written to disk.
    """
    with open(config_path, 'r') as fread:
        config_dict = json.load(fread)

    # path
    path_config = config_dict['Path']
    model_dir = path_config['model_dir']
    train = path_config['train']
    dev = path_config['dev']
    dev_ref = path_config['dev_ref']
    test = path_config['test']
    test_ref = path_config['test_ref']

    # dataset
    dataset_config = config_dict['Dataset']
    batch_size = dataset_config['batch_size']

    print('Loading train data...')
    train_reader = WikiqaReader(train, PAD_TOKEN='<pad>')
    dev_reader = WikiqaReader(dev, PAD_TOKEN='<pad>')
    words_embed, words_vocab = load_full_embedding_with_vocab(
        path_config['embed_dir'])
    # Questions and answers share the same pretrained word vocabulary.
    vocabs = {'q_words': words_vocab, 'a_words': words_vocab}
    train_reader.set_vocabs(vocabs)
    dev_reader.set_vocabs(vocabs)
    train_iterator = train_reader.get_dataset_iterator(batch_size, train=True)
    dev_iterator = dev_reader.get_dataset_iterator(batch_size,
                                                   train=False,
                                                   sort=False)
    test_reader = WikiqaReader(test, PAD_TOKEN='<pad>')
    test_reader.set_vocabs(vocabs)
    test_iterator = test_reader.get_dataset_iterator(batch_size,
                                                     train=False,
                                                     sort=False)

    # model
    model_config = config_dict['Model']
    conv_width = model_config['conv_width']
    out_channels = model_config['out_channels']
    hidden_size = model_config['hidden_size']
    cuda_device = model_config['cuda_device']
    dropout = model_config['dropout']
    h = model_config['h']
    clf = SelfAttentionCnnClassifier(words_embed=words_embed,
                                     out_channels=out_channels,
                                     conv_width=conv_width,
                                     hidden_size=hidden_size,
                                     cuda_device=cuda_device,
                                     h=h,
                                     dropout=dropout)

    # train
    train_config = config_dict['Train']
    num_epoch = train_config['epoch']
    weight_decay = train_config['weight_decay']
    early_stopping = train_config['early_stopping']
    factor = train_config['factor']
    warmup = train_config['warmup']
    input_names = ['q_words', 'a_words']
    # Adam is created with lr=0.
    # NOTE(review): presumably NoamOpt sets the actual rate on every step
    # from factor/warmup -- confirm.
    optimizer = NoamOpt(
        clf.len_embed, factor, warmup,
        optim.Adam(clf.parameters(),
                   lr=0,
                   weight_decay=weight_decay,
                   eps=1e-5))
    if cuda_device is not None:
        clf.cuda(device=cuda_device)

    def ranking_metrics(scores, ref_path):
        """Return ``(MAP, MRR)`` for ``scores`` grouped into queries.

        ``filtered_ref_generator(ref_path)`` yields one label sequence per
        query; ``scores`` is consumed in the same order, slice by slice.
        """
        start = 0
        aps = []  # per-query average precision
        rrs = []  # per-query reciprocal rank
        for query_labels in filtered_ref_generator(ref_path):
            stop = start + len(query_labels)
            query_scores = scores[start:stop]
            start = stop
            aps.append(
                sklearn.metrics.average_precision_score(
                    query_labels, query_scores))
            # Position of the highest-scoring relevant candidate.
            # NOTE(review): this only holds when the scores are
            # non-negative (e.g. probabilities); with log-probabilities the
            # argmin would land on an irrelevant candidate -- confirm what
            # get_label_score returns.
            query_rel_best = np.argmin(-query_scores * query_labels)
            # argsort(argsort(-s)) gives each candidate's 0-based rank when
            # sorted by descending score.
            rank = np.argsort(np.argsort(-query_scores))[query_rel_best] + 1
            rrs.append(1 / rank)
        return np.mean(aps), np.mean(rrs)

    def evaluate(split,
                 iterator,
                 ref_path=None,
                 verbose=True,
                 rank_format='%.2f'):
        """Score ``iterator`` with ``clf`` and optionally print metrics.

        ``rank_format`` is the number format used for the MAP/MRR lines.
        Returns the MAP when ``ref_path`` is given, otherwise ``None``.
        """
        labels, scores = get_label_score(clf,
                                         iterator,
                                         cuda_device,
                                         'label',
                                         input_names=input_names)
        predicts = scores.argmax(axis=-1)
        # Column 1 is used as the positive-class score for ranking.
        scores = scores[:, 1]
        if verbose:
            print('%s_acc: %.2f' %
                  (split, sklearn.metrics.accuracy_score(labels, predicts)))
            print('%s_precision: %.2f' %
                  (split, sklearn.metrics.precision_score(labels, predicts)))
            print('%s_average_precision: %.2f' %
                  (split,
                   sklearn.metrics.average_precision_score(labels, scores)))
        if ref_path is None:
            return None
        mean_ap, mean_rr = ranking_metrics(scores, ref_path)
        if verbose:
            print(('%s_MAP: ' + rank_format) % (split, mean_ap))
            print(('%s_MRR: ' + rank_format) % (split, mean_rr))
        return mean_ap

    def callback(verbose=True):
        """Evaluate on train, dev and test; return the dev MAP.

        The test split is only reported here; the returned value comes from
        the dev split.
        NOTE(review): train_model presumably uses the returned value for
        model selection / early stopping -- confirm.
        """
        evaluate('train', train_iterator, verbose=verbose)
        dev_map = evaluate('dev', dev_iterator, dev_ref, verbose=verbose)
        evaluate('test', test_iterator, test_ref, verbose=verbose)
        return dev_map

    print('Training...')
    best_state_dict = train_model(clf,
                                  optimizer,
                                  train_iterator,
                                  label_name='label',
                                  num_epoch=num_epoch,
                                  cuda_device=cuda_device,
                                  early_stopping=early_stopping,
                                  input_names=input_names,
                                  callback=callback)
    print()
    if best_state_dict is not None:
        clf.load_state_dict(best_state_dict)
    torch.save(clf.state_dict(), os.path.join(model_dir, './net.pt'))

    print('Testing...')
    # The final report prints MAP/MRR with four decimals.
    evaluate('test', test_iterator, test_ref, rank_format='%.4f')
def main(config_path):
    """Cross-validate, then train and save the baseline focus classifier.

    Reads a JSON config, runs k-fold cross-validation over the training
    data (printing per-fold and mean accuracy), then trains a fresh model on
    the full training iterator and writes its weights to
    ``<model_dir>/./net.pt``.

    Args:
        config_path: Path to a JSON file with ``Path``, ``Dataset``,
            ``Model`` and ``Train`` sections.

    Returns:
        None. Results are printed and the model is written to disk.
    """
    with open(config_path, 'r') as config_file:
        config = json.load(config_file)

    paths = config['Path']
    model_dir = paths['model_dir']
    vocab_dir = paths['vocab_dir']
    train = paths['train']

    # dataset
    data_settings = config['Dataset']
    pad_size = data_settings['pad_size']
    batch_size = data_settings['batch_size']

    print('Loading train data...')
    train_reader = QFocusReader(train, PAD_TOKEN='<pad>', pad_size=pad_size)
    train_reader.build_vocabs(vocab_dir)

    # model
    model_settings = config['Model']
    conv_width = model_settings['conv_width']
    hidden_size = model_settings['hidden_size']
    out_channels = model_settings['out_channels']
    cuda_device = model_settings['cuda_device']
    num_filters = model_settings['num_filters']

    # Replace the word vocab built above with the pretrained one.
    words_embed, words_vocab = load_full_embedding_with_vocab(
        model_settings['embed_dir'])
    train_reader.set_vocabs({'words': words_vocab})
    vocabs = train_reader.get_vocabs()  # kept as in the original; unused here

    train_settings = config['Train']
    num_epoch = train_settings['epoch']
    weight_decay = train_settings['weight_decay']
    lr = train_settings['lr']
    kfold = train_settings['kfold']

    def new_classifier():
        """Return a fresh ``(model, optimizer)`` pair, moved to GPU if set."""
        model = BaselineFocusClassifier(words_embed=words_embed,
                                        out_channels=out_channels,
                                        cuda_device=cuda_device,
                                        conv_width=conv_width,
                                        hidden_size=hidden_size,
                                        num_filters=num_filters)
        adam = optim.Adam(model.parameters(),
                          lr=lr,
                          weight_decay=weight_decay,
                          eps=1e-5)
        if cuda_device is not None:
            model.cuda(device=cuda_device)
        return model, adam

    # cross-val
    folds = train_reader.get_cross_val_dataset_iterator(batch_size=batch_size,
                                                        k_fold=kfold)
    fold_accs = []
    for held_out in range(kfold):
        clf, optimizer = new_classifier()
        # Train on every fold except the held-out one.
        train_iterator = [
            fold for position, fold in enumerate(folds)
            if position != held_out
        ]
        train_model(clf,
                    optimizer,
                    train_iterator,
                    num_epoch=num_epoch,
                    cuda_device=cuda_device,
                    early_stopping=0,
                    label_name='focus')

        # test
        print('Testing...')
        acc = test_metric(clf,
                          folds[held_out],
                          cuda_device,
                          label_name='focus')
        print('test accuracy:', acc)
        fold_accs.append(acc)
        print()

    print('test accuracies:', fold_accs)
    print('mean accuracies:', np.mean(fold_accs))
    print()

    print('Final Training...')
    clf, optimizer = new_classifier()
    train_iterator = train_reader.get_dataset_iterator(batch_size)

    def callback(verbose=False):
        """Measure training accuracy; print it only when ``verbose``.

        NOTE(review): nothing is returned, so train_model receives None
        from this callback -- confirm that is intended.
        """
        train_acc = test_metric(clf,
                                train_iterator,
                                cuda_device,
                                'focus',
                                return_info=False)
        if verbose:
            print('train_acc: %.3f' % (train_acc))

    # train
    best_state_dict = train_model(clf,
                                  optimizer,
                                  train_iterator,
                                  num_epoch=num_epoch,
                                  cuda_device=cuda_device,
                                  label_name='focus',
                                  callback=callback)
    if best_state_dict is not None:
        clf.load_state_dict(best_state_dict)
    torch.save(clf.state_dict(), os.path.join(model_dir, './net.pt'))
    print('Done!')
def main(config_path):
    """Train the baseline category classifier and write its test predictions.

    Reads a JSON config, builds vocabularies from the training data, trains
    ``BaselineCategoryClassifier``, saves the resulting weights to
    ``<model_dir>/./net.pt``, prints the test accuracy and writes one
    tab-separated line per test example (gold category, predicted category,
    space-joined words) to the ``test_result`` path.

    Args:
        config_path: Path to a JSON file with ``Path``, ``Dataset``,
            ``Model`` and ``Train`` sections.

    Returns:
        None. Results are printed and written to disk.
    """
    with open(config_path, 'r') as fread:
        config_dict = json.load(fread)

    path_config = config_dict['Path']
    model_dir = path_config['model_dir']
    vocab_dir = path_config['vocab_dir']
    train = path_config['train']
    test = path_config['test']
    test_result = path_config['test_result']

    # dataset
    dataset_config = config_dict['Dataset']
    pad_size = dataset_config['pad_size']
    batch_size = dataset_config['batch_size']

    print('Loading train data...')
    train_reader = UIUCReader(train, PAD_TOKEN='<pad>', pad_size=pad_size)
    train_reader.build_vocabs(vocab_dir)
    # NOTE(review): this iterator is created before the pretrained word
    # vocab is installed via set_vocabs() below -- confirm the iterator
    # picks up the replaced vocab.
    train_iterator = train_reader.get_dataset_iterator(batch_size, train=True)

    # model
    model_config = config_dict['Model']
    conv_widths = model_config['conv_widths']
    hidden_size = model_config['hidden_size']
    out_channels = model_config['out_channels']
    cuda_device = model_config['cuda_device']
    # The output layer has one unit per entry of the category vocab.
    out_size = len(train_reader.get_vocab('category'))

    # load pretrained vocab
    words_embed, words_vocab = load_full_embedding_with_vocab(
        model_config['embed_dir'])
    train_reader.set_vocabs({'words': words_vocab})
    vocabs = train_reader.get_vocabs()  # reused by the test readers below

    clf = BaselineCategoryClassifier(words_embed=words_embed,
                                     out_channels=out_channels,
                                     cuda_device=cuda_device,
                                     conv_widths=conv_widths,
                                     hidden_size=hidden_size,
                                     out_size=out_size)

    # train
    train_config = config_dict['Train']
    num_epoch = train_config['epoch']
    weight_decay = train_config['weight_decay']
    lr = train_config['lr']
    early_stopping = train_config['early_stopping']
    optimizer = optim.Adam(clf.parameters(),
                           lr=lr,
                           weight_decay=weight_decay,
                           eps=1e-5)
    if cuda_device is not None:
        clf.cuda(device=cuda_device)

    print('Loading test data...')
    test_reader = UIUCReader(test, PAD_TOKEN='<pad>', pad_size=pad_size)
    test_reader.set_vocabs(vocabs)
    test_iterator = test_reader.get_dataset_iterator(batch_size,
                                                     train=False,
                                                     sort=False)

    def callback(verbose=False):
        """Measure train and test accuracy; return the test accuracy.

        NOTE(review): the returned value comes from the test split; if
        train_model uses it for early stopping / model selection, the final
        test accuracy is optimistically biased -- confirm.
        """
        train_acc = test_metric(clf,
                                train_iterator,
                                cuda_device,
                                'category',
                                return_info=False)
        if verbose:
            print('train_acc: %.3f' % (train_acc))
        test_acc = test_metric(clf,
                               test_iterator,
                               cuda_device,
                               'category',
                               return_info=False)
        if verbose:
            print('test_acc: %.3f' % (test_acc))
        return test_acc

    print('Training...')
    best_state_dict = train_model(clf,
                                  optimizer,
                                  train_iterator,
                                  label_name='category',
                                  num_epoch=num_epoch,
                                  cuda_device=cuda_device,
                                  early_stopping=early_stopping,
                                  callback=callback)
    print()
    if best_state_dict is not None:
        clf.load_state_dict(best_state_dict)
    torch.save(clf.state_dict(), os.path.join(model_dir, './net.pt'))

    # test
    # A fresh reader is built for the final pass. Its iterator is requested
    # without the train/sort arguments used for test_iterator above, so
    # whatever defaults get_dataset_iterator has apply here.
    print('Loading test data...')
    test_reader = UIUCReader(test, PAD_TOKEN='<pad>', pad_size=pad_size)
    test_reader.set_vocabs(vocabs)

    print('Testing...')
    acc, categories, predicts, sents = test_metric(
        clf,
        test_reader.get_dataset_iterator(batch_size),
        cuda_device,
        label_name='category',
        return_info=True)
    print('test accuracy:', acc)

    print('Writing test result...')
    # Look the index-to-string tables up once instead of per written line.
    category_itos = test_reader.get_vocab('category').itos
    words_itos = test_reader.get_vocab('words').itos
    with open(test_result, 'w') as fwrite:
        for category, predict, sent in zip(categories, predicts, sents):
            fwrite.write('%s\t%s\t%s\n' %
                         (category_itos[category], category_itos[predict],
                          ' '.join([words_itos[word] for word in sent])))
    print('Done!')