def main():
    """Restore a trained NER model and evaluate it on one split.

    Usage: script [test|dev]  (defaults to the test split).
    """
    config = Config()

    # Build the graph and load the trained weights.
    model = NERModel(config)
    model.build("train")
    model.restore_session(config.dir_model)

    # Pick the evaluation split from the optional CLI argument.
    if len(sys.argv) == 2:
        split = sys.argv[1]
        if split == 'test':
            filename = config.filename_test
        elif split == 'dev':
            filename = config.filename_dev
        else:
            # Previously fell through and crashed later with a NameError on `test`.
            raise ValueError("unknown split %r: expected 'test' or 'dev'" % split)
    elif len(sys.argv) == 1:
        filename = config.filename_test
    else:
        # Replaces the old `assert len(sys.argv) == 1` (stripped under -O).
        raise ValueError("expected at most one argument, got %d"
                         % (len(sys.argv) - 1))

    test = CoNLLDataset(filename, config.processing_word,
                        config.processing_tag, max_length=None)

    # evaluate
    model.evaluate(test)
def main():
    """Train an NER model and evaluate it on the test split."""
    config = Config()

    # Load all three splits with the same preprocessing pipeline.
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.processing_pos,
                       config.processing_chunk, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.processing_pos,
                         config.processing_chunk, config.max_iter)
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.processing_pos,
                        config.processing_chunk, config.max_iter)

    # Padding sizes must cover every sequence and word in every split.
    splits = (train, dev, test)
    max_sequence_length = max(len(seq[0]) for ds in splits for seq in ds)
    max_word_length = max(len(word[0])
                          for ds in splits
                          for seq in ds
                          for word in seq[0])
    print(max_word_length, max_sequence_length)

    model = NERModel(config, max_word_length, max_sequence_length)
    model.build()
    model.train(train, dev)

    # Reload the best checkpoint before the final evaluation.
    model.restore_session(config.dir_model)
    model.evaluate(test)
def main():
    """Evaluate a trained model on the cached ROI test set.

    Hyper-parameters are read from the module-level `arg` namespace.
    """
    config = Config()

    # Mirror the parsed command-line hyper-parameters onto the config.
    for name in ('dim_char', 'hidden_size_char', 'hidden_size_lstm_1',
                 'hidden_size_lstm_2', 'cls_hidden_size', 'batch_sample',
                 'elmo_scale', 'lr_method', 'batch_size', 'learning_rate',
                 'decay_logic', 'run_name', 'dir_saved_roi'):
        setattr(config, name, getattr(arg, name))
    config.input_feature_dim = 600  # config.hidden_size_lstm * 2 (+ 1024)

    # Build the model and restore the weights of this particular run.
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model + config.run_name + '/')

    # Test examples come from a directory of pre-computed word ids.
    config.filename_test = config.dir_saved_roi + "test_word_ids/"
    test = CoNLLDataset(config.filename_test)

    model.evaluate(test, config.test_total_entity)
def main():
    """Evaluate a stored model on an arbitrary CoNLL file.

    argv[1]: path to the test file; argv[2]: experiment/run name.
    """
    test_path = sys.argv[1]
    run_name = sys.argv[2]

    dir_output = "./results/" + run_name + "/"
    config = Config(dir_output, load=False)

    # Point the vocabularies and output paths at this run before loading.
    config.filename_words = "./data/words_" + run_name + ".txt"
    config.filename_chars = "./data/chars_" + run_name + ".txt"
    config.filename_tags = "./data/tags_" + run_name + ".txt"
    config.dir_model = config.dir_output + "model.weights/"
    config.path_log = config.dir_output + "log.txt"
    config.filename_test = test_path
    config.filename_pred = test_path.replace(".txt", ".pred")
    config.load()

    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    test = CoNLLDataset(test_path, config.processing_word,
                        config.processing_tag, config.max_iter)

    model.evaluate(test)
def main():
    """Restore the fine-tuned model and evaluate it on one split.

    Usage: script [test|dev]  (defaults to the test split).
    """
    config = Config()
    # Fine-tuned weights live in their own sub-directory.
    config.dir_model = config.dir_output + "model.finetuning.weights/"

    model = NERModel(config)
    model.build("fine_tuning")
    model.restore_session(config.dir_model)

    # Pick the evaluation split from the optional CLI argument.
    if len(sys.argv) == 2:
        split = sys.argv[1]
        if split == 'test':
            filename = config.filename_test
        elif split == 'dev':
            filename = config.filename_dev
        else:
            # Previously fell through and crashed later with a NameError on `test`.
            raise ValueError("unknown split %r: expected 'test' or 'dev'" % split)
    elif len(sys.argv) == 1:
        filename = config.filename_test
    else:
        # Replaces the old `assert len(sys.argv) == 1` (stripped under -O).
        raise ValueError("expected at most one argument, got %d"
                         % (len(sys.argv) - 1))

    test = CoNLLDataset(filename, config.processing_word,
                        config.processing_tag, config.max_iter)

    model.evaluate(test)
def main():
    """Evaluate the model described by the config file given as argv[1].

    An optional second argument overrides the test-file path.
    """
    config = Config(sys.argv[1])
    print("dir model : ", config.dir_model)

    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # Evaluate either an explicitly given file or the configured test set.
    test_file = sys.argv[2] if len(sys.argv) > 2 else config.filename_test
    test = CoNLLDataset(test_file, config.processing_word,
                        config.processing_tag, config.max_iter)
    print("Testing on ", test_file, "..")

    model.evaluate(test)
def main():
    """Run a two-stage NER pipeline: stage-1 predictions feed stage 2."""
    # --- first-stage model predictions ---
    config_first = Config(dir_output='./results/train_first/')
    model = NERModel(config_first)
    model.build()
    model.restore_session(config_first.dir_model)

    test = CoNLLDataset(config_first.filename_test,
                        config_first.processing_word,
                        config_first.processing_tag,
                        config_first.max_iter)
    print()
    print('Predicting first stage!')
    model.evaluate(test)
    print()

    test_predictions = model.predict_test(test)
    formatted_predictions = format_predictions(test_predictions, 'test',
                                               config_first)

    # --- second-stage model predictions ---
    tf.reset_default_graph()  # drop the stage-1 graph before building stage 2
    config_second = Config(dir_output='./results/train_second/')
    model = NERModel2(config_second)
    model.build()
    model.restore_session(config_second.dir_model)

    print()
    print('Predicting second stage!')
    model.evaluate(formatted_predictions)
    print()
def main(args):
    """Evaluate the architecture named by `args` on the test split.

    args: one of 'bilstm', 'gram_cnn', 'gate_cnn', 'id_cnn'.
    Raises ValueError for any other name (previously an unknown name
    crashed later with a NameError because `model` was never bound).
    """
    config = Config()

    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)

    # Padding sizes must cover every split the model will ever see.
    max_sequence_length = max(max([len(seq[0]) for seq in train]),
                              max([len(seq[0]) for seq in dev]),
                              max([len(seq[0]) for seq in test]))
    max_word_length = max(
        max([len(word[0]) for seq in train for word in seq[0]]),
        max([len(word[0]) for seq in test for word in seq[0]]),
        max([len(word[0]) for seq in dev for word in seq[0]]))
    print(max_word_length, max_sequence_length)

    # Dispatch on the requested architecture.
    model_classes = {
        "bilstm": NERModel,
        "gram_cnn": GRAM_CNNModel,
        "gate_cnn": CNNModel,
        "id_cnn": Dilated_CNNModel,
    }
    if args not in model_classes:
        raise ValueError("unknown model %r: expected one of %s"
                         % (args, sorted(model_classes)))
    model = model_classes[args](config, max_word_length, max_sequence_length)

    model.build()
    model.restore_session(config.dir_model)
    model.evaluate(test)
def main():
    """Evaluate a stored model; argv = [layer, step]."""
    config = Config()
    config.layer = int(sys.argv[1])
    config.step = int(sys.argv[2])

    if config.task == 'pos':
        print("USING POS")
        # Swap the default NER files for the POS-tagging splits.
        config.filename_train = "data/train.pos"
        config.filename_dev = "data/dev.pos"
        config.filename_test = "data/test.pos"
    else:
        print("USING NER")
        print("iteration: " + str(config.layer))
        print("step: " + str(config.step))

    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)

    model.evaluate(test)
def evaluate():
    """Evaluate the stored model on the test and augment sets.

    Returns the list of predictions collected on the augment set.
    """
    augment_pred = []
    with NERModel(config) as model:
        # create datasets
        augment = CoNLLDataset(config.filename_augment, config.processing_word,
                               config.processing_tag, config.max_iter)
        test = CoNLLDataset(config.filename_test, config.processing_word,
                            config.processing_tag, config.max_iter)

        # Bug fix: the original re-ran `model = NERModel(config)` here,
        # creating a second model and discarding the context-managed
        # instance; reuse the instance bound by the `with` statement.
        model.build()
        model.restore_session(config.dir_model)

        # evaluate
        model.logger.info("\nEvaluation on Test")
        model.evaluate(test)

        model.logger.info("\nEvaluation on Augment")
        model.evaluate(augment, augment_pred)

        # clear memory
        del model
    return augment_pred
def main():
    """Evaluate via the classifier wrapper; argv[1] selects dev or test."""
    config = Config()

    model = NERModel(config)
    model.build("train")
    model.restore_session(config.dir_model)

    # Raw words are only lowercased here; tag/id processing is applied
    # by the CoNLLdata4classifier wrapper below.
    processing_word = get_processing_word(lowercase=True)

    # Pick the evaluation split from the optional CLI argument.
    if len(sys.argv) == 2:
        split = sys.argv[1]
        if split == 'test':
            filename = config.filename_test
        elif split == 'dev':
            filename = config.filename_dev
        else:
            # Previously fell through and crashed later with a NameError on `test`.
            raise ValueError("unknown split %r: expected 'test' or 'dev'" % split)
    elif len(sys.argv) == 1:
        filename = config.filename_test
    else:
        # Replaces the old `assert len(sys.argv) == 1` (stripped under -O).
        raise ValueError("expected at most one argument, got %d"
                         % (len(sys.argv) - 1))

    test = CoNLLDataset(filename, processing_word)
    test4cl = CoNLLdata4classifier(test,
                                   processing_word=config.processing_word,
                                   processing_tag=config.processing_tag)

    model.evaluate(test4cl)
def main():
    """Evaluate an ELMo-augmented model using CLI hyper-parameters (`arg`)."""
    config = Config()

    # Mirror the parsed command-line hyper-parameters onto the config.
    for name in ('dim_char', 'hidden_size_char', 'hidden_size_lstm_1',
                 'hidden_size_lstm_2', 'batch_sample', 'elmo_scale',
                 'lr_method', 'batch_size', 'learning_rate', 'decay_logic',
                 'run_name'):
        setattr(config, name, getattr(arg, name))

    # Build the model and restore the weights of this particular run.
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model + config.run_name + '/')

    test = CoNLLDataset(config.filename_test, config.elmofile_test,
                        config.processing_word, config.processing_postags,
                        config.generate_anchor, config.max_iter)
    model.evaluate(test)
def train_active(train, dev, test, select, config, modename):
    """Train on `train`, report on `test`, and score the selection pool.

    Returns one confidence score per sentence in `select` (empty list when
    the pool is empty); callers rank these to pick examples to label next.
    """
    model = NERModel(config)
    model.build()

    print("Start training model...")
    print("Training size ", len(train))
    model.train(train, dev)

    # Reload the checkpoint saved during training.
    model.restore_session(config.dir_model)

    print("===Evaluating on test set:===")
    model.evaluate(test, "test" + modename)

    # Score each sentence in the selection pool for active learning.
    print("Selecting samples for active learning...")
    if not select:
        return []

    scores = []
    for sentence in select:
        prediction = model.predict(sentence[0])
        scores.append(prediction[1][0])
    # callers sort these to find the most / least uncertain examples
    return scores
def main():
    """Restore a saved model, evaluate it, then open the interactive shell."""
    config = Config()

    # NOTE(review): unlike the other entry points, there is no model.build()
    # before restore_session — confirm restore_session builds the graph itself.
    model = NERModel(config)
    model.restore_session(config.dir_model)

    model.evaluate(config.dataset_test)
    interactive_shell(model)
def main():
    """Evaluate the stored model on the configured test set."""
    config = Config()

    # Build the graph and load the trained weights.
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    dataset = CoNLLDataset(config.filename_test, config.processing_word,
                           config.processing_tag, config.max_iter)

    model.evaluate(dataset)
def main():
    """Evaluate the stored model, then open the command-line shell."""
    config = Config()

    model = NERModel(config)
    model.build()
    # weights are loaded from e.g. "results/test/model.weights/"
    model.restore_session(config.dir_model)

    dataset = CoNLLDataset(config.filename_test, config.processing_word,
                           config.processing_tag, config.max_iter)

    model.evaluate(dataset)
    interactive_shell(model)  # try sentences from the command line
def main():
    """Evaluate on the test split and start the interactive shell."""
    config = Config()

    ner = NERModel(config)
    ner.build()
    ner.restore_session(config.dir_model)

    test_set = CoNLLDataset(config.filename_test, config.processing_word,
                            config.processing_tag, config.max_iter)

    ner.evaluate(test_set)
    interactive_shell(ner)
def main(data_prefix=None):
    """Evaluate the stored model.

    data_prefix: when given, each split file is replaced by
    ./data/<prefix>_<original basename>.
    """
    config = Config()

    if data_prefix:
        data_dir = os.path.join(os.getcwd(), 'data')
        for attr in ('filename_dev', 'filename_test', 'filename_train'):
            base = os.path.basename(getattr(config, attr))
            setattr(config, attr,
                    os.path.join(data_dir, data_prefix + '_' + base))

    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)
    model.evaluate(test)
def main():
    """Restore the NER model, optionally encode SICK, then evaluate."""
    config = Config()

    # restore model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # datasets: test for evaluation, a train split for active learning,
    # and SICK for sentence encoding
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)
    dev = CoNLLDataset(config.train_split[config.sample_split],
                       config.processing_word, config.processing_tag,
                       config.max_iter)
    sick = CoNLLDataset(config.filename_sick, config.processing_word,
                        config.processing_scores, config.max_iter)

    # encode the SICK dataset with the pretrained NER encoder
    if config.encode:
        model.get_encoded(sick)

    # evaluate
    model.evaluate(test, dev, "test")
def main():
    """Ask the user whether to evaluate on the test set or open the shell."""
    config = Config()

    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    option = input('>>>>> input the number of your option --\n'
                   'evaluate_on_testset(0)/interactive_shell(1): ')
    if option == '0':
        # evaluate the model on the configured test set
        test = CoNLLDataset(config.filename_test, config.processing_word,
                            config.processing_tag, config.max_iter)
        model.evaluate(test)
    elif option == '1':
        interactive_shell(model)
    else:
        raise Exception('--> The number should be 0 or 1. -->')
def main():
    """Evaluate the stored model on the two FK task test sets."""
    config = Config()

    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # one dataset per task, evaluated with its task index
    task_sets = [
        FKDataset(config.filename_test1, config.processing_word,
                  config.processing_tag, config.max_iter),
        FKDataset(config.filename_test2, config.processing_word,
                  config.processing_tag, config.max_iter),
    ]

    for task_id, dataset in enumerate(task_sets):
        print("testing over task %d" % (task_id + 1))
        model.evaluate(dataset, task_id)
def main():
    """Train a fresh model on the train split and evaluate on test."""
    config = Config()

    model = NERModel(config)
    model.build()
    # (optionally restore earlier weights / reinitialize layers here)

    # create the three datasets with identical preprocessing
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)

    model.train(train, dev)
    model.evaluate(test)
def main():
    """Restore the stored model and evaluate it, logging each phase.

    Fix: the original used Python 2 `print 'x'` statements while the rest
    of the file uses the print() function; converted to the portable form
    (single-argument calls behave identically on both).
    """
    config = Config()

    print('building\n')
    model = NERModel(config)
    model.build()

    print('restoring session\n')
    model.restore_session(config.dir_model)
    print('finished')

    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)
    print('created dataset\n')

    print('evaluating\n')
    model.evaluate(test)
def main():
    """Train on cached ROI features using CLI hyper-parameters (`arg`)."""
    config = Config()

    # Mirror the parsed command-line hyper-parameters onto the config.
    for name in ('dim_char', 'hidden_size_char', 'hidden_size_lstm_1',
                 'hidden_size_lstm_2', 'cls_hidden_size', 'batch_sample',
                 'elmo_scale', 'lr_method', 'batch_size', 'learning_rate',
                 'decay_logic', 'run_name', 'dir_saved_roi'):
        setattr(config, name, getattr(arg, name))
    config.input_feature_dim = 600  # config.hidden_size_lstm * 2 (+ 1024)

    model = NERModel(config)
    model.build()

    # All splits come from directories of pre-computed word ids.
    config.filename_dev = config.dir_saved_roi + "dev_word_ids/"
    config.filename_test = config.dir_saved_roi + "test_word_ids/"
    config.filename_train = config.dir_saved_roi + "train_word_ids/"

    dev = CoNLLDataset(config.filename_dev)
    print("Loading dev set done!")
    train = CoNLLDataset(config.filename_train)
    print("Loading train set done!")
    test = CoNLLDataset(config.filename_test)
    print("Loading test set done!")

    model.train(train, dev, config.dev_total_entity)
    model.evaluate(test, config.test_total_entity)
def main(interactive=False):
    """Evaluate the stored model and score it with the CoNLL script.

    interactive: when True, open the interactive shell afterwards.
    """
    config = Config()

    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)
    model.evaluate(test)

    # Run the official conlleval script over the written predictions,
    # once for the plain report and once for the LaTeX table.
    os.system("%s < %s > %s" % (config.conll_eval, config.conll_output,
                                config.conll_score))
    os.system("%s -l < %s > %s.latex" % (config.conll_eval,
                                         config.conll_output,
                                         config.conll_score))

    if interactive:
        interactive_shell(model)
def main():
    """Evaluate the hierarchical-label system on both dev and test.

    argv[1] is only echoed in the report header.
    """
    config = Config()

    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)

    print("\n")
    print("Evaluating the system for hierarchical labels (" + sys.argv[1] + ")")
    print("Testing on dev set")
    model.evaluate(dev)
    print("Testing on test set")
    model.evaluate(test)
def main():
    """Evaluate the stored model and then open the interactive shell."""
    config = Config()

    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)

    # evaluate() also returns the raw predictions; kept for debugging
    predictions = model.evaluate(test)

    interactive_shell(model)
class NERClassifier(object):
    """Wraps a trained NER model restored from MODEL_DIR for evaluation."""

    MODEL_DIR = os.path.join(os.path.dirname(__file__),
                             '../results/crf/final_model.weights/')

    def __init__(self):
        # Embedding sizes must match the checkpoint being restored.
        self.config = Config()
        self.config.dim_word = 250
        self.config.dim_char = 50
        self.model = NERModel(self.config)
        self.model.build()
        self.model.restore_session(self.MODEL_DIR)

    def _evaluate(self, test_set_loc):
        """Return (f1, acc, all_entities) for the CoNLL file at test_set_loc."""
        dataset = CoNLLDataset(test_set_loc, self.config.processing_word,
                               self.config.processing_tag,
                               self.config.max_iter)
        metrics = self.model.evaluate(dataset)
        entities = self.model.evaluate_final(dataset)
        return metrics['f1'], metrics['acc'], entities

    def assess(self, set='test'):
        """Calculate the model accuracy metrics for train, dev, and test.

        NOTE(review): the `set` parameter is currently unused (and shadows
        the builtin); kept as-is for interface compatibility.
        """
        cfg = self.config
        return (self._evaluate(cfg.filename_train),
                self._evaluate(cfg.filename_dev),
                self._evaluate(cfg.filename_test))
def main():
    """Evaluate a restored model on dev/test and on augmentation splits,
    pickling the augment predictions for later rounds."""
    config = Config()

    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    def make(filename):
        # all splits share the same preprocessing pipeline
        return CoNLLDataset(filename, config.processing_word,
                            config.processing_tag, config.max_iter)

    test = make(config.filename_test)
    dev = make(config.filename_dev)
    augment = [make(config.filename_augment.get(split))
               for split in config.augment_list]

    # index of the next augmentation split to pre-score (capped at the last)
    next_split = min(len(config.augment_list), len(config.splits) - 1)

    def dump_preds(split_name, preds):
        # predictions are pickled next to the other run outputs
        path = config.dir_output + 'preds-{}.pkl'.format(split_name)
        with open(path, 'wb') as f:
            pickle.dump(preds, f)

    # evaluate on dev and test
    model.results_logger.info("\nDev")
    model.evaluate(dev)
    model.results_logger.info("\nTest")
    model.evaluate(test)

    if len(config.augment_list) > 0:
        # evaluate the most recent augment split and save its predictions
        current = config.augment_list[-1]
        augment_pred = []
        model.results_logger.info("\nAugment split: {}".format(current))
        model.evaluate(augment[-1], augment_pred)
        dump_preds(current, augment_pred)

    if next_split > -1:
        # pre-score the upcoming split and save those predictions too
        upcoming = config.splits[next_split]
        next_augment = make(config.filename_augment.get(upcoming))
        augment_pred = []
        model.results_logger.info("\nNext Augment split: {}".format(upcoming))
        model.evaluate(next_augment, augment_pred)
        dump_preds(upcoming, augment_pred)
def main():
    """Parse CLI arguments, restore a trained model, and evaluate it.

    Fix: removed the dead, commented-out keyword-by-keyword Config(...)
    call (its continuation lines had lost their comment markers, leaving
    stray code); the config is built from the parsed namespace directly.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--train_lang', type=str, default='en')
    parser.add_argument('--dev_lang', type=str, default='en')
    parser.add_argument('--test_lang', type=str, default='en')
    parser.add_argument('--is_pos', type=int, default=0, help='NER or POS?')
    parser.add_argument('--dataset', type=str, default='conll2003',
                        help='Dataset directory')
    parser.add_argument('--dir', type=str, default=None,
                        help='Output directory')
    parser.add_argument('--use_chars', type=int, default=1,
                        help='Use character LSTM or not')
    parser.add_argument('--char_init', type=int, default=0,
                        help='Whether initialize char embedding')
    parser.add_argument('--epoch', type=int, default=30)
    parser.add_argument('--emb_type', type=str, default='word',
                        help='word | trans | word_trans')
    parser.add_argument('--emb_dim', type=int, default=300,
                        help='Dimension of word embeddings')
    parser.add_argument('--model_dir', type=str,
                        default='data/output_model_config_fb_wikitext103/',
                        help='Transformer directory model')
    parser.add_argument('--layer', type=int, default=None,
                        help='Select a single layer from Transformer')
    parser.add_argument('--trans_concat', type=str, default='all',
                        help='all | sws | fws')
    parser.add_argument('--trans_dim', type=int, default=512,
                        help='Transformer hidden size')
    parser.add_argument('--trans_layer', type=int, default=7,
                        help='The total number of Transformer layers')
    parser.add_argument('--trans_type', type=str, default='monolingual',
                        help="monolingual | crosslingual")
    parser.add_argument('--trans_vocab_src', type=str,
                        default='data/transformer_wiki_vocab_20w',
                        help='Source language Transformer vocabulary')
    parser.add_argument('--trans_vocab_tgt', type=str, default=None,
                        help='Target language Transformer vocabulary')
    args = parser.parse_args()

    langs = [args.test_lang]  # NOTE(review): currently unused below

    config = Config(args)

    # create dataset
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter,
                        lang=args.test_lang)

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # evaluate
    model.evaluate(test)