def main(config):
    """Build an NER model from ``config``, then train, evaluate and query it.

    Steps, in order: load the word, tag and character vocabularies; create
    the word and tag processing functions; load the trimmed embeddings; wrap
    the dev, test and train files in ``CoNLLDataset`` objects; build an
    ``NERModel``; train on train/dev, evaluate on test and finally start the
    model's interactive shell.

    Args:
        config: settings object. Attributes read here: ``words_filename``,
            ``tags_filename``, ``chars_filename``, ``chars``,
            ``trimmed_filename``, ``dev_filename``, ``test_filename``,
            ``train_filename`` and ``max_iter``. The object is also handed
            whole to ``NERModel``.
    """
    # Vocabularies.
    word_vocab = load_vocab(config.words_filename)
    tag_vocab = load_vocab(config.tags_filename)
    char_vocab = load_vocab(config.chars_filename)

    # Processing functions; both go to every dataset, and the word one is
    # also given to the interactive shell at the end.
    word_fn = get_processing_word(
        word_vocab,
        char_vocab,
        lowercase=True,
        chars=config.chars,
    )
    tag_fn = get_processing_word(tag_vocab, lowercase=False, allow_unk=False)

    # Pre-trained embeddings.
    pretrained = get_trimmed_glove_vectors(config.trimmed_filename)

    def make_dataset(path):
        # All splits are built with identical processing and iteration cap.
        return CoNLLDataset(path, word_fn, tag_fn, config.max_iter)

    dev_set = make_dataset(config.dev_filename)
    test_set = make_dataset(config.test_filename)
    train_set = make_dataset(config.train_filename)

    # Model.
    ner = NERModel(
        config,
        pretrained,
        ntags=len(tag_vocab),
        nchars=len(char_vocab),
    )
    ner.build()

    # Train, evaluate, interact.
    ner.train(train_set, dev_set, tag_vocab)
    ner.evaluate(test_set, tag_vocab)
    ner.interactive_shell(tag_vocab, word_fn)
def main(config):
    """Build an NER model and run it in train, evaluate or predict mode.

    Loads the word, tag, character and POS vocabularies, four trimmed
    embedding tables and the dev/train datasets, builds an ``NERModel`` and
    then dispatches on ``state``:

    * ``"train"``    -> ``model.train`` on train/dev
    * ``"evaluate"`` -> ``model.evaluate`` on dev
    * anything else  -> predict: convert the input, build the test dataset,
      evaluate on it and run ``tagging`` on the converted file

    Args:
        config: settings object. Attributes read here: ``words_filename``,
            ``tags_filename``, ``chars_filename``, ``pos_filename``,
            ``chars``, ``trimmed_filename``, ``uni_trimmed_filename``,
            ``feature_trimmed_filename``, ``trimmed_dic``, ``dev_filename``,
            ``train_filename``, ``test_filename`` and ``max_iter``. The
            object is also handed whole to ``NERModel``.

    Note:
        ``state`` and ``file`` are not defined in this function; they are
        resolved from an enclosing scope (presumably module-level globals --
        confirm against the rest of the file).
    """
    # Vocabularies. load_vocab returns a pair here; only the word vocabulary's
    # second element is kept (it is passed to the model as ``vocab_words``).
    word_vocab, idx2words = load_vocab(config.words_filename)
    tag_vocab, _ = load_vocab(config.tags_filename)
    char_vocab, _ = load_vocab(config.chars_filename)
    pos_vocab, _ = load_vocab(config.pos_filename)

    # Processing functions.
    word_fn = get_processing_word(
        word_vocab,
        char_vocab,
        lowercase=True,
        chars=config.chars,
    )
    tag_fn = get_processing_word(tag_vocab, lowercase=False)
    pos_fn = get_processing_word(pos_vocab, lowercase=False)

    # Pre-trained tables, all read through the same loader.
    word_vectors = get_trimmed_glove_vectors(config.trimmed_filename)
    uni_vectors = get_trimmed_glove_vectors(config.uni_trimmed_filename)
    pos_vectors = get_trimmed_glove_vectors(config.feature_trimmed_filename)
    ne_dictionary = get_trimmed_glove_vectors(config.trimmed_dic)

    def make_dataset(path):
        # Every split uses the same three processing functions.
        return CoNLLDataset(path, word_fn, tag_fn, pos_fn, config.max_iter)

    dev_set = make_dataset(config.dev_filename)
    train_set = make_dataset(config.train_filename)

    # Model.
    ner = NERModel(
        config,
        word_vectors,
        uni_vectors,
        pos_vectors,
        ntags=len(tag_vocab),
        nchars=len(char_vocab),
        vocab_words=idx2words,
        NE_dic=ne_dictionary,
    )
    ner.build()

    # Mode dispatch.
    if state == "train":
        ner.train(train_set, dev_set, tag_vocab)
        return
    if state == "evaluate":
        ner.evaluate(dev_set, tag_vocab)
        return

    # Any other state is treated as "predict".
    converted_path = "data_format/test_convert.txt"
    convert(file)
    t2o(converted_path, "data_format/test.txt")
    # The test dataset is only built in this branch, after the conversion.
    test_set = make_dataset(config.test_filename)
    ner.evaluate(test_set, tag_vocab)
    tagging(converted_path)
def main(config):
    """Train an NER model with extra IOB / entity-type inputs and time its test run.

    Loads the word, tag and character vocabularies, defines fixed IOB and
    entity-type vocabularies, builds the dev/test/train ``CoNLLDataset``
    objects and an ``NERModel``, trains on train/dev, then evaluates on test
    and prints the elapsed wall-clock seconds of that evaluation.

    Args:
        config: settings object. Attributes read here: ``words_filename``,
            ``tags_filename``, ``chars_filename``, ``chars``,
            ``trimmed_filename``, ``dev_filename``, ``test_filename``,
            ``train_filename`` and ``max_iter``. The object is also handed
            whole to ``NERModel``.
    """
    # load vocabs
    vocab_words = load_vocab(config.words_filename)
    vocab_tags = load_vocab(config.tags_filename)
    vocab_chars = load_vocab(config.chars_filename)
    vocab_iob = {"O": 0, "B": 1, "I": 2}
    vocab_type = {"LOC": 0, "PER": 1, "ORG": 2, "MISC": 3}
    # Sizes handed to the model. Taken from the literals right here (3 and 4)
    # so they cannot drift from the vocabularies if a label is added.
    niob = len(vocab_iob)
    ntype = len(vocab_type)

    # get processing functions
    processing_word = get_processing_word(
        vocab_words, vocab_chars, lowercase=True, chars=config.chars)
    processing_tag = get_processing_word(vocab_tags, lowercase=False)
    processing_iob = get_processing_word(vocab_iob, lowercase=False)
    processing_type = get_processing_word(vocab_type, lowercase=False)

    # get pre trained embeddings
    embeddings = get_trimmed_glove_vectors(config.trimmed_filename)

    # create dataset
    dev = CoNLLDataset(config.dev_filename, processing_word, processing_tag,
                       processing_iob, processing_type, config.max_iter)
    test = CoNLLDataset(config.test_filename, processing_word, processing_tag,
                        processing_iob, processing_type, config.max_iter)
    train = CoNLLDataset(config.train_filename, processing_word,
                         processing_tag, processing_iob, processing_type,
                         config.max_iter)

    # build model
    model = NERModel(config, embeddings, ntags=len(vocab_tags),
                     nchars=len(vocab_chars), niob=niob, ntype=ntype)
    model.build()

    # train and evaluate
    # Single-argument print(...) gives the same output on Python 2 and is
    # valid syntax on Python 3, unlike the former print statement.
    print(vocab_tags)
    model.train(train, dev, vocab_tags)

    # Time only the test-set evaluation.
    stime = time.time()
    model.evaluate(test, vocab_tags)
    etime = time.time()
    print(etime - stime)
def main(config):
    """Train and evaluate an NER model whose word processing uses prefix/suffix vocabularies.

    Loads the word, tag and character vocabularies plus three prefix/suffix
    vocabularies, creates the processing functions, loads the trimmed
    embeddings, wraps the dev/test/train files in ``CoNLLDataset`` objects,
    builds an ``NERModel``, trains on train/dev and evaluates on test.

    Args:
        config: settings object. Attributes read here: ``words_filename``,
            ``tags_filename``, ``chars_filename``, ``PS_filename``,
            ``PS_filename_2``, ``PS_filename_4``, ``chars``, ``pref_suff``,
            ``trimmed_filename``, ``dev_filename``, ``test_filename``,
            ``train_filename`` and ``max_iter``. The object is also handed
            whole to ``NERModel``.
    """
    # Core vocabularies.
    word_vocab = load_vocab(config.words_filename)
    tag_vocab = load_vocab(config.tags_filename)
    char_vocab = load_vocab(config.chars_filename)

    # Prefix / suffix vocabularies (three separate files).
    affix_vocab = load_vocab(config.PS_filename)
    affix_vocab_2 = load_vocab(config.PS_filename_2)
    affix_vocab_4 = load_vocab(config.PS_filename_4)

    # Processing functions.
    word_fn = get_processing_word(
        word_vocab,
        char_vocab,
        affix_vocab,
        affix_vocab_2,
        affix_vocab_4,
        lowercase=True,
        chars=config.chars,
        Pref_Suff=config.pref_suff,
    )
    tag_fn = get_processing_word(tag_vocab, lowercase=False, Geoparser=True)

    # Pre-trained embeddings.
    pretrained = get_trimmed_glove_vectors(config.trimmed_filename)

    # Datasets.
    # NOTE (carried over from the original author's inline remarks): the
    # files hold raw words and tags, which the processing functions map to
    # word and tag indices; model.evaluate is said to go through
    # run_evaluate / run_epoch. None of that is visible from this function --
    # confirm against CoNLLDataset and NERModel.
    def make_dataset(path):
        return CoNLLDataset(path, word_fn, tag_fn, config.max_iter)

    dev_set = make_dataset(config.dev_filename)
    test_set = make_dataset(config.test_filename)
    train_set = make_dataset(config.train_filename)

    # Model.
    ner = NERModel(
        config,
        pretrained,
        ntags=len(tag_vocab),
        nchars=len(char_vocab),
    )
    ner.build()

    # Train, then evaluate on the held-out test split.
    ner.train(train_set, dev_set, tag_vocab)
    ner.evaluate(test_set, tag_vocab)
# Datasets: dev, test and train are built with the same word / tag / POS
# processing functions and the same config.max_iter.
# NOTE(review): this is a headless fragment. config, the processing_*
# functions, the vocab_* objects, pos_tags and the *_embeddings tables must
# already be bound by code that precedes it.
dev = CoNLLDataset(
    config.dev_filename,
    processing_word,
    processing_tag,
    processing_pos,
    config.max_iter,
)
test = CoNLLDataset(
    config.test_filename,
    processing_word,
    processing_tag,
    processing_pos,
    config.max_iter,
)
train = CoNLLDataset(
    config.train_filename,
    processing_word,
    processing_tag,
    processing_pos,
    config.max_iter,
)

# Model: vocabulary sizes are passed as keyword arguments next to the
# embedding tables.
lmwords = len(vocab_words)
lmposs = len(pos_tags)
size_kwargs = {
    "ntags": len(vocab_tags),
    "nchars": len(vocab_chars),
    "nsyls": len(vocab_syls),
    "nmorphs": len(vocab_morphs),
    "nwords": lmwords,
    "nposs": lmposs,
}
model = NERModel(
    config,
    embeddings,
    dic_embeddings,
    pos_embeddings,
    syl_embeddings,
    morph_embeddings,
    **size_kwargs
)
model.build()

# Train on train/dev, then evaluate on the test split with test_flag=1.
model.train(train, dev, vocab_tags)
model.evaluate(test, vocab_tags, test_flag=1)
# The interactive shell is left disabled, as in the original:
# model.interactive_shell(vocab_tags, processing_word)
# NOTE(review): headless fragment. config and vocab_words must already be
# bound by code that precedes it.
vocab_tags = load_vocab(config.tags_filename)
vocab_chars = load_vocab(config.chars_filename)

# Processing functions for words and tags.
processing_word = get_processing_word(
    vocab_words,
    vocab_chars,
    lowercase=True,
    chars=config.chars,
)
processing_tag = get_processing_word(vocab_tags, lowercase=False)

# Pre-trained embeddings.
embeddings = get_trimmed_glove_vectors(config.trimmed_filename)


# Datasets: one factory so every split gets identical arguments.
def _make_dataset(path):
    return CoNLLDataset(path, processing_word, processing_tag, config.max_iter)


dev = _make_dataset(config.dev_filename)
test = _make_dataset(config.test_filename)
train = _make_dataset(config.train_filename)

# Model.
model = NERModel(
    config,
    embeddings,
    ntags=len(vocab_tags),
    nchars=len(vocab_chars),
)
model.build()

# Train on train/dev, evaluate on test, then open the interactive shell.
model.train(train, dev, vocab_tags)
model.evaluate(test, vocab_tags)
model.interactive_shell(vocab_tags, processing_word)