# NOTE(review): collapsed, truncated fragment of a Python 2 MST-LSTM training
# driver.  It begins mid-way through a string/os.system argument and ends with
# an unfinished "os.system(" call, so the fragment cannot be reconstructed or
# restyled safely from this view.  Left byte-identical; restore the original
# line breaks (and the missing head/tail) before editing.
'.txt') else: print 'Preparing vocab' words, w2i, pos, rels = utils.vocab(options.conll_train) with open(os.path.join(options.output, options.params), 'w') as paramsfp: pickle.dump((words, w2i, pos, rels, options), paramsfp) print 'Finished collecting vocab' print 'Initializing lstm mstparser:' parser = mstlstm.MSTParserLSTM(words, pos, rels, w2i, options) for epoch in xrange(options.epochs): print 'Starting epoch', epoch parser.Train(options.conll_train) conllu = (os.path.splitext( options.conll_dev.lower())[1] == '.conllu') devpath = os.path.join( options.output, 'dev_epoch_' + str(epoch + 1) + ('.conll' if not conllu else '.conllu')) utils.write_conll(devpath, parser.Predict(options.conll_dev)) parser.Save( os.path.join(options.output, os.path.basename(options.model) + str(epoch + 1))) if not conllu: os.system('perl conll/eval.pl -g ' + options.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt') else: os.system(
dev_data = {} dev_id2arg2rel = {} for index in fulltrain_data: if index not in train_data: dev_data[index] = fulltrain_data[index] dev_id2arg2rel[index] = train_id2arg2rel[index] #parser = learner.jNeRE(words, nertags, postagCount, rels, w2i, c2i, options) parser = learner.jNeRE(words, nertags, rels, w2i, c2i, options) for epoch in xrange(options.epochs): print '\n-----------------\nStarting epoch', epoch + 1 #parser.Train(train_data, train_id2nerBILOU, id2arg2rel, classweights) parser.Train(train_data, train_id2nerBILOU, id2arg2rel) label_pred = [] label_correct = [] predDev, relsDev = parser.Predict(dev_data) #pickle.dump((predDev, relsDev), open(options.output + "dev_ep" + str(epoch + 1), "wb")) for sentenceID in predDev: label_pred.append(predDev[sentenceID]) label_correct.append(train_id2nerBILOU[sentenceID].strip().split()) assert len(label_pred) == len(label_correct) f1 = compute_NER_f1_macro(label_pred, label_correct, 'O', "IOBES") f1_b = compute_NER_f1_macro(label_pred, label_correct, 'B', "IOBES")
# NOTE(review): collapsed fragment of a (Python 3-style) mini-batch training
# driver.  The nesting of the trailing "else:" is ambiguous from this view —
# it could pair with "if (t_step <= 5000)" (save early, dev-eval later) or
# with "if (t_step % 100 == 0)" — and the dev-evaluation branch is cut off
# after "count = 0".  Left byte-identical; restore the original line breaks
# before editing.
sentences, train_batches = utils.batch_data(options.conll_train, c2i, options.batch_tokens) batches = len(train_batches) highestScore = options.highest_score tsId = 0 for epoch in range(options.last_epoch, options.epochs): print("Starting epoch ", epoch + 1) random.shuffle(train_batches) for idx, mini_batch in enumerate(train_batches): t_step = (epoch * batches) + idx + 1 if (t_step % 5000 == 0): parser.Train(sentences, mini_batch, t_step, True) else: parser.Train(sentences, mini_batch, t_step) if (t_step <= 5000): if (t_step % 100 == 0): print("Save Model...") parser.Save( os.path.join(options.output, os.path.basename(options.model))) else: print("Performance on Dev data") start = time.time() devPredSents = parser.Predict(conll_sentences, False) count = 0
# NOTE(review): collapsed fragment of a Python 2 MST-LSTM training driver
# (batched variant).  It opens with an orphan "else:" whose matching "if"
# (presumably a predict-mode check) lies outside this view, so the fragment
# cannot be reconstructed or restyled safely.  Left byte-identical; restore
# the original line breaks and the missing "if" before editing.
else: print 'Preparing vocab' words, w2i, pos, rels = utils.vocab(options.conll_train) with open(os.path.join(options.output, options.params), 'w') as paramsfp: pickle.dump((words, w2i, pos, rels, options), paramsfp) print 'Finished collecting vocab' print 'Initializing lstm mstparser:' parser = mstlstm.MSTParserLSTM(words, pos, rels, w2i, options) for epoch in xrange(options.epochs): print 'Starting epoch', epoch _start = time.time() parser.Train(options.conll_train, options.batch_size) print "time:", time.time() - _start conllu = (os.path.splitext( options.conll_dev.lower())[1] == '.conllu') devpath = os.path.join( options.output, 'dev_epoch_' + str(epoch + 1) + ('.conll' if not conllu else '.conllu')) utils.write_conll( devpath, parser.Predict(options.conll_dev, options.batch_size)) parser.Save( os.path.join(options.output, os.path.basename(options.model) + str(epoch + 1))) if not conllu: os.system('perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt')
pickle.dump((words, lemmas, pos, roles, chars, options), paramsfp) print 'Finished collecting vocab' print 'Initializing blstm srl:' parser = SRLLSTM(words, lemmas, pos, roles, chars, options) max_len = max([len(d) for d in train_data]) min_len = min([len(d) for d in train_data]) buckets = [list() for i in range(min_len, max_len)] for d in train_data: buckets[len(d) - min_len - 1].append(d) buckets = [x for x in buckets if x != []] for epoch in xrange(options.epochs): print 'Starting epoch', epoch parser.Train(utils.get_batches(buckets, parser, True)) if options.save_epoch: parser.Save( os.path.join(options.outdir, options.model + str(epoch + 1))) if options.conll_dev != '': start = time.time() utils.write_conll( os.path.join(options.outdir, options.model) + str(epoch + 1) + '.txt', parser.Predict(options.conll_dev)) os.system('perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + os.path.join(options.outdir, options.model) + str(epoch + 1) + '.txt' + ' > ' + os.path.join(options.outdir, options.model) + str(epoch + 1) + '.eval &')
pickle.dump((words, w2i, pos, rels, options), paramsfp) print 'Finished collecting vocab' print 'Initializing blstm arc hybrid:' if WITHCPOS: print "Using features as well" #print "ff", GENDER, NUMBER, PERSON parser = ArcHybridLSTM(words, pos, cpos, GENDER, NUMBER, PERSON, CASE, rels, w2i, options) else: parser = ArcHybridLSTM(words, pos, rels, w2i, options) deltas = [] for epoch in xrange(options.epochs): print '\n================\nStarting epoch', epoch+1 parser.Train(options.conll_train, epoch) #devpath = os.path.join(options.output, 'dev_epoch_' + str(epoch+1) + '.conll') devpath = os.path.join(options.output, 'dev_epoch_%03d.conll' % (epoch+1)) utils.write_conll(devpath, parser.Predict(options.conll_dev)) # run evaluation #command = 'perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt ' #print "executing: %s" % command #os.system(command) # just show current LAS #ifp = open(devpath + '.txt') #print "current LAS", ifp.readline() #ifp.close() # command = "~/bin/toolbin/conll/evaluation_script/conll17_ud_eval.py --weights ~/bin/toolbin/conll/evaluation_script/weights.clas " + options.conll_dev + " " + devpath + " > " + devpath + '.txt4' # print "executing: %s" % command
#print 'Initializing joint model' parser = learner.jPosDepLearner(words, pos, rels, w2i, c2i, caps, options) for epoch in xrange(options.epochs): print '\n-----------------\nStarting epoch', epoch + 1 if epoch % 10 == 0: if epoch == 0: parser.trainer.restart(learning_rate=0.001) elif epoch == 10: parser.trainer.restart(learning_rate=0.0005) else: parser.trainer.restart(learning_rate=0.00025) parser.Train(options.conll_train, dep_epoch=1) parser.Train(options.conll_trainner, ner_epoch=1) if options.conll_dev == "N/A": parser.Save( os.path.join(options.output, os.path.basename(options.model))) else: devPredSents = parser.Predict(options.conll_dev, ner_epoch=0) devNerPredSents = parser.Predict(options.conll_devner) out = open(os.path.join(options.output, options.predout), "w") count = 0 lasCount = 0 uasCount = 0 posCount = 0
print 'Initializing blstm srl:' parser = SRLLSTM(words, pWords, plemmas, pos, roles, chars, sense_mask, options) best_acc = 0.0 max_len = max([len(d) for d in train_data]) min_len = min([len(d) for d in train_data]) buckets = [list() for i in range(min_len, max_len)] for d in train_data: buckets[len(d) - min_len - 1].append(d) buckets = [x for x in buckets if x != []] for epoch in xrange(options.epochs): print 'Starting epoch', epoch best_acc = parser.Train( utils.get_batches(buckets, parser, True, options.sen_cut), epoch, best_acc, options) if options.conll_dev == None: parser.Save(os.path.join(options.outdir, options.model)) if options.input and options.output: with open(os.path.join(options.outdir, options.params), 'r') as paramsfp: words, pWords, plemmas, pos, roles, chars, sense_mask, stored_opt = pickle.load( paramsfp) stored_opt.external_embedding = options.external_embedding parser = SRLLSTM(words, pWords, plemmas, pos, roles, chars, sense_mask, stored_opt) parser.Load(os.path.join(options.outdir, options.model)) ts = time.time()
parser = SRLLSTM(words, lemmas, pos, roles, chars, options) best_f_score = 0.0 max_len = max([len(d) for d in train_data]) min_len = min([len(d) for d in train_data]) buckets = [list() for i in range(min_len, max_len)] for d in train_data: buckets[len(d) - min_len - 1].append(d) buckets = [x for x in buckets if x != []] for epoch in xrange(options.epochs): print 'Starting epoch', epoch print 'best F-score before starting the epoch: ' + str( best_f_score) best_f_score = parser.Train( utils.get_batches(buckets, parser, True), epoch, best_f_score, options) print 'best F-score after finishing the epoch: ' + str( best_f_score) if options.input and options.output: with open(os.path.join(options.outdir, options.params), 'r') as paramsfp: words, lemmas, pos, roles, chars, stored_opt = pickle.load( paramsfp) stored_opt.external_embedding = options.external_embedding parser = SRLLSTM(words, lemmas, pos, roles, chars, stored_opt) parser.Load(os.path.join(options.outdir, options.model)) print 'loaded the model' ts = time.time() pred = list(parser.Predict(options.input))
# NOTE(review): collapsed fragment of a Python 2 arc-hybrid training driver
# that iterates over comma-separated (epochs, treebank) pairs.  It opens with
# an orphan "else:" whose matching "if" lies outside this view, and the later
# "else: with open(options.params, ...)" branch (prediction mode) likewise
# pairs with an unseen "if", so the fragment cannot be reconstructed or
# restyled safely.  Left byte-identical; restore the original line breaks and
# the missing conditionals before editing.
else: print 'Preparing vocab' words, w2i, pos, rels = utils.vocab(options.conll_train) with open(os.path.join(options.output, options.params), 'w') as paramsfp: pickle.dump((words, w2i, pos, rels, options), paramsfp) print 'Finished collecting vocab' print 'Initializing blstm arc hybrid:' parser = ArcHybridLSTM(words, pos, rels, w2i, options) for i, (epoch, train) in enumerate(zip(options.epochs.split(','), options.conll_train.split(',')), 1): for iepoch in range(1, int(epoch)+1): print 'Starting epoch', iepoch parser.Train(train) devpath = os.path.join(options.output, 'dev_epoch_' + str(i) + '_' + str(iepoch) + '.conll') utils.write_conll(devpath, parser.Predict(options.conll_dev)) os.system('perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt &') print 'Finished predicting dev' parser.Save(os.path.join(options.output, options.model + '_' + str(i) + '_' + str(iepoch))) else: with open(options.params, 'r') as paramsfp: words, w2i, pos, rels, stored_opt = pickle.load(paramsfp) stored_opt.external_embedding = options.external_embedding parser = ArcHybridLSTM(words, pos, rels, w2i, stored_opt) parser.Load(options.model) tespath = os.path.join(options.output, 'test_pred.conll') ts = time.time()