# --- mstparser driver fragment (Python 2: uses `print` statements) ---
# NOTE(review): original formatting was lost (whole chunk collapsed onto one
# line); indentation below is reconstructed from the statement semantics.
parser.add_option("--dynet-mem", type="int", dest="mem", default=0)
(options, args) = parser.parse_args()
print 'Using external embedding:', options.external_embedding

if options.predictFlag:
    # Restore the vocabulary and the training-time options pickled by a
    # previous training run, then override the embedding path from the CLI.
    with open(options.params, 'r') as paramsfp:
        words, w2i, pos, rels, stored_opt = pickle.load(paramsfp)
    stored_opt.external_embedding = options.external_embedding
    print 'Initializing lstm mstparser:'
    # Rebinds the name `parser` from the option parser to the model —
    # intentional in the original, but worth knowing when reading on.
    parser = mstlstm.MSTParserLSTM(words, pos, rels, w2i, stored_opt)
    parser.Load(options.model)
    # Choose the output extension to match the test file's format
    # (.conllu vs legacy .conll), detected purely from the file suffix.
    conllu = (os.path.splitext(options.conll_test.lower())[1] == '.conllu')
    tespath = os.path.join(
        options.output,
        'test_pred.conll' if not conllu else 'test_pred.conllu')
    ts = time.time()
    test_res = list(parser.Predict(options.conll_test))
    te = time.time()
    print 'Finished predicting test.', te - ts, 'seconds.'
    utils.write_conll(tespath, test_res)
    if not conllu:
        # Legacy CoNLL format: score with the official perl evaluator.
        # NOTE(review): shell command built by string concatenation — paths
        # with spaces/metacharacters would break or be unsafe; confirm inputs
        # are trusted.
        os.system('perl conll/eval.pl -g ' + options.conll_test + ' -s ' +
                  tespath + ' > ' + tespath + '.txt')
else:  # training branch — its body lies beyond this truncated chunk
# --- prediction branch of a `learner`-based parser driver (Python 3) ---
# NOTE(review): original formatting was lost; indentation reconstructed.
if options.predictFlag:
    # Load the pickled vocabulary tables and the options object saved at
    # training time ('rb' because the pickle is binary).
    with open(os.path.join(options.output, options.params), 'rb') as paramsfp:
        words, w2i, c2i, pos, xpos, rels, stored_opt = pickle.load(
            paramsfp)
    # External-embedding vocabularies restricted to the words that actually
    # occur in train/test — presumably to keep the lookup tables small;
    # exact semantics of utils.ext_vocab not visible here.
    ext_words_train = utils.ext_vocab(stored_opt.conll_train,
                                      stored_opt.external_embedding_voc)
    ext_words_test = utils.ext_vocab(options.conll_test,
                                     stored_opt.external_embedding_voc)
    print('Loading pre-trained model')
    # Rebinds `parser` from the CLI parser to the model object.
    parser = learner.parser(words, pos, xpos, rels, w2i, c2i,
                            ext_words_train, ext_words_test, stored_opt)
    parser.Load(
        os.path.join(options.output, os.path.basename(options.model)))
    parser.pred_batch_size = options.pred_batch_size
    # Read the test treebank once and keep only ConllEntry tokens
    # (read_conll may also yield non-token items, e.g. comment lines).
    with open(options.conll_test, 'r') as conllFP:
        devData = list(utils.read_conll(conllFP, parser.c2i))
    conll_sentences = []
    for sentence in devData:
        conll_sentence = [
            entry for entry in sentence
            if isinstance(entry, utils.ConllEntry)
        ]
        conll_sentences.append(conll_sentence)
    tespath = os.path.join(options.output, options.conll_test_output)
    print('Predicting parsing dependencies')
#print 'Using external embedding:', options.external_embedding pretrained_flag = False if options.predictFlag: print("PREDICT...") with open(os.path.join(options.output, options.params), 'rb') as paramsfp: words, w2i, c2i, m2i, t2i, morph_dict, pos, rels, stored_opt = pickle.load( paramsfp) stored_opt.external_embedding = None print('Loading pre-trained model') parser = learner.jPosDepLearner(words, pos, rels, w2i, c2i, m2i, t2i, morph_dict, stored_opt) parser.Load(os.path.join(options.output, options.model)) testoutpath = os.path.join(options.output, options.conll_test_output) print('Predicting POS tags and parsing dependencies') with open(testoutpath, 'w') as fh: for sentence in parser.Predict(options.conll_test): for entry in sentence[1:]: fh.write(str(entry) + '\n') fh.write('\n') else: print("Training file: " + options.conll_train) highestScore = 0.0 eId = 0 print('Extracting vocabulary')
# --- SRL-LSTM driver fragment (Python 2: `print` statements) ---
# NOTE(review): original formatting lost; indentation reconstructed. The two
# leading statements are presumably the tail of a training-epoch loop whose
# header lies before this chunk — confirm against the full file.
best_acc = parser.Train(
    utils.get_batches(buckets, parser, True, options.sen_cut), epoch,
    best_acc, options)
if options.conll_dev == None:
    # No dev set to pick the best epoch by, so just save the latest model.
    parser.Save(os.path.join(options.outdir, options.model))

# Single-file prediction mode: read one input file, write one output file.
if options.input and options.output:
    with open(os.path.join(options.outdir, options.params), 'r') as paramsfp:
        words, pWords, plemmas, pos, roles, chars, sense_mask, stored_opt = pickle.load(
            paramsfp)
    stored_opt.external_embedding = options.external_embedding
    # Rebinds `parser` to a freshly constructed model loaded from disk.
    parser = SRLLSTM(words, pWords, plemmas, pos, roles, chars, sense_mask,
                     stored_opt)
    parser.Load(os.path.join(options.outdir, options.model))
    ts = time.time()
    # NOTE(review): `sen_cut` and `use_default` are bare names here (not
    # options.*) — they must be defined earlier in the full file; verify.
    pred = list(parser.Predict(options.input, sen_cut, use_default))
    te = time.time()
    utils.write_conll(options.output, pred)
    print 'Finished predicting test', te - ts

# Directory prediction mode: same model setup, applied per file (the
# per-file loop continues beyond this truncated chunk).
if options.inputdir and options.outputdir:
    with open(os.path.join(options.outdir, options.params), 'r') as paramsfp:
        words, pWords, plemmas, pos, roles, chars, sense_mask, stored_opt = pickle.load(
            paramsfp)
    stored_opt.external_embedding = options.external_embedding
    parser = SRLLSTM(words, pWords, plemmas, pos, roles, chars, sense_mask,
                     stored_opt)
    parser.Load(os.path.join(options.outdir, options.model))
# --- arc-hybrid parser driver fragment (Python 2; user-facing output is in
# Chinese and is left byte-identical — only comments are translated) ---
# NOTE(review): original formatting lost; indentation reconstructed. The
# first three statements are presumably the tail of the training branch
# (inside its epoch loop) whose `if` header lies before this chunk, and the
# `else:` below pairs with that truncated `if` — confirm against full file.
print '预测发展集结束'  # "finished predicting the dev set"
# logger.info('...')  (commented-out logging of the same message)
parser.Save(options.model + str(epoch + 1))
# Test phase: evaluate each saved per-epoch model on the test set.
else:
    # Restore vocab and training-time options from the pickle.
    with open(options.params, 'r') as paramsfp:
        words, w2i, pos, rels, stored_opt = pickle.load(paramsfp)
    stored_opt.external_embedding = options.external_embedding
    parser = ArcHybridLSTM(words, pos, rels, w2i, stored_opt)
    for epoch in xrange(options.epochs):
        print '开始 第', epoch, '轮'  # "starting round <epoch>"
        # Load the model checkpoint saved after this training epoch.
        parser.Load(options.model + str(epoch + 1))
        print '模型:' + options.model + str(epoch + 1)  # "model: <path>"
        # Decide CoNLL-U vs legacy CoNLL purely from the file suffix.
        conllu = (os.path.splitext(
            options.conll_test.lower())[1] == '.conllu')
        tespath = os.path.join(
            options.output,
            'test_pred' + str(epoch + 1) + '.conll' if not conllu else
            'test_pred' + str(epoch + 1) + '.conllu')
        print '测试结果路径:', tespath  # "test output path"
        # logger.info(...)  (commented-out logging of the output path)
        ts = time.time()
        pred = list(parser.Predict(options.conll_test))
        te = time.time()
        utils.write_conll(tespath, pred)
# --- barchybrid (arc-hybrid transition parser) driver fragment, Python 2 ---
# NOTE(review): original formatting lost; indentation reconstructed.
(options, args) = parser.parse_args()
print 'Using external embedding:', options.external_embedding

if not options.predictFlag:
    # The model needs at least one feature-extraction scheme enabled.
    if not (options.rlFlag or options.rlMostFlag or options.headFlag):
        print 'You must use either --userlmost or --userl or --usehead (you can use multiple)'
        sys.exit()
    if options.load_model != "":
        # Resume training: restore vocab/options from the pickle that
        # accompanies the saved model, then load its weights.
        with open(options.load_params, 'r') as paramsfp:
            words, w2i, pos, rels, stored_opt = pickle.load(paramsfp)
        stored_opt.external_embedding = options.external_embedding
        # Rebinds `parser` from the option parser to the model object.
        parser = ArcHybridLSTM(words, pos, rels, w2i, stored_opt)
        parser.Load(options.load_model)
    else:
        # Fresh training run: build the vocabulary from the training
        # treebank and pickle it (with the options) for later prediction.
        print 'Preparing vocab'
        words, w2i, pos, rels = utils.vocab(options.conll_train)
        with open(os.path.join(options.output, options.params),
                  'w') as paramsfp:
            pickle.dump((words, w2i, pos, rels, options), paramsfp)
        print 'Finished collecting vocab'
        print 'Initializing blstm arc hybrid:'
        parser = ArcHybridLSTM(words, pos, rels, w2i, options)
    # Comma-separated parallel lists: per-stage epoch counts paired with
    # per-stage training files; run `epoch` passes over each file.
    for i, (epoch, train) in enumerate(
            zip(options.epochs.split(','),
                options.conll_train.split(',')), 1):
        for iepoch in range(1, int(epoch) + 1):
            print 'Starting epoch', iepoch