Example #1
        with open(options.params, 'r') as paramsfp:
            words, w2i, pos, rels, stored_opt = pickle.load(paramsfp)

        stored_opt.external_embedding = options.external_embedding

        print 'Initializing lstm mstparser:'
        parser = mstlstm.MSTParserLSTM(words, pos, rels, w2i, stored_opt)

        parser.Load(options.model)
        conllu = (os.path.splitext(options.conll_test.lower())[1] == '.conllu')
        tespath = os.path.join(
            options.output,
            'test_pred.conll' if not conllu else 'test_pred.conllu')

        ts = time.time()
        test_res = list(parser.Predict(options.conll_test))
        te = time.time()
        print 'Finished predicting test.', te - ts, 'seconds.'
        utils.write_conll(tespath, test_res)

        if not conllu:
            os.system('perl conll/eval.pl -g ' + options.conll_test + ' -s ' +
                      tespath + ' > ' + tespath + '.txt')
        else:
            os.system(
                'python conll/evaluation_script/conll17_ud_eval.py -v -w conll/evaluation_script/weights.clas '
                + options.conll_test + ' ' + tespath + ' > ' + tespath +
                '.txt')
    else:
        print 'Preparing vocab'
        words, w2i, pos, rels = utils.vocab(options.conll_train)
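Note on Example #1: the evaluation step builds its shell command by string concatenation and hands it to os.system. A minimal alternative sketch, reusing the tespath and options names from the snippet above and assuming the same conll/eval.pl script, runs the evaluator through the standard-library subprocess module instead, which sidesteps shell quoting:

    import subprocess

    # Sketch only: invoke the perl evaluator with an argument list and
    # redirect its report to tespath + '.txt' (no shell string is built).
    with open(tespath + '.txt', 'w') as out:
        subprocess.call(
            ['perl', 'conll/eval.pl', '-g', options.conll_test, '-s', tespath],
            stdout=out)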
Example #2
        if index not in train_data:
            dev_data[index] = fulltrain_data[index]
            dev_id2arg2rel[index] = train_id2arg2rel[index]

    #parser = learner.jNeRE(words, nertags, postagCount, rels, w2i, c2i, options)
    parser = learner.jNeRE(words, nertags, rels, w2i, c2i, options)

    for epoch in xrange(options.epochs):
        print '\n-----------------\nStarting epoch', epoch + 1

        #parser.Train(train_data, train_id2nerBILOU, id2arg2rel, classweights)
        parser.Train(train_data, train_id2nerBILOU, id2arg2rel)

        label_pred = []
        label_correct = []
        predDev, relsDev = parser.Predict(dev_data)
        #pickle.dump((predDev, relsDev), open(options.output + "dev_ep" + str(epoch + 1), "wb"))

        for sentenceID in predDev:
            label_pred.append(predDev[sentenceID])
            label_correct.append(train_id2nerBILOU[sentenceID].strip().split())

        assert len(label_pred) == len(label_correct)

        f1 = compute_NER_f1_macro(label_pred, label_correct, 'O', "IOBES")
        f1_b = compute_NER_f1_macro(label_pred, label_correct, 'B', "IOBES")

        if f1_b > f1:
            logging.debug(
                "Setting wrong tags to B- improves from %.4f to %.4f" %
                (f1, f1_b))
Example #3
        with open(options.conll_test, 'r') as conllFP:
            devData = list(utils.read_conll(conllFP, parser.c2i))

        conll_sentences = []
        for sentence in devData:
            conll_sentence = [
                entry for entry in sentence
                if isinstance(entry, utils.ConllEntry)
            ]
            conll_sentences.append(conll_sentence)

        tespath = os.path.join(options.output, options.conll_test_output)
        print('Predicting parsing dependencies')
        ts = time.time()
        test_res = list(parser.Predict(conll_sentences, True))
        te = time.time()
        print('Finished in', te - ts, 'seconds.')
        utils.write_conll(tespath, test_res)

    else:

        ext_words_train = utils.ext_vocab(options.conll_train,
                                          options.external_embedding_voc)
        ext_words_dev = utils.ext_vocab(options.conll_dev,
                                        options.external_embedding_voc)

        print('Extracting vocabulary')
        words, w2i, c2i, pos, xpos, rels = utils.vocab(options.conll_train)

        with open(os.path.join(options.output, options.params),
Example #4
        with open(options.params, 'r') as paramsfp:
            words, w2i, pos, rels, stored_opt = pickle.load(paramsfp)

        stored_opt.external_embedding = options.external_embedding

        print 'Initializing lstm mstparser:'
        parser = mstlstm.MSTParserLSTM(words, pos, rels, w2i, stored_opt)

        parser.Load(options.model)
        conllu = (os.path.splitext(options.conll_test.lower())[1] == '.conllu')
        tespath = os.path.join(
            options.output,
            'test_pred.conll' if not conllu else 'test_pred.conllu')

        ts = time.time()
        test_res = list(parser.Predict(options.conll_test, options.batch_size))
        te = time.time()
        print 'Finished predicting test.', te - ts, 'seconds.'
        utils.write_conll(tespath, test_res)

        if not conllu:
            os.system('perl src/utils/eval.pl -g ' + options.conll_test +
                      ' -s ' + tespath + ' > ' + tespath + '.txt')
        else:
            os.system(
                'python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas '
                + options.conll_test + ' ' + tespath + ' > ' + tespath +
                '.txt')
    else:
        print 'Preparing vocab'
        words, w2i, pos, rels = utils.vocab(options.conll_train)
Example #5
        for d in train_data:
            buckets[len(d) - min_len - 1].append(d)
        buckets = [x for x in buckets if x != []]

        for epoch in xrange(options.epochs):
            print 'Starting epoch', epoch
            parser.Train(utils.get_batches(buckets, parser, True))
            if options.save_epoch:
                parser.Save(
                    os.path.join(options.outdir,
                                 options.model + str(epoch + 1)))
            if options.conll_dev != '':
                start = time.time()
                utils.write_conll(
                    os.path.join(options.outdir, options.model) +
                    str(epoch + 1) + '.txt', parser.Predict(options.conll_dev))
                os.system('perl src/utils/eval.pl -g ' + options.conll_dev +
                          ' -s ' +
                          os.path.join(options.outdir, options.model) +
                          str(epoch + 1) + '.txt' + ' > ' +
                          os.path.join(options.outdir, options.model) +
                          str(epoch + 1) + '.eval &')
                print 'Finished predicting dev; time:', time.time() - start
        parser.Save(os.path.join(options.outdir, options.model))

    if options.input and options.output:
        with open(options.outdir + '/' + options.params, 'r') as paramsfp:
            words, lemmas, pos, roles, chars, stored_opt = pickle.load(
                paramsfp)
        stored_opt.external_embedding = options.external_embedding
        parser = SRLLSTM(words, lemmas, pos, roles, chars, stored_opt)
        stored_opt.external_embedding = None
        print 'Loading pre-trained model'
        parser = learner.jPosDepLearner(words, pos, rels, w2i, c2i, caps,
                                        stored_opt)
        parser.Load(options.model)

        testoutpath = os.path.join(options.output, options.conll_test_output)
        print 'Predicting POS tags and parsing dependencies'
        #ts = time.time()
        #test_pred = list(parser.Predict(options.conll_test))
        #te = time.time()
        #print 'Finished in', te-ts, 'seconds.'
        #utils.write_conll(testoutpath, test_pred)

        with open(testoutpath, 'w') as fh:
            for sentence in parser.Predict(options.conll_test):
                print sentence
                for entry in sentence[1:]:
                    fh.write(str(entry) + '\n')
                fh.write('\n')

    else:
        print("Training file: " + options.conll_train)
        if options.conll_dev != "N/A":
            print("Development file: " + options.conll_dev)

        highestScore = 0.0
        eId = 0
        flag1 = 1
        if os.path.isfile(os.path.join(options.output, options.params)) and \
                os.path.isfile(os.path.join(options.output, os.path.basename(options.model))) and flag1==0 :
Example #7
        print 'Initializing blstm arc hybrid:'
        if WITHCPOS:
            print "Using features as well"
            #print "ff", GENDER, NUMBER, PERSON
            parser = ArcHybridLSTM(words, pos, cpos, GENDER, NUMBER, PERSON, CASE, rels, w2i, options)
        else:
            parser = ArcHybridLSTM(words, pos, rels, w2i, options)

        deltas = []
        for epoch in xrange(options.epochs):
            print '\n================\nStarting epoch', epoch + 1
            parser.Train(options.conll_train, epoch)
            #devpath = os.path.join(options.output, 'dev_epoch_' + str(epoch+1) + '.conll')
            devpath = os.path.join(options.output, 'dev_epoch_%03d.conll' % (epoch + 1))
            utils.write_conll(devpath, parser.Predict(options.conll_dev))
            # run evaluation
            #command = 'perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt '
            #print "executing: %s" % command
            #os.system(command)
            # just show current LAS
            #ifp = open(devpath + '.txt')
            #print "current LAS", ifp.readline()
            #ifp.close()

            #command = "~/bin/toolbin/conll/evaluation_script/conll17_ud_eval.py --weights ~/bin/toolbin/conll/evaluation_script/weights.clas " + options.conll_dev + "  " + devpath + " > " + devpath + '.txt4'
            #print "executing: %s" % command
            #os.system(command)
            # just show current LAS
            #ifp = open(devpath + '.txt4')
Example #8
        with open(os.path.join(options.output, options.params),
                  'rb') as paramsfp:
            words, w2i, c2i, m2i, t2i, morph_dict, pos, rels, stored_opt = pickle.load(
                paramsfp)
        stored_opt.external_embedding = None

        print('Loading pre-trained model')
        parser = learner.jPosDepLearner(words, pos, rels, w2i, c2i, m2i, t2i,
                                        morph_dict, stored_opt)

        parser.Load(os.path.join(options.output, options.model))

        testoutpath = os.path.join(options.output, options.conll_test_output)
        print('Predicting POS tags and parsing dependencies')
        with open(testoutpath, 'w') as fh:
            for sentence in parser.Predict(options.conll_test):
                for entry in sentence[1:]:
                    fh.write(str(entry) + '\n')
                fh.write('\n')

    else:
        print("Training file: " + options.conll_train)
        highestScore = 0.0
        eId = 0

        print('Extracting vocabulary')
        morph_dict = utils.get_morph_dict(options.segmentation_path,
                                          options.lowerCase)
        words, w2i, c2i, m2i, t2i, pos, rels = utils.vocab(
            options.conll_train, morph_dict)
Example #9
    print 'Using external embedding:', options.external_embedding

    if options.predictFlag:
        with open(options.params, 'r') as paramsfp:
            words, w2i, c2i, pos, rels, stored_opt = pickle.load(paramsfp)

        stored_opt.external_embedding = options.external_embedding

        print 'Loading pre-trained joint model'
        parser = learner.jPosDepLearner(words, pos, rels, w2i, c2i, stored_opt)
        parser.Load(options.model)

        tespath = os.path.join(options.output, options.conll_test_output)
        print 'Predicting POS tags and parsing dependencies'
        ts = time.time()
        test_res = list(parser.Predict(options.conll_test))
        te = time.time()
        print 'Finished in', te - ts, 'seconds.'
        utils.write_conll(tespath, test_res)

        #conllu = (os.path.splitext(options.conll_test.lower())[1] == '.conllu')
        #if not conllu:#Scored with punctuation
        #    os.system('perl utils/eval07.pl -q -g ' + options.conll_test + ' -s ' + tespath  + ' > ' + tespath + '.scores.txt')
        #else:
        #    os.system('python utils/evaluation_script/conll17_ud_eval.py -v -w utils/evaluation_script/weights.clas ' + options.conll_test + ' ' + tespath + ' > ' + tespath + '.scores.txt')
    else:
        print 'Extracting vocabulary'
        words, w2i, c2i, pos, rels = utils.vocab(options.conll_train)

        with open(os.path.join(options.output, options.params),
                  'w') as paramsfp:
Example #10
            #print 'Finished predicting dev'
        print "Total time:", total_time, "words/sec:", nwords / total_time, "sents/sec:", nsents / total_time
    else:
        with open(options.params, 'r') as paramsfp:
            words, w2i, pos, rels, stored_opt = pickle.load(paramsfp)

        stored_opt.external_embedding = options.external_embedding

        parser = ArcHybridLSTM(words, pos, rels, w2i, stored_opt)
        parser.Load(options.model)
        conllu = (os.path.splitext(options.conll_test.lower())[1] == '.conllu')
        tespath = os.path.join(
            options.output,
            'test_pred.conll' if not conllu else 'test_pred.conllu')
        ts = time.time()
        pred = list(parser.Predict(options.conll_test, options.batch_size))
        te = time.time()
        pred_time = te - ts
        nsents = len(pred)
        nwords = sum([len(s) for s in pred])
        print pred_time, "sents/sec:", nsents / pred_time, "words/sec:", nwords / pred_time
        print nsents, nwords
        utils.write_conll(tespath, pred)

        #if not conllu:
        #    os.system('perl src/utils/eval.pl -g ' + options.conll_test + ' -s ' + tespath  + ' > ' + tespath + '.txt')
        #else:
        #    os.system('python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_test + ' ' + tespath + ' > ' + tespath + '.txt')

        print 'Finished predicting test', te - ts
Example #11
                epoch, best_acc, options)

        if options.conll_dev == None:
            parser.Save(os.path.join(options.outdir, options.model))

    if options.input and options.output:
        with open(os.path.join(options.outdir, options.params),
                  'r') as paramsfp:
            words, pWords, plemmas, pos, roles, chars, sense_mask, stored_opt = pickle.load(
                paramsfp)
        stored_opt.external_embedding = options.external_embedding
        parser = SRLLSTM(words, pWords, plemmas, pos, roles, chars, sense_mask,
                         stored_opt)
        parser.Load(os.path.join(options.outdir, options.model))
        ts = time.time()
        pred = list(parser.Predict(options.input, sen_cut, use_default))
        te = time.time()
        utils.write_conll(options.output, pred)
        print 'Finished predicting test', te - ts

    if options.inputdir and options.outputdir:
        with open(os.path.join(options.outdir, options.params),
                  'r') as paramsfp:
            words, pWords, plemmas, pos, roles, chars, sense_mask, stored_opt = pickle.load(
                paramsfp)
        stored_opt.external_embedding = options.external_embedding
        parser = SRLLSTM(words, pWords, plemmas, pos, roles, chars, sense_mask,
                         stored_opt)
        parser.Load(os.path.join(options.outdir, options.model))
        ts = time.time()
        for dir, subdir, files in os.walk(options.inputdir):
Example #12
        max_len = max([len(d) for d in train_data])
        min_len = min([len(d) for d in train_data])
        buckets = [list() for i in range(min_len, max_len)]
        for d in train_data:
            buckets[len(d) - min_len - 1].append(d)
        buckets = [x for x in buckets if x != []]

        for epoch in xrange(options.epochs):
            print 'Starting epoch', epoch
            print 'best F-score before starting the epoch: ' + str(
                best_f_score)
            best_f_score = parser.Train(
                utils.get_batches(buckets, parser, True), epoch, best_f_score,
                options)
            print 'best F-score after finishing the epoch: ' + str(
                best_f_score)

    if options.input and options.output:
        with open(os.path.join(options.outdir, options.params),
                  'r') as paramsfp:
            words, lemmas, pos, roles, chars, stored_opt = pickle.load(
                paramsfp)
        stored_opt.external_embedding = options.external_embedding
        parser = SRLLSTM(words, lemmas, pos, roles, chars, stored_opt)
        parser.Load(os.path.join(options.outdir, options.model))
        print 'loaded the model'
        ts = time.time()
        pred = list(parser.Predict(options.input))
        te = time.time()
        utils.write_conll(options.output, pred)
        print 'Finished predicting test', te - ts
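Note on Example #12: the bucket construction indexes with buckets[len(d) - min_len - 1], so sentences of the minimum length end up in the last bucket through Python's negative indexing; this looks harmless for batching but is easy to misread. A small self-contained sketch of the same length-bucketing idea, on hypothetical data rather than the repository's structures:

    from collections import defaultdict

    # Sketch only: group items into buckets keyed by their length and
    # return the non-empty buckets in increasing order of length.
    def bucket_by_length(sentences):
        buckets = defaultdict(list)
        for s in sentences:
            buckets[len(s)].append(s)
        return [buckets[length] for length in sorted(buckets)]

    data = [['a'], ['b', 'c'], ['d', 'e'], ['f', 'g', 'h']]
    print(bucket_by_length(data))
    # [[['a']], [['b', 'c'], ['d', 'e']], [['f', 'g', 'h']]]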
Example #13
    print 'Using external embedding:', options.external_embedding

    if options.predictFlag:
        with open(os.path.join(options.outdir, options.params), 'r') as paramsfp:
            words, w2i, c2i, pos, rels, morphs, stored_opt = pickle.load(paramsfp)
            
        stored_opt.external_embedding = options.external_embedding
        stored_opt.pretrain_wembed = options.pretrain_wembed
        
        print 'Loading pre-trained joint model'
        parser = learner.jPosDepLearner(words, pos, rels, morphs, w2i, c2i, stored_opt)
        parser.Load(os.path.join(options.outdir, os.path.basename(options.model)))
        conllu = (os.path.splitext(options.conll_test.lower())[1] == '.conllu')
        tespath = os.path.join(options.outdir, stored_opt.model + 'test_pred.conllu')
        print 'Predicting POS tags and parsing dependencies'
        ts = time.time()
        test_res = list(parser.Predict(options.conll_test))
        te = time.time()
        print 'Finished in', te - ts, 'seconds.'
        utils.write_conll(tespath, test_res)

        if not conllu:  # Scored with punctuation
            os.system('perl utils/eval07.pl -q -g ' + options.conll_test + ' -s ' + tespath + ' > ' + tespath + '.scores.txt')
        else:
            os.system('python utils/evaluation_script/conll17_ud_eval.py -v -w utils/evaluation_script/weights.clas ' + options.conll_gold + ' ' + tespath + ' > ' + tespath + '.scores.txt')
    else:
        if os.path.isfile(os.path.join(options.outdir, options.params)):
            print("Load existed vocabulary.")
            with open(os.path.join(options.outdir, options.params), 'r') as paramsfp:
                words, w2i, c2i, pos, rels, morphs, stored_opt = pickle.load(paramsfp)
        else:
Example #14
            print 'Preparing vocab'
            words, w2i, pos, rels = utils.vocab(options.conll_train)

            with open(os.path.join(options.output, options.params), 'w') as paramsfp:
                pickle.dump((words, w2i, pos, rels, options), paramsfp)
            print 'Finished collecting vocab'

            print 'Initializing blstm arc hybrid:'
            parser = ArcHybridLSTM(words, pos, rels, w2i, options)

        for i, (epoch, train) in enumerate(zip(options.epochs.split(','), options.conll_train.split(',')), 1):
            for iepoch in range(1, int(epoch)+1):
                print 'Starting epoch', iepoch
                parser.Train(train)
                devpath = os.path.join(options.output, 'dev_epoch_' + str(i) + '_' + str(iepoch) + '.conll')
                utils.write_conll(devpath, parser.Predict(options.conll_dev))
                os.system('perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + devpath  + ' > ' + devpath + '.txt &')
                print 'Finished predicting dev'
                parser.Save(os.path.join(options.output, options.model + '_' + str(i) + '_' + str(iepoch)))
    else:
        with open(options.params, 'r') as paramsfp:
            words, w2i, pos, rels, stored_opt = pickle.load(paramsfp)

        stored_opt.external_embedding = options.external_embedding

        parser = ArcHybridLSTM(words, pos, rels, w2i, stored_opt)
        parser.Load(options.model)
        tespath = os.path.join(options.output, 'test_pred.conll')
        ts = time.time()
        pred = parser.Predict(options.conll_test)
        te = time.time()