Example #1
0
                '.txt')
    # Training branch: no stored params file, so build the vocabulary from
    # the training treebank and train a new model from scratch.
    else:
        print 'Preparing vocab'
        words, w2i, pos, rels = utils.vocab(options.conll_train)

        # Persist vocab + options so the trained model can be reloaded later
        # for prediction with the same mappings.
        with open(os.path.join(options.output, options.params),
                  'w') as paramsfp:
            pickle.dump((words, w2i, pos, rels, options), paramsfp)
        print 'Finished collecting vocab'

        print 'Initializing lstm mstparser:'
        parser = mstlstm.MSTParserLSTM(words, pos, rels, w2i, options)

        for epoch in xrange(options.epochs):
            print 'Starting epoch', epoch
            parser.Train(options.conll_train)
            # Choose the dev-output extension from the dev file's extension.
            conllu = (os.path.splitext(
                options.conll_dev.lower())[1] == '.conllu')
            devpath = os.path.join(
                options.output, 'dev_epoch_' + str(epoch + 1) +
                ('.conll' if not conllu else '.conllu'))
            utils.write_conll(devpath, parser.Predict(options.conll_dev))
            # Checkpoint the model after every epoch (suffix = 1-based epoch).
            parser.Save(
                os.path.join(options.output,
                             os.path.basename(options.model) + str(epoch + 1)))

            # Score the dev predictions with the matching external evaluator;
            # results land next to the predictions in devpath + '.txt'.
            if not conllu:
                os.system('perl conll/eval.pl -g ' + options.conll_dev +
                          ' -s ' + devpath + ' > ' + devpath + '.txt')
            else:
                os.system(
Example #2
0
    # Dev split: every indexed sentence of the full training set that was not
    # sampled into train_data, plus its relation annotations.
    dev_data = {}
    dev_id2arg2rel = {}
    for index in fulltrain_data:
        if index not in train_data:
            dev_data[index] = fulltrain_data[index]
            dev_id2arg2rel[index] = train_id2arg2rel[index]

    #parser = learner.jNeRE(words, nertags, postagCount, rels, w2i, c2i, options)
    parser = learner.jNeRE(words, nertags, rels, w2i, c2i, options)

    for epoch in xrange(options.epochs):
        print '\n-----------------\nStarting epoch', epoch + 1

        #parser.Train(train_data, train_id2nerBILOU, id2arg2rel, classweights)
        parser.Train(train_data, train_id2nerBILOU, id2arg2rel)

        # Collect predicted vs. gold NER tag sequences for the dev split.
        label_pred = []
        label_correct = []
        predDev, relsDev = parser.Predict(dev_data)
        #pickle.dump((predDev, relsDev), open(options.output + "dev_ep" + str(epoch + 1), "wb"))

        for sentenceID in predDev:
            label_pred.append(predDev[sentenceID])
            # Gold tags come from the full-training annotation, keyed by the
            # same sentence id, stored as a whitespace-separated string.
            label_correct.append(train_id2nerBILOU[sentenceID].strip().split())

        assert len(label_pred) == len(label_correct)

        # NOTE(review): the third argument ('O' vs 'B') looks like the tag to
        # treat as "outside"/ignored — confirm against compute_NER_f1_macro.
        f1 = compute_NER_f1_macro(label_pred, label_correct, 'O', "IOBES")
        f1_b = compute_NER_f1_macro(label_pred, label_correct, 'B', "IOBES")
Example #3
0
        # Token-bucketed mini-batches are built once; only their order is
        # reshuffled at the start of each epoch.
        sentences, train_batches = utils.batch_data(options.conll_train, c2i,
                                                    options.batch_tokens)
        batches = len(train_batches)
        highestScore = options.highest_score
        tsId = 0
        # Resume from options.last_epoch so interrupted runs can continue.
        for epoch in range(options.last_epoch, options.epochs):

            print("Starting epoch ", epoch + 1)
            random.shuffle(train_batches)

            for idx, mini_batch in enumerate(train_batches):

                # Global 1-based training-step counter across all epochs.
                t_step = (epoch * batches) + idx + 1
                # NOTE(review): the extra True on every 5000th step presumably
                # triggers some periodic behavior inside Train — confirm there.
                if (t_step % 5000 == 0):
                    parser.Train(sentences, mini_batch, t_step, True)
                else:
                    parser.Train(sentences, mini_batch, t_step)

                if (t_step <= 5000):
                    # Warm-up phase: only checkpoint every 100 steps.
                    if (t_step % 100 == 0):
                        print("Save Model...")
                        parser.Save(
                            os.path.join(options.output,
                                         os.path.basename(options.model)))

                else:
                    # Past warm-up: evaluate on dev (branch runs for every
                    # step > 5000 in the visible code).
                    print("Performance on Dev data")
                    start = time.time()
                    devPredSents = parser.Predict(conll_sentences, False)
                    count = 0
Example #4
0
    # Training branch: build vocabulary from the training treebank and train
    # a new model from scratch (the alternative branch is outside this view).
    else:
        print 'Preparing vocab'
        words, w2i, pos, rels = utils.vocab(options.conll_train)

        # Persist vocab + options for later reloading at prediction time.
        with open(os.path.join(options.output, options.params),
                  'w') as paramsfp:
            pickle.dump((words, w2i, pos, rels, options), paramsfp)
        print 'Finished collecting vocab'

        print 'Initializing lstm mstparser:'
        parser = mstlstm.MSTParserLSTM(words, pos, rels, w2i, options)

        for epoch in xrange(options.epochs):
            print 'Starting epoch', epoch
            # Time one full training pass over the treebank.
            _start = time.time()
            parser.Train(options.conll_train, options.batch_size)
            print "time:", time.time() - _start
            # Choose the dev-output extension from the dev file's extension.
            conllu = (os.path.splitext(
                options.conll_dev.lower())[1] == '.conllu')
            devpath = os.path.join(
                options.output, 'dev_epoch_' + str(epoch + 1) +
                ('.conll' if not conllu else '.conllu'))
            utils.write_conll(
                devpath, parser.Predict(options.conll_dev, options.batch_size))
            # Checkpoint the model after every epoch (suffix = 1-based epoch).
            parser.Save(
                os.path.join(options.output,
                             os.path.basename(options.model) + str(epoch + 1)))

            # CoNLL-X dev files are scored with the external perl evaluator.
            if not conllu:
                os.system('perl src/utils/eval.pl -g ' + options.conll_dev +
                          ' -s ' + devpath + ' > ' + devpath + '.txt')
Example #5
0
            pickle.dump((words, lemmas, pos, roles, chars, options), paramsfp)
        print 'Finished collecting vocab'

        print 'Initializing blstm srl:'
        parser = SRLLSTM(words, lemmas, pos, roles, chars, options)

        max_len = max([len(d) for d in train_data])
        min_len = min([len(d) for d in train_data])
        buckets = [list() for i in range(min_len, max_len)]
        for d in train_data:
            buckets[len(d) - min_len - 1].append(d)
        buckets = [x for x in buckets if x != []]

        for epoch in xrange(options.epochs):
            print 'Starting epoch', epoch
            parser.Train(utils.get_batches(buckets, parser, True))
            if options.save_epoch:
                parser.Save(
                    os.path.join(options.outdir,
                                 options.model + str(epoch + 1)))
            if options.conll_dev != '':
                start = time.time()
                utils.write_conll(
                    os.path.join(options.outdir, options.model) +
                    str(epoch + 1) + '.txt', parser.Predict(options.conll_dev))
                os.system('perl src/utils/eval.pl -g ' + options.conll_dev +
                          ' -s ' +
                          os.path.join(options.outdir, options.model) +
                          str(epoch + 1) + '.txt' + ' > ' +
                          os.path.join(options.outdir, options.model) +
                          str(epoch + 1) + '.eval &')
Example #6
0
                pickle.dump((words, w2i, pos, rels, options), paramsfp)

        print 'Finished collecting vocab'

        print 'Initializing blstm arc hybrid:'
        # NOTE(review): the WITHCPOS branch below is tab-indented while the
        # rest of the file uses spaces.  Python 2 tolerates the mix; Python 3
        # raises TabError.  Normalize to spaces when this file is next edited.
	if WITHCPOS:
	    print "Using features as well"
	    #print "ff", GENDER, NUMBER, PERSON	   
	    parser = ArcHybridLSTM(words, pos, cpos, GENDER, NUMBER, PERSON, CASE, rels, w2i, options)
	else:
	    parser = ArcHybridLSTM(words, pos, rels, w2i, options)

	deltas = []
        for epoch in xrange(options.epochs):
            print '\n================\nStarting epoch', epoch+1
            parser.Train(options.conll_train, epoch)
            #devpath = os.path.join(options.output, 'dev_epoch_' + str(epoch+1) + '.conll')
            # Zero-padded epoch number keeps dev files lexically sorted.
            devpath = os.path.join(options.output, 'dev_epoch_%03d.conll' % (epoch+1))
            utils.write_conll(devpath, parser.Predict(options.conll_dev))
	    # run evaluation
	    #command = 'perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + devpath  + ' > ' + devpath + '.txt '
	    #print "executing: %s" % command
            #os.system(command)
	    # just show current LAS
  	    #ifp = open(devpath + '.txt')
	    #print "current LAS", ifp.readline()
            #ifp.close()

	    
#	    command = "~/bin/toolbin/conll/evaluation_script/conll17_ud_eval.py --weights ~/bin/toolbin/conll/evaluation_script/weights.clas " + options.conll_dev + "  " + devpath  + " > " + devpath + '.txt4'
#	    print "executing: %s" % command
            #print 'Initializing joint model'
            # NOTE(review): this rebinds `parser` to a joint POS/dep learner —
            # it appears to belong to a different snippet than the
            # ArcHybridLSTM code above; confirm the surrounding control flow.
            parser = learner.jPosDepLearner(words, pos, rels, w2i, c2i, caps,
                                            options)

        for epoch in xrange(options.epochs):
            print '\n-----------------\nStarting epoch', epoch + 1

            # Learning-rate schedule: restart the trainer every 10 epochs,
            # halving the rate twice (1e-3 -> 5e-4 -> 2.5e-4, then constant).
            if epoch % 10 == 0:
                if epoch == 0:
                    parser.trainer.restart(learning_rate=0.001)
                elif epoch == 10:
                    parser.trainer.restart(learning_rate=0.0005)
                else:
                    parser.trainer.restart(learning_rate=0.00025)
            # Alternate objectives each epoch: one pass over the dependency
            # data, then one pass over the NER data.
            parser.Train(options.conll_train, dep_epoch=1)
            parser.Train(options.conll_trainner, ner_epoch=1)

            if options.conll_dev == "N/A":
                # No dev set: just overwrite the saved model every epoch.
                parser.Save(
                    os.path.join(options.output,
                                 os.path.basename(options.model)))

            else:
                # Predict on both dev sets (dependencies and NER) and prepare
                # accumulators for LAS/UAS/POS scoring.
                devPredSents = parser.Predict(options.conll_dev, ner_epoch=0)
                devNerPredSents = parser.Predict(options.conll_devner)
                out = open(os.path.join(options.output, options.predout), "w")
                count = 0
                lasCount = 0
                uasCount = 0
                posCount = 0
Example #8
0
        print 'Initializing blstm srl:'
        parser = SRLLSTM(words, pWords, plemmas, pos, roles, chars, sense_mask,
                         options)
        best_acc = 0.0

        max_len = max([len(d) for d in train_data])
        min_len = min([len(d) for d in train_data])
        buckets = [list() for i in range(min_len, max_len)]
        for d in train_data:
            buckets[len(d) - min_len - 1].append(d)
        buckets = [x for x in buckets if x != []]

        for epoch in xrange(options.epochs):
            print 'Starting epoch', epoch
            best_acc = parser.Train(
                utils.get_batches(buckets, parser, True, options.sen_cut),
                epoch, best_acc, options)

        if options.conll_dev == None:
            parser.Save(os.path.join(options.outdir, options.model))

    if options.input and options.output:
        with open(os.path.join(options.outdir, options.params),
                  'r') as paramsfp:
            words, pWords, plemmas, pos, roles, chars, sense_mask, stored_opt = pickle.load(
                paramsfp)
        stored_opt.external_embedding = options.external_embedding
        parser = SRLLSTM(words, pWords, plemmas, pos, roles, chars, sense_mask,
                         stored_opt)
        parser.Load(os.path.join(options.outdir, options.model))
        ts = time.time()
Example #9
0
        parser = SRLLSTM(words, lemmas, pos, roles, chars, options)
        best_f_score = 0.0

        max_len = max([len(d) for d in train_data])
        min_len = min([len(d) for d in train_data])
        buckets = [list() for i in range(min_len, max_len)]
        for d in train_data:
            buckets[len(d) - min_len - 1].append(d)
        buckets = [x for x in buckets if x != []]

        for epoch in xrange(options.epochs):
            print 'Starting epoch', epoch
            print 'best F-score before starting the epoch: ' + str(
                best_f_score)
            best_f_score = parser.Train(
                utils.get_batches(buckets, parser, True), epoch, best_f_score,
                options)
            print 'best F-score after finishing the epoch: ' + str(
                best_f_score)

    if options.input and options.output:
        with open(os.path.join(options.outdir, options.params),
                  'r') as paramsfp:
            words, lemmas, pos, roles, chars, stored_opt = pickle.load(
                paramsfp)
        stored_opt.external_embedding = options.external_embedding
        parser = SRLLSTM(words, lemmas, pos, roles, chars, stored_opt)
        parser.Load(os.path.join(options.outdir, options.model))
        print 'loaded the model'
        ts = time.time()
        pred = list(parser.Predict(options.input))
Example #10
0
        # Training branch: build vocabulary from the training treebank(s)
        # and initialize a fresh parser.
        else:
            print 'Preparing vocab'
            words, w2i, pos, rels = utils.vocab(options.conll_train)

            # Persist vocab + options for later reloading at prediction time.
            with open(os.path.join(options.output, options.params), 'w') as paramsfp:
                pickle.dump((words, w2i, pos, rels, options), paramsfp)
            print 'Finished collecting vocab'

            print 'Initializing blstm arc hybrid:'
            parser = ArcHybridLSTM(words, pos, rels, w2i, options)

        # Multi-stage training: options.epochs and options.conll_train are
        # comma-separated, pairing an epoch count with each training file.
        # Stage index i and epoch index iepoch are both 1-based in filenames.
        for i, (epoch, train) in enumerate(zip(options.epochs.split(','), options.conll_train.split(',')), 1):
            for iepoch in range(1, int(epoch)+1):
                print 'Starting epoch', iepoch
                parser.Train(train)
                devpath = os.path.join(options.output, 'dev_epoch_' + str(i) + '_' + str(iepoch) + '.conll')
                utils.write_conll(devpath, parser.Predict(options.conll_dev))
                # Score dev predictions in the background ('&') with the
                # external perl evaluator.
                os.system('perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + devpath  + ' > ' + devpath + '.txt &')
                print 'Finished predicting dev'
                parser.Save(os.path.join(options.output, options.model + '_' + str(i) + '_' + str(iepoch)))
    # Prediction mode: restore vocab/options saved at training time, load the
    # trained model, and parse the test input.
    else:
        with open(options.params, 'r') as paramsfp:
            words, w2i, pos, rels, stored_opt = pickle.load(paramsfp)

        stored_opt.external_embedding = options.external_embedding

        parser = ArcHybridLSTM(words, pos, rels, w2i, stored_opt)
        parser.Load(options.model)
        tespath = os.path.join(options.output, 'test_pred.conll')
        ts = time.time()