Exemple #1
0
def run(args):
    """Train a PCFG on the treebank, then run the actions selected by ``args``.

    The treebank at ``TREEBANK_PATH`` is loaded and split into three parts
    (presumably train/dev/test with proportions 0.8/0.1/0.1 -- confirm
    against ``loader.train_test_split``). A ``PCFG`` is built and trained on
    the first part, and its out-of-vocabulary handling is configured from the
    word embeddings at ``EMBEDDING_PATH``.

    Args:
        args: Object exposing the attributes ``generate_output``,
            ``evaluation`` and ``parse`` (each tested for truthiness) and,
            when ``parse`` is truthy, ``txt_path`` (path of a text file to
            parse). Presumably an ``argparse.Namespace``.

    Returns:
        None. The values returned by ``generate_output`` and ``predict`` are
        not used here; those methods are called for their side effects only.
    """
    data = loader.load_treebanks(TREEBANK_PATH)
    # The middle (dev) part is not used by this function.
    train_data, _dev_data, test_data = loader.train_test_split(
        data, 0.8, 0.1, 0.1)
    words, embeddings = loader.load_word_embeddings(EMBEDDING_PATH)

    pcfg = PCFG(train_data)
    pcfg.train(train_data)
    pcfg.set_oov(OOV, words, embeddings)

    if args.generate_output:
        pcfg.generate_output(test_data)

    if args.evaluation:
        # NOTE(review): only the first two test items are evaluated; this
        # looks like a debugging leftover -- confirm before widening it.
        pcfg.predict(test_data[:2])

    if args.parse:
        with open(args.txt_path, 'r') as f:
            # split('\n') keeps a trailing empty string when the file ends
            # with a newline; left as-is because parse_from_txt may rely on it.
            corpus = f.read().split('\n')
        pcfg.parse_from_txt(corpus)
Exemple #2
0
from pcfg import PCFG
import argparse

# Command-line interface: every option is an optional string argument.
parser = argparse.ArgumentParser()
for _flag, _help in (
    ("--corpus", "training treebank corpus"),
    ("--sentences", "raw token sentences"),
    ("--outfile", "name of the output file"),
):
    parser.add_argument(_flag, help=_help, type=str)
args = parser.parse_args()

# Build the grammar from the corpus argument, process the corpus, then run
# prediction on the sentences argument with the output file name.
grammar = PCFG(args.corpus)
grammar.parse_corpus()
grammar.predict(args.sentences, args.outfile)