Exemple #1
0
def multi_f(sentence):
    """Parse ``sentence`` and print the serialized tree.

    The grammar path is taken from ``sys.argv[1]`` and a new ``PCFG`` is
    loaded from it on every call.  The parse result is passed through
    ``dumps`` and written to standard output; nothing is returned.
    """
    model_path = sys.argv[1]
    grammar = PCFG()
    grammar.load_model(model_path)
    parsed = Parser(grammar).parse(sentence)
    serialized = dumps(parsed)
    print(serialized)
Exemple #2
0
def build_model():
    """Return a PCFG, loading the saved model if present, else building it.

    When ``MODEL`` exists the grammar is loaded from it and returned
    immediately.  Otherwise the function:

    1. creates ``TEMP_DIR`` if it is missing,
    2. generates the normalised treebank files that do not exist yet,
    3. copies every tree into ``MODEL_TREEBANK`` while writing a sample of
       sentences/trees to ``TEST_DAT`` / ``TEST_KEY``,
    4. learns the grammar from ``MODEL_TREEBANK`` and saves it to ``MODEL``.

    Progress and the elapsed build time are printed to standard output.

    Returns:
        The loaded or freshly learned ``PCFG`` instance.
    """
    pcfg = PCFG()
    if exists(MODEL):
        pcfg.load_model(MODEL)
        return pcfg

    # Single-argument parenthesised print emits the same text on Python 2
    # and Python 3.
    print("Building the Grammar Model")
    start = time()

    if not exists(TEMP_DIR):
        makedirs(TEMP_DIR)

    # Normalise the treebanks (skipped when the normalised file is present).
    if not exists(QUESTIONBANK_NORM):
        normalize_questionbank(QUESTIONBANK_DATA, QUESTIONBANK_PENN_DATA)
        gen_norm(QUESTIONBANK_NORM, [QUESTIONBANK_PENN_DATA])

    if not exists(PENNTREEBANK_NORM):
        gen_norm(PENNTREEBANK_NORM, glob(PENNTREEBANK_GLOB))

    # Keep a part of the treebanks for testing.
    # NOTE(review): every tree, including the sampled ones, is still written
    # to MODEL_TREEBANK, so the test trees are also part of the training
    # data -- confirm this is intended.
    i = 0
    with open(MODEL_TREEBANK, 'w') as model, \
            open(TEST_DAT, 'w') as dat, \
            open(TEST_KEY, 'w') as key:
        for treebank in [QUESTIONBANK_NORM, PENNTREEBANK_NORM]:
            # Open each treebank in a context manager so its handle is
            # closed as soon as it has been read.
            with open(treebank) as trees:
                for tree in trees:
                    i += 1
                    if i % 100 == 0:
                        # Every 100th counted tree is a test candidate.
                        # n is presumably the sentence length -- verify
                        # against get_sentence.
                        sentence, n = get_sentence(loads(tree))
                        if 7 < n < 20:
                            dat.write(sentence + '\n')
                            key.write(tree)
                        else:
                            # Candidate rejected: step the counter back so
                            # the next tree is considered instead.
                            i -= 1

                    model.write(tree)

    # Learn PCFG
    pcfg.learn_from_treebanks([MODEL_TREEBANK])
    pcfg.save_model(MODEL)
    print("Time: (%.2f)s\n" % (time() - start))

    return pcfg
Exemple #3
0
def multi_f(sentence):
    """Return the parse of ``sentence`` under the command-line grammar.

    The grammar path is read from ``sys.argv[1]``; a new ``PCFG`` is loaded
    from it and wrapped in a ``Parser`` on every call.
    """
    model_path = sys.argv[1]
    grammar = PCFG()
    grammar.load_model(model_path)
    return Parser(grammar).parse(sentence)
Exemple #4
0
def multi_f(sentence):
    """Parse a single sentence; used as the per-item worker function.

    Reads the grammar path from ``sys.argv[1]``, loads a new ``PCFG`` from
    it, and returns whatever ``Parser.parse`` produces for ``sentence``.
    """
    path = sys.argv[1]
    model = PCFG()
    model.load_model(path)
    sentence_parser = Parser(model)
    result = sentence_parser.parse(sentence)
    return result

if __name__ == "__main__":
    # Command-line entry point: read sentences from stdin (one per line),
    # parse them in parallel with the grammar given as the only argument,
    # and print one serialized tree per sentence to stdout.  Progress and
    # timing messages go to stderr so stdout carries only the trees.

    if len(sys.argv) != 2:
        # sys.exit with a string writes it to stderr and exits with status 1,
        # keeping the usage message off stdout and signalling failure.
        sys.exit("usage: python3 parser.py GRAMMAR")

    start = time()
    grammar_file = sys.argv[1]
    print("Loading grammar from " + grammar_file + " ...", file=stderr)
    # NOTE(review): pcfg and parser are not referenced again in this block;
    # the pool workers go through multi_f instead.
    pcfg = PCFG()
    pcfg.load_model(grammar_file)
    parser = Parser(pcfg)

    print("Parsing sentences ...", file=stderr)

    # One worker process per CPU reported by os.cpu_count(); Pool.map
    # returns the results in the same order as the input lines.
    with Pool(processes=os.cpu_count()) as pool:
        trees = pool.map(multi_f, stdin.readlines())

    for t in trees:
        print(dumps(t))

    print("Time: (%.2f)s\n" % (time() - start), file=stderr)