def multi_f(sentence):
    """Parse one sentence and print the ``dumps`` form of its tree.

    The grammar file path is read from ``sys.argv[1]``. A fresh ``PCFG`` is
    created and loaded on every call, so each invocation pays the full
    model-loading cost. Nothing is returned; the output goes to stdout.
    """
    model_path = sys.argv[1]
    grammar = PCFG()
    grammar.load_model(model_path)
    print(dumps(Parser(grammar).parse(sentence)))
def build_model():
    """Return a ``PCFG``, loading it from ``MODEL`` or building it from treebanks.

    If ``MODEL`` already exists it is simply loaded. Otherwise the function:

    1. creates ``TEMP_DIR`` when missing;
    2. generates the normalised QuestionBank / Penn Treebank files when they
       do not exist yet;
    3. walks both normalised treebanks, writing every tree to
       ``MODEL_TREEBANK`` and setting aside roughly one tree in a hundred as
       test data (``TEST_DAT`` gets the sentence, ``TEST_KEY`` the tree);
    4. learns the grammar from ``MODEL_TREEBANK`` and saves it to ``MODEL``.

    Progress and elapsed time are printed to stdout in the build case.
    """
    pcfg = PCFG()
    if exists(MODEL):
        pcfg.load_model(MODEL)
    else:
        # Single-argument print() calls behave the same on Python 2 and 3,
        # unlike the print statement, which Python 3 rejects outright.
        print("Building the Grammar Model")
        start = time()

        if not exists(TEMP_DIR):
            makedirs(TEMP_DIR)

        # Normalise the treebanks
        if not exists(QUESTIONBANK_NORM):
            normalize_questionbank(QUESTIONBANK_DATA, QUESTIONBANK_PENN_DATA)
            gen_norm(QUESTIONBANK_NORM, [QUESTIONBANK_PENN_DATA])

        if not exists(PENNTREEBANK_NORM):
            gen_norm(PENNTREEBANK_NORM, glob(PENNTREEBANK_GLOB))

        # Keep a part of the treebanks for testing
        i = 0
        with open(MODEL_TREEBANK, 'w') as model, \
                open(TEST_DAT, 'w') as dat, \
                open(TEST_KEY, 'w') as key:
            for treebank in [QUESTIONBANK_NORM, PENNTREEBANK_NORM]:
                # Close each treebank deterministically instead of leaving
                # the handle to the garbage collector.
                with open(treebank) as trees:
                    for tree in trees:
                        i += 1
                        if (i % 100) == 0:
                            # n is the second value returned by get_sentence;
                            # presumably the sentence length -- TODO confirm.
                            sentence, n = get_sentence(loads(tree))
                            if 7 < n < 20:
                                dat.write(sentence + '\n')
                                key.write(tree)
                            else:
                                # Candidate rejected: step the counter back so
                                # the next tree becomes the candidate instead.
                                i -= 1

                        # NOTE(review): held-out trees are written to the
                        # model treebank as well -- confirm this is intended.
                        model.write(tree)

        # Learn PCFG
        pcfg.learn_from_treebanks([MODEL_TREEBANK])
        pcfg.save_model(MODEL)
        print("Time: (%.2f)s\n" % (time() - start))

    return pcfg
def multi_f(sentence):
    """Parse one sentence and return whatever ``Parser.parse`` produces.

    The grammar is loaded from the path in ``sys.argv[1]`` each time the
    function is called; no state is kept between calls.
    """
    path = sys.argv[1]
    model = PCFG()
    model.load_model(path)
    sentence_parser = Parser(model)
    result = sentence_parser.parse(sentence)
    return result
# Per-process parser cache. It is filled on first use so the grammar is read
# from disk at most once per process instead of once per sentence.
_PARSER = None


def _get_parser():
    """Return this process's parser, building it from ``sys.argv[1]`` on first use."""
    global _PARSER
    if _PARSER is None:
        pcfg = PCFG()
        pcfg.load_model(sys.argv[1])
        _PARSER = Parser(pcfg)
    return _PARSER


def multi_f(sentence):
    """Parse a single sentence and return the resulting tree.

    Intended as the worker function for ``Pool.map``. The grammar named by
    ``sys.argv[1]`` is loaded the first time the function runs in a process
    and reused for every later sentence handled by that process.
    NOTE(review): this assumes a ``Parser`` can be reused across sentences --
    confirm against the ``Parser`` implementation.
    """
    return _get_parser().parse(sentence)


if __name__ == "__main__":
    if len(sys.argv) != 2:
        # sys.exit with a string writes it to stderr and exits with status 1,
        # so callers can detect the misuse; the bare exit() helper is only
        # provided by the site module and reported success.
        sys.exit("usage: python3 parser.py GRAMMAR")

    start = time()
    grammar_file = sys.argv[1]
    print("Loading grammar from " + grammar_file + " ...", file=stderr)
    # Load once up front: a bad grammar file fails here, before any workers
    # start. With the "fork" start method the workers inherit this parser;
    # with "spawn" each worker loads its own copy on its first sentence.
    _get_parser()

    print("Parsing sentences ...", file=stderr)
    with Pool(processes=os.cpu_count()) as pool:
        trees = pool.map(multi_f, stdin.readlines())

    for t in trees:
        print(dumps(t))
    print("Time: (%.2f)s\n" % (time() - start), file=stderr)