def main(args):
    """Print the most probable parse of every sentence in the test file.

    Reads these attributes from ``args``:

    * ``input_PCFG_file`` -- resource passed to ``nltk.data.load`` to
      obtain the grammar.
    * ``test_sentence_filename`` -- text file read line by line; each
      line is stripped and tokenized with ``nltk.word_tokenize``.
    * ``improved`` -- when truthy, the ``*_improved`` table builder and
      parser are used instead of the original ones.

    For each input line exactly one line is written to stdout: the
    highest-probability parse flattened onto a single line, or an empty
    line when the parser raises ``NoParsesException``.
    """
    pcfg = nltk.data.load(args.input_PCFG_file)

    # Running tallies; they are maintained here but not reported by this
    # function.
    sentence_count = 0
    parse_count = 0

    with open(args.test_sentence_filename) as sentence_file:
        for raw_line in sentence_file:
            tokens = nltk.word_tokenize(raw_line.strip())

            build_table = (
                cky_build_table_improved if args.improved else cky_build_table
            )
            chart = build_table(tokens, pcfg)

            try:
                if args.improved:
                    # The improved parser's single result is wrapped in a
                    # list so both paths share the selection loop below.
                    candidates = [parse_table_improved(pcfg, chart)]
                else:
                    candidates = parse_table_orig(pcfg, chart)

                # Keep the first candidate with the strictly greatest
                # probability.
                best_parse, best_prob = None, -1
                for candidate in candidates:
                    prob = candidate.prob()
                    if prob > best_prob:
                        best_parse, best_prob = candidate, prob

                # Tree.__str__ is called explicitly so only the parse is
                # printed; per the original author's note, str() on the
                # parse object would also include its probability.
                print(Tree.__str__(best_parse).replace("\n", " "))
                parse_count += len(candidates)
            except NoParsesException:
                # Emit a blank line so output stays aligned with input.
                print("")

            sentence_count += 1
def fix_multi_sent(line):
    """Return the tree in ``line`` as a single-line bracketed string.

    ``line`` is parsed with ``Tree.fromstring``. If the resulting root has
    more than one child, those children are regrouped under a new ``S``
    node, which becomes the only child of a new ``S1`` root (the original
    root label is not kept). A root with at most one child is left as is.

    Args:
        line: Bracketed tree string accepted by ``Tree.fromstring``.

    Returns:
        The string form of the (possibly re-rooted) tree, with every
        newline and the whitespace that follows it collapsed to one space.
    """
    tree = Tree.fromstring(line)
    if len(tree) > 1:
        tree = Tree('S1', [Tree('S', tree[:])])
    # Raw string: '\s' in a normal string literal is an invalid escape
    # sequence and triggers a warning on modern Python versions.
    return re.sub(r'\n\s*', ' ', str(tree))