def parse_sents(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.

    :param inputs: sentences to be parsed
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar or str
    :param trace: trace level handed to the chart parser (0 by default)
    :type trace: int
    :rtype: list(list(nltk.tree.Tree))
    :return: one list of ``Tree`` instances per input sentence, in input order
    """
    # put imports here to avoid circular dependencies
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser, load_parser

    if isinstance(grammar, FeatureGrammar):
        # Forward ``trace`` on this branch as well; it used to be dropped
        # whenever an already-loaded grammar object was supplied.
        cp = FeatureChartParser(grammar, trace=trace)
    else:
        cp = load_parser(grammar, trace=trace)

    # Sentences are split on whitespace only; no tokenizer is applied.
    return [list(cp.parse(sent.split())) for sent in inputs]
def parse_sents(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.

    :param inputs: sentences to be parsed
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar or str
    :param trace: trace level handed to the chart parser (0 by default)
    :type trace: int
    :rtype: list(list(nltk.tree.Tree))
    :return: one list of ``Tree`` instances per input sentence, in input order
    """
    # put imports here to avoid circular dependencies
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser, load_parser

    if isinstance(grammar, FeatureGrammar):
        # Forward ``trace`` on this branch as well; it used to be dropped
        # whenever an already-loaded grammar object was supplied.
        cp = FeatureChartParser(grammar, trace=trace)
    else:
        cp = load_parser(grammar, trace=trace)

    # Sentences are split on whitespace only; no tokenizer is applied.
    return [list(cp.parse(sent.split())) for sent in inputs]
# Load the feature-based grammar (fcfg format) and build a chart parser for it.
gr = nltk.data.load(input_pcfg_filename, format='fcfg')
parser = FeatureChartParser(gr)

# For every sentence: echo the sentence, then write the simplified SEM feature
# of its first parse on the next line (an empty line when there is none).
# Both files are opened with ``with`` so the output file is flushed and closed
# even if an unexpected exception escapes the loop.
with open(output_parses_filename, 'w') as output_file:
    with open(sentences_filename, 'r') as sent_file:
        # Iterate the file object directly. The previous readline()/while loop
        # could not tell a blank line from end-of-file, so everything after
        # the first blank line was silently dropped.
        for raw_line in sent_file:
            line = raw_line.strip('\n')
            if not line:
                continue  # blank line: nothing to parse, keep reading

            output_file.write(line + '\n')
            words = nltk.word_tokenize(line)

            out_tree = ''
            try:
                # Only the first parse is reported.
                tree = next(iter(parser.parse(words)), None)
                if tree is not None:
                    out_tree = str(tree.label()['SEM'].simplify())
            except ValueError:
                # NOTE(review): presumably raised when the grammar does not
                # cover one of the tokens -- confirm. Treated as "no parse".
                out_tree = ''

            # Exactly one result line per sentence, empty when unparsed.
            output_file.write(out_tree + '\n')