def batch_parse(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.

    :param inputs: sentences to be parsed
    :type inputs: list of str
    :param grammar: L{FeatureGrammar} or name of feature-based grammar
    :param trace: verbosity level passed through to the loaded parser
    :type trace: int
    :rtype: list
    :return: a list, parallel to ``inputs``, whose i-th element is the
        list of L{Tree}s produced for the i-th sentence
    """
    # put imports here to avoid circular dependencies
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser, load_parser

    # Accept either a grammar object or the name of a grammar resource.
    if isinstance(grammar, FeatureGrammar):
        cp = FeatureChartParser(grammar)
    else:
        cp = load_parser(grammar, trace=trace)
    # NOTE(review): whitespace splitting only — use a real tokenizer?
    return [cp.nbest_parse(sent.split()) for sent in inputs]
def batch_parse(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.

    :param inputs: sentences to be parsed
    :type inputs: list of str
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :param trace: verbosity level passed through to the loaded parser
    :type trace: int
    :rtype: list
    :return: a list, parallel to ``inputs``, whose i-th element is the
        list of ``Tree`` objects produced for the i-th sentence
    """
    # put imports here to avoid circular dependencies
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser, load_parser

    # Accept either a grammar object or the name of a grammar resource.
    if isinstance(grammar, FeatureGrammar):
        cp = FeatureChartParser(grammar)
    else:
        cp = load_parser(grammar, trace=trace)

    parses = []
    for sent in inputs:
        # NOTE(review): whitespace splitting only — use a real tokenizer?
        tokens = sent.split()
        parses.append(cp.nbest_parse(tokens))
    return parses
def parse_sents(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.

    :param inputs: sentences to be parsed
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar
    :param trace: verbosity level passed through to the loaded parser
    :type trace: int
    :rtype: list(list(nltk.tree.Tree))
    :return: a list, parallel to ``inputs``, whose i-th element is the
        list of ``Tree`` objects produced for the i-th sentence
    """
    # put imports here to avoid circular dependencies
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser, load_parser

    # Accept either a grammar object or the name of a grammar resource.
    if isinstance(grammar, FeatureGrammar):
        cp = FeatureChartParser(grammar)
    else:
        cp = load_parser(grammar, trace=trace)
    # cp.parse returns an iterator of parses; materialize each as a list.
    # NOTE(review): whitespace splitting only — use a real tokenizer?
    return [list(cp.parse(sent.split())) for sent in inputs]
def parse_sents(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.

    :param inputs: sentences to be parsed
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar
    :param trace: verbosity level passed through to the loaded parser
    :type trace: int
    :rtype: list(list(nltk.tree.Tree))
    :return: a list, parallel to ``inputs``, whose i-th element is the
        list of ``Tree`` instances produced for the i-th sentence.
    """
    # put imports here to avoid circular dependencies
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser, load_parser

    # Accept either a grammar object or the name of a grammar resource.
    if isinstance(grammar, FeatureGrammar):
        cp = FeatureChartParser(grammar)
    else:
        cp = load_parser(grammar, trace=trace)

    parses = []
    for sent in inputs:
        # NOTE(review): whitespace splitting only — use a real tokenizer?
        tokens = sent.split()
        # cp.parse returns an iterator of parses; materialize it.
        parses.append(list(cp.parse(tokens)))
    return parses
use_local_file = False if use_local_file: if 'hw6' in os.listdir(): os.chdir('hw6') input_pcfg_filename = 'grammar.fcfg' sentences_filename = 'sentences.txt' output_parses_filename = "hw6_output.txt" else: input_pcfg_filename = sys.argv[1] sentences_filename = sys.argv[2] output_parses_filename = sys.argv[3] gr = nltk.data.load(input_pcfg_filename, format='fcfg') # read fcfg grammar parser = FeatureChartParser(gr) # Initialize parserxx output_file = open(output_parses_filename, 'w') # Parse sentences with open(sentences_filename, 'r') as sent_file: line = sent_file.readline().strip('\n') while line: output_file.write(line + '\n') words = nltk.word_tokenize(line) try: parsed = parser.parse(words) tree = parsed.__iter__() out_tree = '' for tree in parsed: out_tree = str(tree.label()['SEM'].simplify())