Example #1
0
def parse_sents(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.

    :param inputs: sentences to be parsed
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar
    :rtype: list(list(nltk.tree.Tree))
    :return: for each input sentence, in order, the list of parse ``Tree``s
        produced by the chart parser (empty list if the sentence has no parse)
    """
    # put imports here to avoid circuit (circular) dependencies
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser, load_parser

    # Accept either a ready-made grammar object or a loadable grammar name/URL.
    if isinstance(grammar, FeatureGrammar):
        cp = FeatureChartParser(grammar)
    else:
        cp = load_parser(grammar, trace=trace)
    # Naive whitespace tokenization; a real tokenizer may be preferable.
    return [list(cp.parse(sent.split())) for sent in inputs]
Example #2
0
def parse_sents(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.

    :param inputs: sentences to be parsed
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar
    :rtype: list(list(nltk.tree.Tree))
    :return: for each input sentence, in order, the list of parse ``Tree``
        instances produced by the chart parser (empty if no parse is found)
    """
    # put imports here to avoid circular dependencies
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser, load_parser

    # A FeatureGrammar instance is used directly; anything else is treated
    # as a grammar name/URL and handed to load_parser.
    if isinstance(grammar, FeatureGrammar):
        cp = FeatureChartParser(grammar)
    else:
        cp = load_parser(grammar, trace=trace)
    # Split on whitespace for now; a real tokenizer may be preferable.
    return [list(cp.parse(sentence.split())) for sentence in inputs]
Example #3
0
    # NOTE(review): this is the body of a larger function whose header is not
    # visible here — input_pcfg_filename, output_parses_filename,
    # sentences_filename, nltk, and FeatureChartParser all come from the
    # enclosing scope (TODO confirm).
    gr = nltk.data.load(input_pcfg_filename,
                        format='fcfg')  # load the feature-based (fcfg) grammar

    parser = FeatureChartParser(gr)  # initialize the chart parser

    # NOTE(review): output_file is never closed within this chunk —
    # presumably closed after this span; verify, or move to a `with` block.
    output_file = open(output_parses_filename, 'w')

    # Parse sentences one line at a time; for each, echo the sentence and
    # then write either the SEM value of its first parse or a blank line.
    with open(sentences_filename, 'r') as sent_file:
        line = sent_file.readline().strip('\n')
        while line:  # stops at EOF or at the first empty line
            output_file.write(line + '\n')
            words = nltk.word_tokenize(line)
            try:
                parsed = parser.parse(words)
                # NOTE(review): this iterator is immediately shadowed by the
                # loop variable below — appears to be dead code.
                tree = parsed.__iter__()
                out_tree = ''
                for tree in parsed:
                    # Emit the simplified SEM feature of the parse's root label.
                    out_tree = str(tree.label()['SEM'].simplify())
                    # print(tree.label()['SEM'].simplify())
                    output_file.write(out_tree + '\n')
                    # print(tree)
                    break  # only print one possible tree
                if not out_tree:
                    # No parse produced: keep output aligned with a blank line.
                    # print('')
                    output_file.write('\n')
            except ValueError:
                # Parser rejected the input (e.g. word not covered by the
                # grammar): blank line keeps output aligned with input.
                # print('')
                output_file.write('\n')
            line = sent_file.readline().strip('\n')