def count_rules(self, tree):
    """
    For each level in the given tree, increment the corresponding count
    in the dictionary.
    :param tree: the given tree
    :return: void
    """
    lhs = tree.label()
    if self.root == '':
        self.root = lhs
    # if this tree's children are subtrees, count the rules in each of those too
    if len(tree) > 1:
        rhs = Nonterminal(tree[0].label() + " " + tree[1].label())
        self.count_rules(tree[0])
        self.count_rules(tree[1])
    # if this tree has one child, and it's a subtree, count the rules in there too
    elif isinstance(tree[0], Tree):
        rhs = Nonterminal(tree[0].label())
        self.count_rules(tree[0])
    # if this tree's child is a leaf, no recursion is necessary
    else:
        rhs = tree[0]
    self.grammar_counts.setdefault(lhs, {}).setdefault(rhs, 0)
    self.grammar_counts[lhs][rhs] += 1
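# Hypothetical harness for count_rules, a minimal sketch only: the original
# class is not shown, so we assume it carries an empty `root` string and a
# nested-dict `grammar_counts`, and we bind the standalone function above as
# a method.
from nltk import Tree, Nonterminal

class RuleCounter:
    def __init__(self):
        self.root = ''
        self.grammar_counts = {}

    count_rules = count_rules  # bind the function defined above as a method

counter = RuleCounter()
counter.count_rules(Tree.fromstring("(S (NP (DT the) (NN dog)) (VP (VBD barked)))"))
print(counter.root)            # S
print(counter.grammar_counts)  # per-LHS counts of each observed RHS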
def chunker(parsedData):
    """
    Extract the grammar rules from the parsed input text and assign each
    rule the probability of it occurring in the parsed text.
    """
    tags_words = treebank.tagged_words()
    # This is the list where all the rules will be stored, for
    # construction of the PCFG
    rules = []
    # Extract the rules from the training data
    for sent in parsedData:
        for production in sent.productions():
            rules.append(production)
    # Add the lexical rules
    for word, tag in tags_words:
        # For each tagged word, create a tree containing that lexical
        # rule, so that it can be added to the list of rules
        t = Tree.fromstring("(" + tag + " " + word + ")")
        for production in t.productions():
            rules.append(production)
    # All of the syntactic rules and all of the lexical rules have now
    # been extracted from the training data; induce the PCFG from them
    rules_prob = nltk.grammar.induce_pcfg(Nonterminal('S'), rules)
    return rules_prob
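# Sketch of how chunker might be driven (the slice size and test sentence
# are illustrative assumptions; ViterbiParser comes from nltk.parse):
from nltk.corpus import treebank
from nltk.parse import ViterbiParser

grammar = chunker(treebank.parsed_sents()[:50])
parser = ViterbiParser(grammar)
for parse in parser.parse("the company said it will buy the unit".split()):
    print(parse)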
def yoda_translation(root: Tree):
    """
    Provides translation from the Italian language into Yoda-speak
    using a transfer approach.
    :param root: the syntactic tree to be translated
    """
    current_index = [
        index for index in root.treepositions()
        if isinstance(root[index], Tree)
        and root[index].label() in [Nonterminal("VP"), Nonterminal("AUX")]
        and len(root[index]) == 1][0]
    parent_index = get_parent(current_index)
    nodes_to_be_moved = []
    while root[parent_index].label() == Nonterminal("VP"):
        index_to_be_moved = get_right_child(parent_index)
        nodes_to_be_moved.append(root[index_to_be_moved])
        root[index_to_be_moved] = Tree("ε", [])
        current_index = parent_index
        parent_index = get_parent(current_index)
    nodes_to_be_moved.reverse()
    for node in nodes_to_be_moved:
        root = Tree('Yoda Translation', [node, root])
    root.draw()
def update_complete_chart(chart, tokens, grammar, trace=False):
    """Updates the non-diagonal elements of the chart.

    Arguments:
    ----------
    chart (list): List of lists containing the chart cells
    tokens (list): List of words in the input sentence
    grammar (nltk.grammar.CFG): Grammar whose production rules are used
    trace (bool): Whether to print trace output (currently unused)
    """
    index = dict((p.rhs(), p.lhs()) for p in grammar.productions())
    num_tokens = len(tokens)
    for span in range(2, num_tokens + 1):
        for start in range(num_tokens + 1 - span):
            end = start + span
            temp_categories, temp_rules, temp_mids = [], [], []
            for mid in range(start + 1, end):
                nt1s, nt2s = chart[start][mid], chart[mid][end]
                if len(nt1s) != 0 and len(nt2s) != 0:
                    for nt1 in nt1s[0]:
                        for nt2 in nt2s[0]:
                            if nt1 and nt2 and (nt1, nt2) in index:
                                p = Production(
                                    index[(nt1, nt2)],
                                    (Nonterminal(nt1), Nonterminal(nt2)))
                                temp_rules.append(f'{p._lhs} -> {p._rhs}')
                                temp_categories.append(index[(nt1, nt2)])
                                # record the split point for this entry, so the
                                # backpointer is not just the last mid tried
                                temp_mids.append(mid)
            chart[start][end] = [(temp_categories[i], temp_rules[i], temp_mids[i])
                                 for i in range(len(temp_rules))]
    return chart
def _generate_production(self, t):
    arr = []
    for child in t:
        if isinstance(child, str):
            arr.append(child)
        else:
            arr.append(Nonterminal(child.label()))
    return Production(Nonterminal(t.label()), tuple(arr))
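# Standalone illustration of the same idea (the tree here is hypothetical;
# Tree, Production and Nonterminal come from nltk):
from nltk import Tree, Production, Nonterminal

t = Tree.fromstring("(S (NP (DT the) (NN dog)) (VP (VBD barked)))")
# The top level of the tree corresponds to the production S -> NP VP
print(Production(Nonterminal(t.label()),
                 tuple(Nonterminal(c.label()) for c in t)))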
def translate_it_yo(tree):
    SUBJ = [Nonterminal("PRON"), Nonterminal("NP"), Nonterminal("N")]
    VERB = [Nonterminal("VP")]
    yoda_tree = Tree("Yoda", [])
    for i in range(len(tree)):
        if tree[i].label() in SUBJ:
            yoda_tree.insert(1, tree[i])
        if tree[i].label() in VERB:
            V = tree[i][0]
            X = tree[i][1]
            yoda_tree.insert(0, X)
            yoda_tree.insert(2, V)
    return yoda_tree
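# Toy run of translate_it_yo (the Italian parse below is hypothetical; the
# subtrees are built with Nonterminal labels because the membership tests
# above compare against Nonterminal objects, not strings):
from nltk import Tree, Nonterminal

it_tree = Tree("S", [
    Tree(Nonterminal("PRON"), ["io"]),
    Tree(Nonterminal("VP"), [Tree("V", ["mangio"]),
                             Tree("NP", ["la", "mela"])]),
])
print(translate_it_yo(it_tree))  # (Yoda (NP la mela) (PRON io) (V mangio))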
def create_grammar(x_train):
    productions = []
    for x in x_train:
        for tree in treebank.parsed_sents(x):
            # tree.collapse_unary(collapsePOS = True)
            tree.chomsky_normal_form()
            productions += tree.productions()
    S = Nonterminal('S')
    # Add an <UNK> rewrite for every POS tag so unseen words can be parsed
    for w in ['CC', 'CD', 'DT', 'EX', 'FW', 'IN', 'JJ', 'JJR', 'JJS', 'LS',
              'MD', 'NN', 'NNS', 'NNP', 'NNPS', 'PDT', 'POS', 'PRP', 'PRP$',
              'RB', 'RBR', 'RBS', 'RP', 'TO', 'UH', 'VB', 'VBD', 'VBG', 'VBN',
              'VBP', 'VBZ', 'WDT', 'WP', 'WP$', 'WRB', 'NP']:
        productions.append(Production(Nonterminal(w), ('<UNK>',)))
    grammar = create_pcfg(S, productions)
    return grammar
def test_grammar_general():
    np.random.seed(0)
    txtgram = "S -> S '+' F [0.2] | F [0.8] \n"
    txtgram += "F -> 'x' [0.5] | 'y' [0.5]"
    grammar = GeneratorGrammar(txtgram)
    sample = grammar.generate_one()
    assert sample[0] == ['y'] and sample[1] == 0.4 and sample[2] == '11'
    assert grammar.count_trees(Nonterminal("S"), 5) == 30
    assert grammar.count_coverage(Nonterminal("S"), 2) == 0.8
    assert "".join(grammar.code_to_expression('0101')[0]) == "x+y"
def pcfg_learn1(treebank, n):
    productions = []
    for i in range(n):
        for tree in treebank.parsed_sents()[:i + 1]:
            prod_gen = tree_to_productions(tree, "BOT")
            tree_to_append = next(prod_gen)[0]
            while tree_to_append:
                if tree_to_append.lhs() == Nonterminal('NP'):
                    productions.append(tree_to_append)
                try:
                    tree_to_append = next(prod_gen)[0]
                except StopIteration:
                    tree_to_append = False
    productions, dist = get_productions(productions)
    return PCFG(Nonterminal('NP'), productions), dist
def to_pcfg(sequences, sections):
    sequences = [s[s >= 0] for s in sequences]
    trees = [Tree.fromstring(to_tree(s, sections)) for s in sequences]
    # [t.collapse_unary(collapsePOS = False) for t in trees]
    # [t.chomsky_normal_form(horzMarkov = 2) for t in trees]
    prods = [p for t in trees for p in t.productions()]
    print(induce_pcfg(Nonterminal('S'), prods))
def test1():
    nt1 = Nonterminal('NP')
    nt2 = Nonterminal('VP')
    print(nt1.symbol())
    S, NP, VP, PP = nonterminals('S, NP, VP, PP')
    N, V, P, DT = nonterminals('N, V, P, DT')
    prod1 = Production(S, [NP, VP])
    prod2 = Production(NP, [DT, NP])
    print(prod1.lhs())
    print(prod1.rhs())
    print(prod1 == Production(S, [NP, VP]))
    print(prod1 == prod2)
def get_bigram_and_deep_syntax_feature(review, speller, stop_words, ps, preprocess):
    res = ""
    productions = []
    parser = CoreNLPParser(url='http://localhost:9500')
    for sentence in re.split(r"[.!?]", review):
        try:
            tree = next(parser.raw_parse(sentence))
            # Optimize by converting to Chomsky normal form
            tree.collapse_unary(collapsePOS=False)
            tree.chomsky_normal_form(horzMarkov=2)
            productions += tree.productions()
        except StopIteration:
            # End of review reached
            break
    S = Nonterminal('S')
    grammar = induce_pcfg(S, productions)
    count = 0
    for line in str(grammar).split("\n"):
        if count == 0:
            # Skip the grammar header line
            count += 1
            continue
        elif "'" in line:
            res += re.sub(r"[(->) `\'\"\[\d\]]", "", line) + " "
    res += bipos.get_bigrams_and_unigrams_of_sentence(
        bow.sanitize_sentence(review, speller, stop_words, ps, preprocess))
    return res
def train_grammar(unknown_words=[], nb_reduced_production=6000):
    productions = []
    for item in train:
        for tree in treebank.parsed_sents(item):
            # perform optional tree transformations, e.g.:
            tree.collapse_unary(collapsePOS=False)  # Collapse unary chains A-B-C into A-B+C
            tree.chomsky_normal_form(horzMarkov=2)  # Binarize A->(B,C,D) into A->B,C+D and C+D->C,D
            productions += tree.productions()

    counter = collections.Counter(productions)
    n_comms = [item for item, count in counter.most_common(nb_reduced_production)
               for i in range(count)]

    # Add unknown words and the terminal rules back into the reduced production set
    unknown_words_prods = []
    for p in productions:
        if isinstance(p._rhs[0], str):
            unknown_words_prods.append(p)
            for u in unknown_words:
                new_prod = Production(p._lhs, [u])
                unknown_words_prods.append(new_prod)

    n_comms += unknown_words_prods
    S = Nonterminal('S')
    grammar = induce_pcfg(S, n_comms)
    return grammar
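# Hypothetical driver for train_grammar: `train` is the module-level list of
# treebank file ids the function iterates over, and the unknown word below is
# purely illustrative (collections, Production, Nonterminal and induce_pcfg
# are assumed imported as in the function above):
from nltk.corpus import treebank

train = treebank.fileids()[:160]
grammar = train_grammar(unknown_words=['blockchain'], nb_reduced_production=6000)
print(grammar.productions()[:5])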
def pcfg(train_idx=None, smoothing=None):
    """
    productions = []
    item = treebank._fileids[0]
    print("ITEM\n\n", item, "\n\n")
    for tree in treebank.parsed_sents(item)[:3]:
        # perform optional tree transformations, e.g.:
        tree.collapse_unary(collapsePOS=False)
        tree.chomsky_normal_form(horzMarkov=2)
        productions += tree.productions()
    """
    if train_idx is None:
        train_idx = (len(treebank.fileids()) * 3) // 4
    productions = []
    for item in treebank.fileids()[0:train_idx]:
        for tree in treebank.parsed_sents(item):
            tree.collapse_unary(collapsePOS=False)  # Remove unary production rules
            tree.chomsky_normal_form(horzMarkov=2)  # Convert to Chomsky normal form,
            # i.e., A->(B,C,D) becomes A->(B,E), E->(C,D)
            productions += tree.productions()
    S = Nonterminal('S')
    if smoothing is None:
        grammar = learn_pcfg(S, productions)
    elif smoothing == 'L1':
        grammar = smoothing_pcfg(S, productions)
    with open('grammar.pkl', 'wb') as f:
        pickle.dump(grammar, f)
    return grammar
def main(config):
    grammar_string = parse_induced_grammar(config.grammar)
    if config.output:
        with open(config.output, 'w') as f:
            f.write(grammar_string)
    grammar = PCFG.fromstring(grammar_string)
    grammar._start = Nonterminal('TOP')  # Not sure whether this is allowed or breaks things
    if config.textfile:
        # Create a directory for the parse trees if it does not already exist
        if not os.path.exists(config.output_parse):
            os.makedirs(config.output_parse)
        parser = ViterbiParser(grammar)
        with open(config.textfile, 'r') as f:
            lines = f.read().splitlines()
        for i, line in enumerate(lines):
            if i == config.number_parses:
                break
            print(f"Parsing sentence {i + 1}")
            sent = line.split()
            for t in parser.parse(sent):
                TreeView(t)._cframe.print_to_file(f"{config.output_parse}/tree_{i}")
def get_grammar(cls, train_trees, starting_symb='SENT'):
    """
    This method returns the grammar computed from the training set.

    Inputs:
    -------
    train_trees (list): List of trees to perform training
    starting_symb (str): The root symbol
    """
    productions = []
    # Chomsky Normal Form
    for tree in train_trees:
        # Remove unary rules
        treetransforms.collapse_unary(tree)
        # Transform to CNF
        treetransforms.chomsky_normal_form(tree, horzMarkov=2)
        # Compute the productions and store them
        productions += tree.productions()
    # Define the root symbol
    SENT = Nonterminal(starting_symb)
    # Compute the grammar using PCFG induction
    grammar = induce_pcfg(SENT, productions)
    grammar.chomsky_normal_form()
    return grammar
def accuracy(train_rules, test_rules, prob_thresh):
    """
    Gives the NP production rules which are exclusive to one set of rules.
    """
    NP = Nonterminal('NP')
    trainAmount = 0
    testAmount = 0
    rules_train = []
    for rule in train_rules.productions():
        if NP == rule.lhs():  # and rule.lhs() != 'NNP' and rule.lhs() != 'NNPS':
            trainAmount += 1
            rules_train.append(rule.rhs())
    rules_test = []
    for rule in test_rules.productions():
        if NP == rule.lhs():  # and rule.lhs() != 'NNP' and rule.lhs() != 'NNPS':
            testAmount += 1
            rules_test.append(rule.rhs())
    rulesExclusivelyInTrain = 0
    for train_rule in rules_train:
        if train_rule not in rules_test:
            rulesExclusivelyInTrain += 1
    rulesExclusivelyInTest = 0
    for test_rule in rules_test:
        if test_rule not in rules_train:
            rulesExclusivelyInTest += 1
    return rulesExclusivelyInTrain, rulesExclusivelyInTest, trainAmount, testAmount
def create_pcfg_from_treebank(pickle_it=False, log_it=False, filename="treebank", full=False):
    """
    Creates a PCFG from the Penn Treebank dataset using induce_pcfg
    Optional pickling of this PCFG in pickled-vars/
    """
    tb = ptb if full else treebank
    productions = []
    flat_trees = 0
    for item in tb.fileids():  # Goes through all trees
        for tree in tb.parsed_sents(item):
            if tree.height() == 2:  # Gets rid of flat trees
                flat_trees += 1
                continue
            # print(" ".join(tree.leaves()))  # This should print the sentences
            # perform optional tree transformations, e.g.:
            # tree.collapse_unary(collapsePOS = False)  # Collapse branches A-B-C into A-B+C
            # tree.chomsky_normal_form(horzMarkov = 2)  # Binarize A->(B,C,D) into A->B,C+D and C+D->C,D
            productions += tree.productions()
    print("%s flat trees purged" % flat_trees)
    S = Nonterminal('S')
    grammar = induce_pcfg(S, productions)
    if pickle_it:
        pickle.dump(grammar, open("%s%s-grammar.p" % (var_dir, filename), "wb"))
    if log_it:
        save_grammar_cleartext(grammar, filename)
        save_lexicon_cleartext(grammar, filename)
    return grammar
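# Example call (hedged: `ptb`, `var_dir`, and the save_*_cleartext helpers
# are assumed to be defined elsewhere in this module):
grammar = create_pcfg_from_treebank(pickle_it=False, log_it=False,
                                    filename="treebank", full=False)
print(len(grammar.productions()))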
def test_weird_indices():
    cfg = CFGHelper(sample_grammar)
    indexes = [1, 1, 1, 1, 1, 1, 1, 1, 1]
    assert cfg.indexes_to_tokens(indexes) == ['y']
    indexes = [1]
    assert cfg.indexes_to_tokens(indexes) == [Nonterminal('V')]
def build_context_free_grammar(self, data):
    productions = []
    for tree in [Tree.fromstring(t) for t in data]:
        tree.collapse_unary(collapsePOS=False)
        tree.chomsky_normal_form(horzMarkov=2)
        productions += tree.productions()
    starting_state = Nonterminal('SENT')
    grammar = induce_pcfg(starting_state, productions)
    return grammar
def build_candidate_tree(score, back, words):
    li = 0
    ri = len(words)
    tagi = Nonterminal('NEW_ROOT')
    if tagi not in back[li][ri]:
        return None
    tree_string = '(' + str(tagi) + ' ' + build_tree(back, li, ri, tagi, words, "")
    candidate_tree = Tree.fromstring(tree_string)
    return candidate_tree
def compose_children(self):
    """
    Combine all valid left and right children for the current location in the matrix.
    """
    for l_symbol, l_info in self.matrix[self.i][self.k].items():
        l_rhs = Nonterminal(l_symbol)
        for r_symbol, r_info in self.matrix[self.k][self.j].items():
            r_rhs = Nonterminal(r_symbol)
            # check the subtrees in [i][k] and [k][j] to see if they form a valid rhs
            potential_rules = [p for p in self.grammar.productions(rhs=l_rhs)
                               if p.rhs()[1] == r_rhs]
            for potential_rule in sorted(potential_rules, key=lambda x: x.prob()):
                new_lhs = potential_rule.lhs().symbol()
                new_tree = Tree(new_lhs, [l_info[1], r_info[1]])
                new_prob = log(potential_rule.prob()) + l_info[0] + r_info[0]
                if (new_lhs not in self.matrix[self.i][self.j]
                        or new_prob > self.matrix[self.i][self.j][new_lhs][0]):
                    self.matrix[self.i][self.j][new_lhs] = (new_prob, new_tree)
def create_pcfg(self, trees):
    productions = []
    for tree in trees:
        tree.collapse_unary(collapsePOS=True)
        tree.chomsky_normal_form(horzMarkov=2)
        productions += tree.productions()
    S = Nonterminal('SENT')
    grammar = induce_pcfg(S, productions)
    return grammar
def _setprob(self, tree, prod_probs):
    if tree.prob() is not None:
        return
    # Get the probability of the CFG production at this node.
    lhs = Nonterminal(tree.label())
    rhs = []
    for child in tree:
        if isinstance(child, Tree):
            rhs.append(Nonterminal(child.label()))
        else:
            rhs.append(child)
    prob = prod_probs[lhs, tuple(rhs)]
    # Multiply in the probabilities of the children.
    for child in tree:
        if isinstance(child, Tree):
            self._setprob(child, prod_probs)
            prob *= child.prob()
    tree.set_prob(prob)
def induce(trees: Iterable) -> FancyPCFG:
    productions = []
    for tree in trees:
        # tree.pretty_print()
        # perform optional tree transformations, e.g.:
        # tree.collapse_unary(collapsePOS = False)  # Collapse branches A-B-C into A-B+C
        # tree.chomsky_normal_form(horzMarkov = 2)  # Binarize A->(B,C,D) into A->B,C+D and C+D->C,D
        productions += tree.productions()
    S = Nonterminal('S')
    grammar = induce_pcfg(S, productions)
    return FancyPCFG.fromCFG(grammar)
def update_grammar(productions, unknown):
    lis = pos_tagger.tag(unknown)
    for i in range(len(lis)):
        pos = nonterminals(lis[i][1])[0]
        production_ = Production(pos, [unknown[i]])
        productions.append(production_)
        print(production_, "added to productions")
    S = Nonterminal('SENT')
    grammar = induce_pcfg(S, productions)
    return grammar
def gen_sql_stmt_from_grammar(self, start_, num_stmts=None,
                              table_name="table_name", columns_name="columns_names"):
    grammar = CFG.fromstring(
        self.get_sql_select_stml_grammar(table_name, columns_name, COMMON_VALUES))
    sql_select_stmts = []
    for stmt in generate(grammar, start=Nonterminal(start_), n=num_stmts):
        sql_select_stmts.append(''.join(stmt))
    return sql_select_stmts
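# Standalone sketch of the underlying nltk.parse.generate.generate call,
# using a toy SELECT grammar (the table and column names are placeholders,
# not the grammar the method above builds):
from nltk import CFG, Nonterminal
from nltk.parse.generate import generate

toy = CFG.fromstring("""
STMT -> 'SELECT ' COLS ' FROM my_table'
COLS -> '*' | 'col_a' | 'col_b'
""")
for stmt in generate(toy, start=Nonterminal('STMT'), n=3):
    print(''.join(stmt))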
def getGrammar():
    fileid = treebank.fileids()
    trainfiles = fileid[:160]
    # testfiles = fileid[0.8*len(fileid):]
    productions = []
    for item in trainfiles:
        for tree in treebank.parsed_sents(item):
            # perform optional tree transformations, e.g.:
            tree.collapse_unary(collapsePOS=False)  # Collapse branches A-B-C into A-B+C
            tree.chomsky_normal_form(horzMarkov=2)  # Binarize A->(B,C,D) into A->B,C+D and C+D->C,D
            productions += tree.productions()

    list_prod = list(set(productions))

    # Collect the lexical rules and the preterminals (POS tags) they use
    token_rule = [p for p in list_prod if p.is_lexical()]
    list_token_rule = list(set(p.lhs() for p in token_rule))
    corr_list_token_rule = [tag for tag in list_token_rule if str(tag).isalpha()]

    # Give every alphabetic POS tag a rewrite to the placeholder terminal
    # 'UNK', so that words unseen in training can still be parsed
    a = []
    for tok in corr_list_token_rule:
        lhs2 = nltk.grammar.Nonterminal(str(tok))
        a.append(nltk.grammar.Production(lhs2, ['UNK']))
    token_rule.extend(a)
    list_prod.extend(a)

    S = Nonterminal('S')
    grammar = induce_pcfg(S, list_prod)
    return grammar
def nonterm_generation_suite(cfg):
    # Data
    expansions = defaultdict(list)
    # Run tests
    # TODO expand all nonterminals
    for x in range(5):
        expansions[Nonterminal('ROOT')].append(
            cfg.nltk_expand(Nonterminal('ROOT')))
    # Print results
    print('Symbols in cfg:\n')
    pprint.pprint(cfg.productions.keys())
    print('\nExpansions:\n')
    for symbol in expansions:
        if symbol == 'S':
            print('Root:', symbol, '\n')
        else:
            print('Nonroot:', symbol)
            pprint.pprint(expansions[symbol])
            print('\n')
def main():
    # print(nltk.corpus.treebank.parsed_sents('wsj_0001.mrg')[0])
    # nltk.corpus.treebank.parsed_sents('wsj_0001.mrg')[0].draw()
    print("Induce PCFG grammar from treebank data:")
    productions = []
    print(len(treebank.fileids()))
    for item in treebank.fileids():  # Goes through all trees
        for tree in treebank.parsed_sents(item):
            # perform optional tree transformations, e.g.:
            tree.collapse_unary(collapsePOS=False)  # Collapse branches A-B-C into A-B+C
            tree.chomsky_normal_form(horzMarkov=2)  # Binarize A->(B,C,D) into A->B,C+D and C+D->C,D
            productions += tree.productions()

    S = Nonterminal('S')
    grammar = induce_pcfg(S, productions)
    # print(grammar)  # This is a PCFG
    # pickle.dump(grammar, open("tbank-grammar.p", "wb"))
    # t = time.time()
    # grammar = pickle.load(open("tbank-grammar.p", "rb"))
    # textf = open("lexicon.txt", "w")
    # textf.write(str(reduce(lambda a, b: a + "\n" + b,
    #     list(filter(lambda x: "'" in x, str(grammar).split("\n"))))))
    # textf.close()
    # print(time.time() - t)

    parser = ViterbiParser(grammar)
    # pickle.dump(parser, open("cky-parser.p", "wb"))
    # parser = pickle.load(open("cky-parser.p", "rb"))
    parser.trace(0)
    sent = "John will join the board"
    tokens = sent.split()
    try:
        grammar.check_coverage(tokens)
        print("All words covered")
        parses = parser.parse_all(tokens)
        if parses:
            print(len(parses))
            print(parses[0].label())
            # parses[0].draw()
            p = reduce(lambda a, b: a + b.prob(),
                       filter(lambda x: x.label() == 'S', parses), 0.0)
        else:
            p = 0
        print("Probability:", p)
    except ValueError:
        print("Some words not covered")
def build(self, examples=tuple()):
    """
    :param examples: tuple or list of nltk Trees
    :return:
    """
    allproductions = []
    for example in examples:
        q = example
        t = self.grammarify(q)
        t = Tree("S", [t])
        productions = t.productions()
        allproductions += productions
    pcfg = nltk.induce_pcfg(Nonterminal("S"), allproductions)
    return pcfg
import nltk
from nltk import Nonterminal, nonterminals, Production, CFG

nonterminal1 = Nonterminal('NP')
nonterminal2 = Nonterminal('VP')
nonterminal3 = Nonterminal('PP')
print(nonterminal1.symbol())
print(nonterminal2.symbol())
print(nonterminal3.symbol())
print(nonterminal1 == nonterminal2)
print(nonterminal2 == nonterminal3)
print(nonterminal1 == nonterminal3)

S, NP, VP, PP = nonterminals('S, NP, VP, PP')
N, V, P, DT = nonterminals('N, V, P, DT')
production1 = Production(S, [NP, VP])
production2 = Production(NP, [DT, NP])
production3 = Production(VP, [V, NP, NP, PP])
print(production1.lhs())
print(production1.rhs())
print(production3.lhs())
print(production3.rhs())
print(production3 == Production(VP, [V, NP, NP, PP]))
print(production2 == production3)
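# Follow-on sketch: the same Production objects can be handed to induce_pcfg
# to build a toy PCFG (rule weights simply reflect how often each production
# appears in the list; duplicating production1 is illustrative):
from nltk import induce_pcfg

toy_pcfg = induce_pcfg(S, [production1, production2, production3, production1])
print(toy_pcfg)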