Python PCFG.start Examples

Programming Language: Python

Namespace/Package Name: nltk.grammar

Class/Type: PCFG

Method/Function: start

Examples at hotexamples.com: 2

Python PCFG.start - 2 examples found. These are the top-rated real-world Python examples of nltk.grammar.PCFG.start, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

fromstring(18)

PCFG(9)

start(2)

productions(1)

Example #1

Show file

File: pcfg.py Project: XsongyangX/ift6285-hw8

def fill_missing_words(grammar: PCFG, missing_words: Set[str]):
    """Build a grammar that can also produce the words in ``missing_words``.

    An ``UNK`` nonterminal is created with one production per missing word.
    Every left-hand side that owns at least one production whose right-hand
    side contains a terminal (a ``str``) receives one extra ``lhs -> UNK``
    production.  The original productions plus the added ones are then handed
    to ``induce_pcfg`` together with the original start symbol.

    Args:
        grammar: The grammar to extend; it is not modified.
        missing_words: Words to make reachable through ``UNK``.

    Returns:
        Whatever ``induce_pcfg`` builds from the combined production list.
    """
    # UNK -> word1 | word2 | ... | wordN
    unknown = Nonterminal('UNK')
    unk_rules = [
        Production(unknown, [missing_word]) for missing_word in missing_words
    ]

    # Left-hand sides that already received their `lhs -> UNK` production.
    # A set keeps the membership test O(1) instead of rescanning a list for
    # every production of the grammar.
    corrected: Set[Nonterminal] = set()
    rule: ProbabilisticProduction
    for rule in grammar.productions():
        lhs = rule.lhs()

        # Each left-hand side gets at most one UNK alternative.
        if lhs in corrected:
            continue

        # Only rules with a string (terminal) somewhere on the right-hand side
        # qualify their left-hand side for the UNK alternative.
        if any(isinstance(element, str) for element in rule.rhs()):
            unk_rules.append(Production(lhs, [unknown]))
            corrected.add(lhs)

    return induce_pcfg(grammar.start(), grammar.productions() + unk_rules)

Example #2

Show file

def _attaching(N, G, C, T):
    """Try to attach symbol ``N`` as a new alternative of each OR symbol of ``G``.

    For every nonterminal of ``G`` whose symbol contains ``"OR"``, the
    bicluster (BC) and expression-context (EC) tables of its AND-OR group are
    rebuilt with and without ``N``.  When all four tables pass ``_is_mc`` and
    the difference in ``_log_posterior_gain`` exceeds ``LPG_DIFF_THRESHOLD``,
    the rule ``O -> N`` is accepted: the productions of ``O`` are replaced,
    the module-level ``biclusters`` entry of the group is overwritten, and
    the corpus is reduced.

    Args:
        N: Nonterminal candidate to attach (only ``N.symbol()`` and ``N``
            itself are used).
        G: Grammar providing ``productions()`` and ``start()``.
        C: Corpus, passed through to the counting/reduction helpers.
        T: Not read; it is only recomputed with ``_create_t(C)`` when a rule
            is accepted and returned otherwise unchanged.

    Returns:
        The tuple ``(G, C, T)``, updated if at least one rule was accepted.
    """
    print("attaching...")
    C_derived = _apply_grammar(G, C)

    # Distinct OR nonterminals, in order of first appearance as a left-hand side.
    ORs = []
    for prod in G.productions():
        nt = prod.lhs()
        if "OR" in nt.symbol() and nt not in ORs:
            ORs.append(nt)

    # For each OR symbol O in G: if O leads to a valid expanded bicluster and
    # a posterior gain (Eq. 3) larger than a threshold, add the rule O -> N.
    for O in ORs:

        #
        #   AND-OR group
        #
        # Find the AND-OR group that O belongs to.
        # NOTE(review): if no group matches, `group` stays None and the lookup
        # below fails; confirm that every OR symbol always has a group.
        group = None
        for g in biclusters:
            if O.symbol() in g[1] or O.symbol() in g[2]:
                group = g
                break
        # Position of O inside the group: odd number -> left (False),
        # even number -> right (True).
        num = int(O.symbol()[4:])  # OR number, e.g. "_OR_2" -> 2
        pos = num % 2 == 0

        #
        #   BC_tilde and BC_tilde_prime
        #
        # Create BC_t (BC_tilde) and fill it with the pair counts.
        BC_t = biclusters[group].copy()
        for pair in _get_bicluster_pairs(BC_t):
            BC_t.at[pair] = _count_occ(" ".join(pair), C_derived)
        # Create BC_t_1 (BC_tilde_prime): BC_t extended by the proposed rule.
        BC_t_1 = BC_t.copy()
        if not pos:
            # New row (OR on the left).
            new_row = [_count_occ(" ".join((N.symbol(), x)), C) for x in BC_t.columns]
            BC_t_1.loc[N.symbol(), :] = new_row
        else:
            # New column (OR on the right).
            new_col = [_count_occ(" ".join((x, N.symbol())), C) for x in BC_t.index]
            BC_t_1.loc[:, N.symbol()] = new_col
        BC_t_1 = BC_t_1.astype(int)

        #
        #   EC_tilde and EC_tilde_prime
        #
        # Create and fill EC_t, then copy it into EC_t_1.
        EC_t = _create_ec(BC_t, C_derived, _create_t(C_derived))
        EC_t_1 = EC_t.copy()
        # Index tuples of the rows that N adds to EC_t_1.
        if not pos:
            # OR on the left.
            new_row_indices = [(N.symbol(), col) for col in BC_t_1.columns]
        else:
            # OR on the right.
            new_row_indices = [(row, N.symbol()) for row in BC_t_1.index]
        # Fill the new rows of EC_t_1.
        for i in new_row_indices:
            i_str = _tuple_to_ec_index(i, True)
            # Create the row first (placeholder values), then fill each cell.
            EC_t_1.loc[i_str, :] = [-1] * EC_t_1.shape[1]
            for j in EC_t_1.columns:
                # e: expression, c: context
                e, c = " ".join(i), list(_ec_index_to_tuple(j, False))
                c = tuple(["" if _represents_int(x) else x for x in c])
                EC_t_1.loc[i_str, j] = _count_occ(" ".join([c[0], e, c[1]]).strip(), C)
        EC_t_1 = EC_t_1.astype(int)

        # `DataFrame.as_matrix()` was removed in pandas 1.0; `.values` yields
        # the same ndarray on both old and new pandas versions.
        bc_t_1 = BC_t_1.values
        ec_t_1 = EC_t_1.values
        bc_t = BC_t.values
        ec_t = EC_t.values

        #
        #   LOG POSTERIOR GAIN DIFFERENCE (Eq. 3)
        #
        # Skip this OR symbol unless every BC and EC table is valid (MC).
        # The negation must cover the whole conjunction, not only the first
        # check.
        if not (_is_mc(bc_t_1) and _is_mc(ec_t_1) and _is_mc(bc_t) and _is_mc(ec_t)):
            continue

        lpg_diff = _log_posterior_gain(bc_t_1, ec_t_1)
        lpg_diff -= _log_posterior_gain(bc_t, ec_t)

        if lpg_diff > LPG_DIFF_THRESHOLD:
            print("new rule: %s -> %s" % (O.symbol(), N.symbol()))
            s = np.sum(bc_t_1)
            row_prob = np.sum(bc_t_1, 1) / s
            col_prob = np.sum(bc_t_1, 0) / s

            # Keep every rule except those of O itself.  Comparing the
            # nonterminals (rather than a substring test on their symbols)
            # avoids discarding e.g. "_OR_10" while updating "_OR_1".
            rules = [prod for prod in G.productions() if prod.lhs() != O]

            # Add the new rules of O, weighted by the marginals of BC_t_1.
            if not pos:
                # OR on the left: one alternative per row.
                probs = row_prob
                rhs_symbols = list(BC_t.index) + [N]
                for i in range(BC_t_1.shape[0]):
                    rules.append(ProbabilisticProduction(O, [rhs_symbols[i]], prob=probs[i]))
            else:
                # OR on the right: one alternative per column.
                probs = col_prob
                rhs_symbols = list(BC_t.columns) + [N]
                for j in range(BC_t_1.shape[1]):
                    rules.append(ProbabilisticProduction(O, [rhs_symbols[j]], prob=probs[j]))

            # Updates.
            biclusters[group] = BC_t_1.copy()  # update the AND-OR group
            G = PCFG(G.start(), rules)  # update G
            C = _reduce_corpus(C, biclusters[group], N, True)  # reduce C
            T = _create_t(C)  # update T

    return G, C, T