Python PhraseTable.addUNKRule Examples

Programming Language: Python

Namespace/Package Name: phraseTable

Class/Type: PhraseTable

Method/Function: addUNKRule

Examples at hotexamples.com: 2

Python PhraseTable.addUNKRule - 2 examples found. These are the top-rated real-world Python examples of phraseTable.PhraseTable.addUNKRule extracted from open-source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

getRuleEntries(2)

PhraseTable(1)

addUNKRule(1)

findConsistentRules(1)

getTotalRules(1)

hasRule(1)

Example #1

Show file

File: parse_CP.py Project: amirpouya/Kriya

    def parse(self):
        """Fill the chart for the current sentence and print its n-best list.

        Single-word cells are seeded first (adding a fallback rule for any word
        the phrase table does not know); wider spans are then filled bottom-up.
        Cells whose span starts at position 0 additionally receive glue rules.

        Returns:
            0  if force decoding finds no matching candidate in the final cell,
            99 if the cell covering the whole sentence has no S tree,
            1  otherwise, after the n-best derivations have been printed.
        """

        global consObjsLst
        is_final = False
        glue_sources = ['X__1', 'S__1 X__2']

        # Stage 1: seed the diagonal of the chart, one cell per input word.
        for pos, word in enumerate(self.wordsLst):
            span = (pos, pos)
            if pos == 0 and self.sent_len == 1:
                is_final = True
            Parse.chartDict[span] = Cell()

            # Unknown word: register X -> <w_i, w_i> built from the default UNK tuple.
            if not PhraseTable.hasRule(word):
                unk_score, unk_lm_heu, unk_feat_vec = FeatureManager.unkRuleTup
                unk_rule = RuleItem.initUNKRule(word, unk_feat_vec, unk_score, unk_lm_heu)
                PhraseTable.addUNKRule(word, unk_rule)

            # Rules for this word (known or just-added UNK) are flushed into the cell.
            word_rules = self.__getRulesFromPT(word, span)
            self.__flush2Cell(span, ('X', word), 0, word_rules)

            # Only cell (0, 0) takes the glue rule S --> <X__1, X__1>.
            if pos != 0:
                continue
            self.__getGlueRuleSpans(span, glue_sources[0])
            if not consObjsLst:
                continue

            Parse.chartDict[span].has_S_tree = True
            self.__reduceCell(span, 'S', 'S', is_final)   # n-best list from the parse forest
            if not settings.opts.force_decode:
                continue
            matched = Parse.chartDict[span].forceDecodePrune(self.refsLst, is_final)
            if is_final and not matched:
                sys.stderr.write("           INFO  :: Force decode mode: No matching candidate found for cell (0, %d). Aborting!!\n" % (pos))
                return 0

        # Stage 2: fill the rest of the CKY table, shortest spans first.
        # 'gap' is (end - start), i.e. the span length minus one.
        for gap in range(1, self.sent_len):
            for start in range(self.sent_len - gap):
                end = start + gap
                # Past the maximum phrase length only the span starting at 0 is built;
                # the remaining start positions for this gap are skipped.
                if start != 0 and gap >= settings.opts.max_phr_len:
                    break

                span = (start, end)
                Parse.chartDict[span] = Cell()
                if start == 0 and end == self.sent_len - 1:
                    is_final = True
                if gap < settings.opts.max_phr_len:
                    self.__getRuleSpans(start, end, ' '.join(self.wordsLst[start:end + 1]))

                if consObjsLst:
                    self.__reduceCell(span, 'X', 'X', is_final)

                # Spans starting at 0 (top row of the parse triangle) also get S items.
                # Glue rules: S --> (X__1, X__1) and S --> (S__1 X__2, S__1 X__2).
                # Per the original note, <s> and </s> are added later, in the
                # cube-pruning step (lazyMerge_CP.py).
                if start != 0:
                    continue
                for glue_src in glue_sources:
                    self.__getGlueRuleSpans(span, glue_src)

                if consObjsLst:
                    Parse.chartDict[span].has_S_tree = True
                    self.__reduceCell(span, 'S', 'S', is_final)
                if settings.opts.force_decode:
                    matched = Parse.chartDict[span].forceDecodePrune(self.refsLst, is_final)
                    if is_final and not matched:
                        sys.stderr.write("           INFO  :: Force decode mode: No matching candidate found for cell (0, %d). Aborting!!\n" % (end))
                        return 0

        # The cell spanning the whole sentence holds the final result.
        top_span = (0, self.sent_len - 1)
        if not Parse.chartDict[top_span].has_S_tree:
            return 99
        Parse.chartDict[top_span].printNBest('S', self.sent_indx)
        if settings.opts.trace_rules > 0:
            Parse.chartDict[top_span].printTrace('S', self.sent)

        return 1

Example #2

Show file

    def parse(self):
        """Run chart parsing over self.wordsLst and report the outcome as a status code.

        Return values: 0 when forced decoding cannot match any candidate in the
        final cell, 99 when the full-sentence cell carries no S tree, and 1 once
        the n-best derivations of the full-sentence cell have been printed.
        """

        global consObjsLst
        reached_top = False
        glue_srcs = ['X__1', 'S__1 X__2']
        abort_msg = "           INFO  :: Force decode mode: No matching candidate found for cell (0, %d). Aborting!!\n"

        # --- Axioms: create and populate every one-word cell ---
        for k, tok in enumerate(self.wordsLst):
            key = (k, k)
            at_start = (k == 0)
            if at_start and self.sent_len == 1:
                reached_top = True
            Parse.chartDict[key] = Cell()

            if not PhraseTable.hasRule(tok):
                # Token missing from the phrase table: add X -> <w_i, w_i>
                # using the default scores held in FeatureManager.unkRuleTup.
                unk_score, unk_lm_heu, unk_feats = FeatureManager.unkRuleTup
                PhraseTable.addUNKRule(
                    tok,
                    RuleItem.initUNKRule(tok, unk_feats, unk_score, unk_lm_heu))

            # Whatever rules the phrase table now has for the token go to the chart.
            self.__flush2Cell(key, ('X', tok), 0,
                              self.__getRulesFromPT(tok, key))

            if at_start:
                # Cell (0, 0) is the only one-word cell given S --> <X__1, X__1>.
                self.__getGlueRuleSpans(key, glue_srcs[0])
                if consObjsLst:
                    Parse.chartDict[key].has_S_tree = True
                    # Build the n-best list from the parse forest.
                    self.__reduceCell(key, 'S', 'S', reached_top)
                    if settings.opts.force_decode:
                        pruned_ok = Parse.chartDict[key].forceDecodePrune(
                            self.refsLst, reached_top)
                        if reached_top and not pruned_ok:
                            sys.stderr.write(abort_msg % (k))
                            return 0

        # --- CKY table: spans of two or more words, by increasing length ---
        for dist in range(1, self.sent_len):
            for hi in range(dist, self.sent_len):
                lo = hi - dist
                # Once the span exceeds the maximum phrase length, only the
                # span anchored at position 0 is kept; move on to the next length.
                if lo != 0 and dist >= settings.opts.max_phr_len:
                    break

                key = (lo, hi)
                Parse.chartDict[key] = Cell()
                if key == (0, self.sent_len - 1):
                    reached_top = True
                if dist < settings.opts.max_phr_len:
                    src_phrase = ' '.join(self.wordsLst[lo:hi + 1])
                    self.__getRuleSpans(lo, hi, src_phrase)

                if consObjsLst:
                    self.__reduceCell(key, 'X', 'X', reached_top)

                if lo == 0:
                    # Top row of the parse triangle: add [S, i, j] items via the
                    # glue rules S --> (X__1, X__1) and S --> (S__1 X__2, S__1 X__2).
                    # The original note says <s> and </s> are attached in the
                    # cube-pruning step (lazyMerge_CP.py), not here.
                    for src in glue_srcs:
                        self.__getGlueRuleSpans(key, src)

                    if consObjsLst:
                        Parse.chartDict[key].has_S_tree = True
                        self.__reduceCell(key, 'S', 'S', reached_top)
                    if settings.opts.force_decode:
                        pruned_ok = Parse.chartDict[key].forceDecodePrune(
                            self.refsLst, reached_top)
                        if reached_top and not pruned_ok:
                            sys.stderr.write(abort_msg % (hi))
                            return 0

        # --- Result: read off the cell that covers the entire sentence ---
        root_key = (0, self.sent_len - 1)
        if not Parse.chartDict[root_key].has_S_tree:
            return 99
        Parse.chartDict[root_key].printNBest('S', self.sent_indx)
        if settings.opts.trace_rules > 0:
            Parse.chartDict[root_key].printTrace('S', self.sent)

        return 1