Example #1
0
    def parse(self):
        """Fill the parse chart for the words in self.wordsLst and print the N-best list.

        Phase 1 creates one Cell per word position (i, i), registers a default
        UNK rule for every word that PhraseTable has no rule for, and flushes
        the word's rules into its cell.  Cell (0, 0) additionally gets the glue
        source 'X__1'.

        Phase 2 walks the remaining spans in order of increasing length.  Rule
        spans are collected only while the span length is within
        settings.opts.max_phr_len; beyond that only spans starting at position
        0 are visited, and those receive the glue sources.

        Returns:
            1  after the N-best list of cell (0, sent_len - 1) was printed,
            0  if force decoding found no matching candidate in the final cell,
            99 if cell (0, sent_len - 1) has no S tree.
        """

        global consObjsLst
        glue_sources = ['X__1', 'S__1 X__2']
        is_final = False

        # ---- Phase 1: one cell per word on the chart diagonal ----
        for pos, word in enumerate(self.wordsLst):
            span = (pos, pos)
            if pos == 0 and self.sent_len == 1:
                # A one-word sentence: the first cell is also the final one.
                is_final = True
            Parse.chartDict[span] = Cell()

            # Unknown word: register X -> <w_i, w_i> built from the default UNK tuple.
            if not PhraseTable.hasRule(word):
                unk_score, unk_lm_heu, unk_featVec = FeatureManager.unkRuleTup
                unk_rule = RuleItem.initUNKRule(word, unk_featVec, unk_score, unk_lm_heu)
                PhraseTable.addUNKRule(word, unk_rule)

            # Known or freshly added UNK rules for this word go into the cell.
            word_rules = self.__getRulesFromPT(word, span)
            self.__flush2Cell(span, ('X', word), 0, word_rules)

            if pos != 0:
                continue

            # Only cell (0, 0) takes the glue rule S --> <X__1, X__1>.
            self.__getGlueRuleSpans(span, glue_sources[0])
            if not consObjsLst:
                continue

            Parse.chartDict[span].has_S_tree = True
            self.__reduceCell(span, 'S', 'S', is_final)
            if settings.opts.force_decode:
                matched = Parse.chartDict[span].forceDecodePrune(self.refsLst, is_final)
                if is_final and not matched:
                    sys.stderr.write("           INFO  :: Force decode mode: No matching candidate found for cell (0, %d). Aborting!!\n" % (pos))
                    return 0

        # ---- Phase 2: spans of two or more words (width = span length - 1) ----
        for width in range(1, self.sent_len):
            for end in range(width, self.sent_len):
                start = end - width

                # Past the maximum phrase length only the span starting at 0 is
                # processed; the rest of this width is skipped.
                if width >= settings.opts.max_phr_len and start != 0:
                    break

                span = (start, end)
                Parse.chartDict[span] = Cell()
                if start == 0 and end == self.sent_len - 1:
                    is_final = True

                if width < settings.opts.max_phr_len:
                    self.__getRuleSpans(start, end, ' '.join(self.wordsLst[start:end + 1]))

                if consObjsLst:
                    self.__reduceCell(span, 'X', 'X', is_final)

                if start != 0:
                    continue

                # Top row of the parse triangle: add [S, 0, end] items using the
                # glue rules S --> (X__1, X__1) and S --> (S__1 X__2, S__1 X__2).
                for glue_src in glue_sources:
                    self.__getGlueRuleSpans(span, glue_src)

                if consObjsLst:
                    Parse.chartDict[span].has_S_tree = True
                    self.__reduceCell(span, 'S', 'S', is_final)

                if settings.opts.force_decode:
                    matched = Parse.chartDict[span].forceDecodePrune(self.refsLst, is_final)
                    if is_final and not matched:
                        sys.stderr.write("           INFO  :: Force decode mode: No matching candidate found for cell (0, %d). Aborting!!\n" % (end))
                        return 0

        # ---- Output: the cell covering the whole sentence ----
        top_span = (0, self.sent_len - 1)
        if not Parse.chartDict[top_span].has_S_tree:
            return 99

        Parse.chartDict[top_span].printNBest('S', self.sent_indx)
        if settings.opts.trace_rules > 0:
            Parse.chartDict[top_span].printTrace('S', self.sent)

        return 1
Example #2
0
    def parse(self):
        """Build the chart for self.wordsLst bottom-up and print the final cell's N-best list.

        The chart (Parse.chartDict) is keyed by (start, end) word positions.
        Single-word cells are initialised first, then longer spans are filled
        in order of increasing length.  Spans starting at position 0 also get
        'S' entries through the glue sources 'X__1' and 'S__1 X__2'.

        Returns:
            1  on success (N-best list printed for cell (0, sent_len - 1)),
            0  when force decoding is on and the final cell has no matching candidate,
            99 when cell (0, sent_len - 1) has no S tree.
        """

        global consObjsLst
        glue_srcs = ['X__1', 'S__1 X__2']
        is_last = False

        def build_s_entries(cell_key, last_cell):
            # Flag the cell as holding an S tree, then reduce it as an 'S' cell.
            Parse.chartDict[cell_key].has_S_tree = True
            self.__reduceCell(cell_key, 'S', 'S', last_cell)

        def no_forced_match(cell_key, last_cell):
            # Prune the cell against the references; a miss is only fatal (and
            # only reported) when this is the final cell.
            status = Parse.chartDict[cell_key].forceDecodePrune(
                self.refsLst, last_cell)
            if last_cell and not status:
                sys.stderr.write(
                    "           INFO  :: Force decode mode: No matching candidate found for cell (0, %d). Aborting!!\n"
                    % (cell_key[1]))
                return True
            return False

        # Phase-1: initial axioms, one cell per word position.
        for idx, token in enumerate(self.wordsLst):
            diag = (idx, idx)
            if idx == 0 and self.sent_len == 1:
                is_last = True
            Parse.chartDict[diag] = Cell()

            # Words without a phrase-table rule get X -> <w_i, w_i> built from
            # the default UNK score / LM heuristic / feature vector.
            if not PhraseTable.hasRule(token):
                score, lm_heu, feat_vec = FeatureManager.unkRuleTup
                PhraseTable.addUNKRule(
                    token,
                    RuleItem.initUNKRule(token, feat_vec, score, lm_heu))

            # Flush the (known or UNK) rules for this word into its cell.
            self.__flush2Cell(diag, ('X', token), 0,
                              self.__getRulesFromPT(token, diag))

            # Glue rule S --> <X__1, X__1> applies only in cell (0, 0).
            if idx == 0:
                self.__getGlueRuleSpans(diag, glue_srcs[0])
                if consObjsLst:
                    build_s_entries(diag, is_last)
                    if settings.opts.force_decode and no_forced_match(diag, is_last):
                        return 0

        # Phase-2: fill the CKY table; 'gap' is the span length minus one.
        for gap in range(1, self.sent_len):
            for right in range(gap, self.sent_len):
                left = right - gap

                # Once the span exceeds the maximum phrase length, only the
                # span anchored at position 0 is still processed.
                if gap >= settings.opts.max_phr_len and left != 0:
                    break

                span = (left, right)
                Parse.chartDict[span] = Cell()
                if left == 0 and right == self.sent_len - 1:
                    is_last = True

                if gap < settings.opts.max_phr_len:
                    phrase = ' '.join(self.wordsLst[left:right + 1])
                    self.__getRuleSpans(left, right, phrase)

                if consObjsLst:
                    self.__reduceCell(span, 'X', 'X', is_last)

                # Spans beginning at 0 (top row of the parse triangle) also get
                # [S, 0, right] items via both glue rules.
                if left == 0:
                    for src in glue_srcs:
                        self.__getGlueRuleSpans(span, src)

                    if consObjsLst:
                        build_s_entries(span, is_last)
                    if settings.opts.force_decode and no_forced_match(span, is_last):
                        return 0

        # Final cell: bail out with 99 if no S tree was built for the sentence.
        whole = (0, self.sent_len - 1)
        if not Parse.chartDict[whole].has_S_tree:
            return 99

        Parse.chartDict[whole].printNBest('S', self.sent_indx)
        if settings.opts.trace_rules > 0:
            Parse.chartDict[whole].printTrace('S', self.sent)

        return 1