def parse(self):
    """Parse the sentence held by this object with a CKY-style chart.

    Phase 1 creates one chart cell per word in ``self.wordsLst`` and flushes
    the phrase-table rules for that word into it (a default UNK rule is
    registered first when the phrase table has no rule for the word).
    Phase 2 fills the cells for spans of two or more words and, for spans
    starting at position 0, applies the glue rules to build 'S' items.

    Reads ``self.wordsLst``, ``self.sent_len``, ``self.refsLst``,
    ``self.sent_indx`` and ``self.sent``; writes ``Parse.chartDict``.

    Returns:
        1 if the cell spanning the whole sentence has an S tree (its n-best
        list is printed, plus the trace when ``trace_rules > 0``);
        0 if force-decode mode found no matching candidate in the final cell;
        99 if the final cell has no S tree, or the sentence is empty.
    """
    # NOTE(review): this block was rebuilt from a whitespace-collapsed source.
    # The nesting of the force-decode checks (inside `if consObjsLst:`) is an
    # assumption, and runs of spaces inside the log message may have been
    # collapsed -- confirm both against the pristine file.

    # consObjsLst is a module-level list that is only read here; it is
    # presumably filled by the span-collection helpers called below.
    glueSrcLst = ['X__1', 'S__1 X__2']
    final_cell = False

    # Phase-1: Initialization
    # Fill the initial axioms in the chartDict in corresponding word positions
    for p_i, p_word in enumerate(self.wordsLst):
        span = (p_i, p_i)
        if p_i == 0 and self.sent_len == 1:
            final_cell = True
        Parse.chartDict[span] = Cell()

        # If the word is UNK; add it to the phrase table as X -> <w_i, w_i>
        # with the default score and features
        if not PhraseTable.hasRule(p_word):
            (unk_score, unk_lm_heu, unk_featVec) = FeatureManager.unkRuleTup
            PhraseTable.addUNKRule(
                p_word,
                RuleItem.initUNKRule(p_word, unk_featVec, unk_score, unk_lm_heu))

        # Known (X -> <w_i, w_t>) or unknown (X -> <w_i, w_i>) rules are now
        # flushed to the chart
        self.__flush2Cell(span, ('X', p_word), 0,
                          self.__getRulesFromPT(p_word, span))

        # Add the glue rule S --> <X__1, X__1> in cell (0, 0)
        if p_i == 0:
            self.__getGlueRuleSpans(span, glueSrcLst[0])
            if consObjsLst:
                Parse.chartDict[span].has_S_tree = True
                # Compute the n-best list from the parse forest
                self.__reduceCell(span, 'S', 'S', final_cell)
                if settings.opts.force_decode:
                    force_dec_status = Parse.chartDict[span].forceDecodePrune(
                        self.refsLst, final_cell)
                    if final_cell and not force_dec_status:
                        sys.stderr.write(" INFO :: Force decode mode: No matching candidate found for cell (0, %d). Aborting!!\n" % (p_i))
                        return 0

    # Phase-2: Filling the CKY table
    # Iterate through all possible spans of length 2 thro' M (maximum phrase length)
    for p_l in range(1, self.sent_len):
        for p_j in range(p_l, self.sent_len):
            p_i = p_j - p_l
            span = (p_i, p_j)

            # Beyond the maximum phrase length only the span starting at 0
            # (the first one visited for this p_l) is processed; stop this
            # row as soon as p_i moves past 0.
            if p_l >= settings.opts.max_phr_len and p_i != 0:
                break

            Parse.chartDict[span] = Cell()
            if p_i == 0 and p_j == self.sent_len - 1:
                final_cell = True
            if p_l < settings.opts.max_phr_len:
                self.__getRuleSpans(p_i, p_j,
                                    ' '.join(self.wordsLst[p_i:p_j + 1]))

            if consObjsLst:
                self.__reduceCell(span, 'X', 'X', final_cell)

            # For span beginning at '0' (top row in the parse triangle), add
            # items of the form [S, i, j]:w to chart.
            # Glue rules are: S --> (X__1, X__1) and S --> (S__1 X__2, S__1 X__2)
            # Sentence boundary markers <s> and </s> are added in the
            # Cube-Pruning step (lazyMerge_CP.py)
            if p_i == 0:
                for p_src in glueSrcLst:
                    self.__getGlueRuleSpans(span, p_src)

                if consObjsLst:
                    Parse.chartDict[span].has_S_tree = True
                    self.__reduceCell(span, 'S', 'S', final_cell)
                    if settings.opts.force_decode:
                        force_dec_status = Parse.chartDict[span].forceDecodePrune(
                            self.refsLst, final_cell)
                        if final_cell and not force_dec_status:
                            sys.stderr.write(" INFO :: Force decode mode: No matching candidate found for cell (0, %d). Aborting!!\n" % (p_j))
                            return 0

    p_j = self.sent_len - 1
    # An empty sentence never creates a top cell; report it the same way as
    # a sentence without an S derivation instead of raising KeyError below.
    if p_j < 0:
        return 99

    top_cell = Parse.chartDict[(0, p_j)]
    if not top_cell.has_S_tree:
        return 99

    # Print the N-best derivations in the last cell
    top_cell.printNBest('S', self.sent_indx)
    if settings.opts.trace_rules > 0:
        # Prints the translation trace for the top-3 entries
        top_cell.printTrace('S', self.sent)
    return 1
def parse(self):
    """Parse the sentence held by this object with a CKY-style chart.

    Phase 1 creates one chart cell per word in ``self.wordsLst`` and flushes
    the phrase-table rules for that word into it (a default UNK rule is
    registered first when the phrase table has no rule for the word).
    Phase 2 fills the cells for spans of two or more words and, for spans
    starting at position 0, applies the glue rules to build 'S' items.

    Reads ``self.wordsLst``, ``self.sent_len``, ``self.refsLst``,
    ``self.sent_indx`` and ``self.sent``; writes ``Parse.chartDict``.

    Returns:
        1 if the cell spanning the whole sentence has an S tree (its n-best
        list is printed, plus the trace when ``trace_rules > 0``);
        0 if force-decode mode found no matching candidate in the final cell;
        99 if the final cell has no S tree, or the sentence is empty.
    """
    # NOTE(review): this block was rebuilt from a whitespace-collapsed source.
    # The nesting of the force-decode checks (inside `if consObjsLst:`) is an
    # assumption, and runs of spaces inside the log message may have been
    # collapsed -- confirm both against the pristine file.

    # consObjsLst is a module-level list that is only read here; it is
    # presumably filled by the span-collection helpers called below.
    glueSrcLst = ['X__1', 'S__1 X__2']
    final_cell = False

    # Phase-1: Initialization
    # Fill the initial axioms in the chartDict in corresponding word positions
    for p_i, p_word in enumerate(self.wordsLst):
        span = (p_i, p_i)
        if p_i == 0 and self.sent_len == 1:
            final_cell = True
        Parse.chartDict[span] = Cell()

        # If the word is UNK; add it to the phrase table as X -> <w_i, w_i>
        # with the default score and features
        if not PhraseTable.hasRule(p_word):
            (unk_score, unk_lm_heu, unk_featVec) = FeatureManager.unkRuleTup
            PhraseTable.addUNKRule(
                p_word,
                RuleItem.initUNKRule(p_word, unk_featVec, unk_score, unk_lm_heu))

        # Known (X -> <w_i, w_t>) or unknown (X -> <w_i, w_i>) rules are now
        # flushed to the chart
        self.__flush2Cell(span, ('X', p_word), 0,
                          self.__getRulesFromPT(p_word, span))

        # Add the glue rule S --> <X__1, X__1> in cell (0, 0)
        if p_i == 0:
            self.__getGlueRuleSpans(span, glueSrcLst[0])
            if consObjsLst:
                Parse.chartDict[span].has_S_tree = True
                # Compute the n-best list from the parse forest
                self.__reduceCell(span, 'S', 'S', final_cell)
                if settings.opts.force_decode:
                    force_dec_status = Parse.chartDict[span].forceDecodePrune(
                        self.refsLst, final_cell)
                    if final_cell and not force_dec_status:
                        sys.stderr.write(" INFO :: Force decode mode: No matching candidate found for cell (0, %d). Aborting!!\n" % (p_i))
                        return 0

    # Phase-2: Filling the CKY table
    # Iterate through all possible spans of length 2 thro' M (maximum phrase length)
    for p_l in range(1, self.sent_len):
        for p_j in range(p_l, self.sent_len):
            p_i = p_j - p_l
            span = (p_i, p_j)

            # Beyond the maximum phrase length only the span starting at 0
            # (the first one visited for this p_l) is processed; stop this
            # row as soon as p_i moves past 0.
            if p_l >= settings.opts.max_phr_len and p_i != 0:
                break

            Parse.chartDict[span] = Cell()
            if p_i == 0 and p_j == self.sent_len - 1:
                final_cell = True
            if p_l < settings.opts.max_phr_len:
                self.__getRuleSpans(p_i, p_j,
                                    ' '.join(self.wordsLst[p_i:p_j + 1]))

            if consObjsLst:
                self.__reduceCell(span, 'X', 'X', final_cell)

            # For span beginning at '0' (top row in the parse triangle), add
            # items of the form [S, i, j]:w to chart.
            # Glue rules are: S --> (X__1, X__1) and S --> (S__1 X__2, S__1 X__2)
            # Sentence boundary markers <s> and </s> are added in the
            # Cube-Pruning step (lazyMerge_CP.py)
            if p_i == 0:
                for p_src in glueSrcLst:
                    self.__getGlueRuleSpans(span, p_src)

                if consObjsLst:
                    Parse.chartDict[span].has_S_tree = True
                    self.__reduceCell(span, 'S', 'S', final_cell)
                    if settings.opts.force_decode:
                        force_dec_status = Parse.chartDict[span].forceDecodePrune(
                            self.refsLst, final_cell)
                        if final_cell and not force_dec_status:
                            sys.stderr.write(" INFO :: Force decode mode: No matching candidate found for cell (0, %d). Aborting!!\n" % (p_j))
                            return 0

    p_j = self.sent_len - 1
    # An empty sentence never creates a top cell; report it the same way as
    # a sentence without an S derivation instead of raising KeyError below.
    if p_j < 0:
        return 99

    top_cell = Parse.chartDict[(0, p_j)]
    if not top_cell.has_S_tree:
        return 99

    # Print the N-best derivations in the last cell
    top_cell.printNBest('S', self.sent_indx)
    if settings.opts.trace_rules > 0:
        # Prints the translation trace for the top-3 entries
        top_cell.printTrace('S', self.sent)
    return 1