@classmethod
def setFeatureWeights(cls, tot_lm_feats, tot_tm_feats, how_many_tm_feats):
    # Validate that the supplied feature counts match the loaded weight vectors
    assert tot_lm_feats == len(cls.lmWgt), \
        "Error: Language model param should have %d weights instead of %d" % (tot_lm_feats, len(cls.lmWgt))
    assert tot_tm_feats == len(cls.tmWgt), \
        "Error: Translation model param should have %d weights instead of %d" % (tot_tm_feats, len(cls.tmWgt))
    for tmVec in cls.tmWgt:
        assert how_many_tm_feats == len(tmVec), \
            "Error: # of TM features (%d) doesn't match TM weights count (%d)" % (how_many_tm_feats, len(tmVec))

    # Index of the e-given-f feature within each TM weight vector
    cls.egivenf_offset = 2
    StatefulFeatures.setLMInitLst(tot_lm_feats)
    cls.setUnkRule()
    cls.printWeights()
    LanguageModelManager.setLMInfo(cls.lmWgt)
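
# Illustrative usage sketch (not from the original source): how the weight
# checks above behave, assuming one LM weight and two TM weight vectors of
# four features each. `RuleTable` is a hypothetical stand-in for whichever
# class owns setFeatureWeights.
#
#   RuleTable.lmWgt = [0.5]
#   RuleTable.tmWgt = [[0.2, 0.2, 0.3, 0.3], [0.1, 0.1, 0.4, 0.4]]
#   RuleTable.setFeatureWeights(1, 2, 4)    # all assertions pass
#   RuleTable.setFeatureWeights(2, 2, 4)    # AssertionError: LM weight count mismatch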

def mergeEntries(self, entriesLst, cube_indx):
    # First process the goal: this will be a (regular/glue) rule
    sf_f_obj = sff.initNew(entriesLst[0].lm_heu)
    score = entriesLst[0].getScoreSansLmHeu()

    # Now process the antecedents
    anteHyps = []
    anteSfFeats = []
    anteItemsStates = []
    for ante_ent in entriesLst[1:]:
        score += ante_ent.getScoreSansLmHeu()
        anteHyps.append(ante_ent.tgt)
        anteSfFeats.append(ante_ent.sf_feat)
        anteItemsStates.append(ante_ent.consItems)

    (tgt_hyp, newConsItems) = lmm.helperConsItem(
        Lazy.is_last_cell,
        Lazy.cell_type,
        Lazy.cell_span,
        entriesLst[0].tgt.split(),
        anteHyps,
        anteItemsStates,
    )

    if settings.opts.force_decode and not Lazy.candMatchesRef(tgt_hyp):
        return (score, None)    # Hypothesis wouldn't lead to the reference; ignore it

    """ Get the hypothesis status from the classmethod (in Lazy); it takes one of three values:
        -2 : hyp was not seen earlier; create a new entry
        -1 : hyp was seen earlier, but the current one scores better; create a new entry to replace the existing one
         0 : hyp was seen earlier and scores worse than the existing one; ignore it
    """
    score_wo_LM = score - sf_f_obj.aggregSFScore(anteSfFeats)
    hyp_status = Lazy.getHypothesisStatus(tgt_hyp, score_wo_LM)

    """ Should we recombine hypotheses?
        A new hypothesis is always added; query the LM for its score and create a new entry_obj.
        If an identical hypothesis exists, the current one is added only under these conditions:
        i)  use_unique_nbest is False (add the new hyp, but reuse the LM score of the existing one), or
        ii) use_unique_nbest is True and the new hyp is better than the existing one.
    """
    if hyp_status == 0 and settings.opts.use_unique_nbest:
        entry_obj = None
    else:
        score += sf_f_obj.helperScore(newConsItems, Lazy.is_last_cell)
        entry_obj = Hypothesis(
            score,
            self.src_side,
            tgt_hyp,
            sf_f_obj,
            self.depth_hier,
            Lazy.cell_span,
            entriesLst[0],
            entriesLst[1:],
            newConsItems,
        )

    return (score, entry_obj)
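
# Sketch of the recombination decision above in isolation (illustrative,
# not part of the original code). `hyp_status` follows the -2/-1/0
# convention documented in mergeEntries.
def _should_build_entry(hyp_status, use_unique_nbest):
    """Return True when mergeEntries should construct a new Hypothesis."""
    # Only a duplicate hypothesis (status 0) is dropped, and only when
    # unique n-best filtering is on; every other case creates an entry.
    return not (hyp_status == 0 and use_unique_nbest)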

def computeFeatures(self):
    # Start from this hypothesis' own stateless/stateful features ...
    agg_sl_feat = StatelessFeatures.copySLFeat(self.inf_rule.sl_feat)
    agg_sf_feat = StatefulFeatures.replicateSFFeat(self.sf_feat)

    # ... then walk the backpointers breadth-first, aggregating the
    # features of every antecedent hypothesis in the derivation.
    entryStack = list(self.bp)
    while entryStack:
        ent_obj = entryStack.pop(0)
        agg_sl_feat.aggregFeatScore(ent_obj.inf_rule.sl_feat)
        agg_sf_feat.aggregFeatScore(ent_obj.sf_feat)
        entryStack.extend(ent_obj.bp)

    return agg_sl_feat, agg_sf_feat
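
# Toy illustration (assumptions labeled): the same FIFO worklist traversal
# as computeFeatures, on a minimal node type, showing that feature vectors
# are aggregated breadth-first over the derivation's backpointers. `_Node`
# and its fields are hypothetical, not types from the original code.
class _Node(object):
    def __init__(self, feats, bp=()):
        self.feats = feats    # feature vector as a plain list
        self.bp = bp          # backpointers to antecedent nodes

def _sum_feats(root):
    total = list(root.feats)
    queue = list(root.bp)
    while queue:
        node = queue.pop(0)   # pop(0) => FIFO, i.e. breadth-first
        total = [a + b for a, b in zip(total, node.feats)]
        queue.extend(node.bp)
    return total

# e.g. _sum_feats(_Node([1.0], bp=(_Node([0.5]), _Node([0.25])))) == [1.75]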

@classmethod
def createFromRule(cls, r_item, span):
    # Seed a hypothesis directly from a (scored) rule: depth 0, no
    # backpointers, and a single consequent item over the rule's target side.
    return Hypothesis(r_item.score, r_item.src, r_item.tgt,
                      StatefulFeatures.initNew(r_item.lm_heu),
                      0, span, r_item, (), [ConsequentItem(r_item.tgt.split())])
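
# Hypothetical usage (not from the original source): seeding a chart cell
# with an initial hypothesis from a scored rule covering source span (2, 4).
# `rule` stands for a rule object exposing score/src/tgt/lm_heu attributes.
#
#   hyp = Hypothesis.createFromRule(rule, (2, 4))
#   # hyp starts with depth 0, an empty backpointer tuple, and one
#   # ConsequentItem built over the rule's tokenized target side.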