コード例 #1
0
ファイル: featureManager.py プロジェクト: amirpouya/Kriya
    def setFeatureWeights(cls, tot_lm_feats, tot_tm_feats, how_many_tm_feats):
        """Check the configured weight counts and complete the feature set-up.

        Asserts that cls.lmWgt holds tot_lm_feats entries, that cls.tmWgt holds
        tot_tm_feats entries and that every vector inside cls.tmWgt holds
        how_many_tm_feats entries. Afterwards it sets cls.egivenf_offset to 2 and
        calls, in order, StatefulFeatures.setLMInitLst, cls.setUnkRule,
        cls.printWeights and LanguageModelManager.setLMInfo.
        """
        lm_msg = "Error: Language model param should have %d weights instead of %d"
        tm_msg = "Error: Translation model param should have %d weights instead of %d"
        feat_msg = "Error: # of TM features (%d) doesn't match TM weights count (%d)"

        # Top-level counts first, then the length of each individual TM vector
        assert len(cls.lmWgt) == tot_lm_feats, lm_msg % (tot_lm_feats, len(cls.lmWgt))
        assert len(cls.tmWgt) == tot_tm_feats, tm_msg % (tot_tm_feats, len(cls.tmWgt))
        for wgt_vec in cls.tmWgt:
            assert len(wgt_vec) == how_many_tm_feats, feat_msg % (how_many_tm_feats, len(wgt_vec))

        cls.egivenf_offset = 2
        StatefulFeatures.setLMInitLst(tot_lm_feats)
        cls.setUnkRule()
        cls.printWeights()
        LanguageModelManager.setLMInfo(cls.lmWgt)
コード例 #2
0
 def helperScore(self, newConsItems, is_last_cell):
     '''Query lmm.helperLM for the new items and update the stateful LM scores.

     The first value returned by lmm.helperLM is added to self.comp_score, the
     second one replaces self.lm_heu; the sum of both is returned.
     '''
     lm_pair = lmm.helperLM(newConsItems, is_last_cell, self.lmFVec)
     (frag_score, fresh_heu) = lm_pair
     self.comp_score += frag_score
     self.lm_heu = fresh_heu
     return frag_score + self.lm_heu
コード例 #3
0
ファイル: lazyMerge_CP.py プロジェクト: vsooda/Kriya
    def mergeEntries(self, entriesLst, cube_indx):
        """Merge a goal rule entry with its antecedent entries into one candidate.

        entriesLst[0] is handled as the goal (regular/glue) rule and entriesLst[1:]
        as its antecedents. cube_indx is accepted but not used inside this method.

        Returns a (score, entry_obj) tuple. entry_obj is None when force decoding
        is on and the candidate does not match the reference, or when the
        hypothesis status is 0 while use_unique_nbest is set; in both cases the
        returned score does not include the stateful (LM) score.
        """
        goal_ent = entriesLst[0]
        ante_ents = entriesLst[1:]

        # Goal entry: a (regular/glue) rule seeds the stateful features and the score
        sf_f_obj = sff.initNew(goal_ent.lm_heu)
        score = goal_ent.getScoreSansLmHeu()

        # Antecedents: accumulate their scores and collect what the LM helper needs
        for ent in ante_ents:
            score += ent.getScoreSansLmHeu()
        anteHyps = [ent.tgt for ent in ante_ents]
        anteSfFeats = [ent.sf_feat for ent in ante_ents]
        anteItemsStates = [ent.consItems for ent in ante_ents]

        (tgt_hyp, newConsItems) = lmm.helperConsItem(
            Lazy.is_last_cell,
            Lazy.cell_type,
            Lazy.cell_span,
            goal_ent.tgt.split(),
            anteHyps,
            anteItemsStates,
        )

        # Under force decoding, drop hypotheses that wouldn't lead to the reference
        if settings.opts.force_decode and not Lazy.candMatchesRef(tgt_hyp):
            return (score, None)

        # Hypothesis status comes from the classmethod in Lazy and takes one of:
        #   -2 : hyp was not seen earlier; create a new entry
        #   -1 : hyp was seen earlier but the current one has a better score;
        #        create a new entry to replace the existing one
        #    0 : hyp was seen earlier and scores worse than the existing one; ignore
        score_wo_LM = score - sf_f_obj.aggregSFScore(anteSfFeats)
        hyp_status = Lazy.getHypothesisStatus(tgt_hyp, score_wo_LM)

        # Recombination: a new hypothesis is always added (LM is queried and a new
        # entry_obj is created). An identical hypothesis is still added when
        #   i)  use_unique_nbest is False, or
        #   ii) use_unique_nbest is True and the new hyp beats the existing one.
        if hyp_status == 0 and settings.opts.use_unique_nbest:
            return (score, None)

        score += sf_f_obj.helperScore(newConsItems, Lazy.is_last_cell)
        entry_obj = Hypothesis(
            score,
            self.src_side,
            tgt_hyp,
            sf_f_obj,
            self.depth_hier,
            Lazy.cell_span,
            goal_ent,
            ante_ents,
            newConsItems,
        )
        return (score, entry_obj)
コード例 #4
0
ファイル: featureManager.py プロジェクト: ChenxiCui/Kriya
    def setFeatureWeights(cls, tot_lm_feats, tot_tm_feats, how_many_tm_feats):
        """Verify the LM/TM weight counts and then initialise dependent state.

        Three assertions guard the set-up: cls.lmWgt must have tot_lm_feats
        entries, cls.tmWgt must have tot_tm_feats entries, and each vector in
        cls.tmWgt must have how_many_tm_feats entries. When they hold,
        cls.egivenf_offset is set to 2 and StatefulFeatures.setLMInitLst,
        cls.setUnkRule, cls.printWeights and LanguageModelManager.setLMInfo are
        called in that order.
        """
        lm_template = "Error: Language model param should have %d weights instead of %d"
        assert len(cls.lmWgt) == tot_lm_feats, lm_template % (tot_lm_feats, len(cls.lmWgt))

        tm_template = "Error: Translation model param should have %d weights instead of %d"
        assert len(cls.tmWgt) == tot_tm_feats, tm_template % (tot_tm_feats, len(cls.tmWgt))

        vec_template = "Error: # of TM features (%d) doesn't match TM weights count (%d)"
        for one_tm_vec in cls.tmWgt:
            assert len(one_tm_vec) == how_many_tm_feats, vec_template % (how_many_tm_feats, len(one_tm_vec))

        cls.egivenf_offset = 2
        StatefulFeatures.setLMInitLst(tot_lm_feats)
        cls.setUnkRule()
        cls.printWeights()
        LanguageModelManager.setLMInfo(cls.lmWgt)
コード例 #5
0
ファイル: lazyMerge_CP.py プロジェクト: ChenxiCui/Kriya
    def mergeEntries(self, entriesLst, cube_indx):
        """Build one candidate hypothesis from a rule entry and its antecedents.

        The first element of entriesLst is the goal (regular/glue) rule; the
        remaining elements are its antecedents. cube_indx is not used here.

        Returns (score, entry_obj). entry_obj is None if force decoding is active
        and the candidate does not match the reference, or if the hypothesis
        status is 0 while use_unique_nbest is set; the score returned in those
        cases excludes the stateful (LM) score.
        """
        rule_entry = entriesLst[0]
        antecedents = entriesLst[1:]

        # The goal is a (regular/glue) rule: start the features and score from it
        sf_f_obj = sff.initNew(rule_entry.lm_heu)
        score = rule_entry.getScoreSansLmHeu()

        # Fold in the antecedents' scores, then gather their targets/features/states
        for ante in antecedents:
            score += ante.getScoreSansLmHeu()
        anteHyps = [ante.tgt for ante in antecedents]
        anteSfFeats = [ante.sf_feat for ante in antecedents]
        anteItemsStates = [ante.consItems for ante in antecedents]

        goal_tgt_toks = rule_entry.tgt.split()
        (tgt_hyp, newConsItems) = lmm.helperConsItem(
            Lazy.is_last_cell, Lazy.cell_type, Lazy.cell_span,
            goal_tgt_toks, anteHyps, anteItemsStates)

        # Hypothesis wouldn't lead to the reference; ignore it when force decoding
        if settings.opts.force_decode and not Lazy.candMatchesRef(tgt_hyp):
            return (score, None)

        # Hypothesis status (from the classmethod in Lazy) is one of:
        #   -2 : hyp was not seen earlier; create a new entry
        #   -1 : hyp was seen earlier but the current one scores better;
        #        create a new entry to replace the existing one
        #    0 : hyp was seen earlier and scores worse than the existing one; ignore
        score_wo_LM = score - sf_f_obj.aggregSFScore(anteSfFeats)
        hyp_status = Lazy.getHypothesisStatus(tgt_hyp, score_wo_LM)

        # Should we recombine? A new hypothesis is always added (query the LM and
        # create a new entry_obj). An identical one is added only if
        #   i)  the use_unique_nbest flag is False, or
        #   ii) use_unique_nbest is True and the new hyp is better than the old one.
        skip_duplicate = (hyp_status == 0 and settings.opts.use_unique_nbest)
        if skip_duplicate:
            return (score, None)

        score += sf_f_obj.helperScore(newConsItems, Lazy.is_last_cell)
        entry_obj = Hypothesis(score, self.src_side, tgt_hyp, sf_f_obj,
                               self.depth_hier, Lazy.cell_span,
                               rule_entry, antecedents, newConsItems)
        return (score, entry_obj)
コード例 #6
0
ファイル: features.py プロジェクト: amirpouya/Kriya
 def stringifyMembers(self, cand_hyp):
     '''Return lmm.adjustUNKLMScore applied to cand_hyp and this object's lmFVec.'''
     adjusted = lmm.adjustUNKLMScore(cand_hyp, self.lmFVec)
     return adjusted
コード例 #7
0
ファイル: features.py プロジェクト: amirpouya/Kriya
 def helperScore(self, newConsItems, is_last_cell):
     '''Recompute the stateful LM scores for newConsItems.

     Calls lmm.helperLM, adds its first result to self.comp_score, stores its
     second result as self.lm_heu and returns the sum of the two.
     '''
     helper_out = lmm.helperLM(newConsItems, is_last_cell, self.lmFVec)
     (lm_fragment, lm_heuristic) = helper_out
     self.comp_score += lm_fragment
     self.lm_heu = lm_heuristic
     return lm_fragment + self.lm_heu
コード例 #8
0
ファイル: settings.py プロジェクト: amirpouya/Kriya
def _buildOptParser():
    """Create the optparse parser that declares every decoder command-line option."""
    import optparse

    parser = optparse.OptionParser(usage="usage: cat input | %prog [options]")
    add = parser.add_option

    # General decoding flags and input/output files
    add("--debug", dest="debug", default=False, action="store_true", help="Debug flag")
    add("--config", dest="configFile", type="string", help="Configuration file")
    add("--one-nt-decode", dest="one_nt_decode", default=False, action="store_true", help="Run decoder in 1NT mode (ignore 2NT rules)")
    add("--shallow-hiero", dest="shallow_hiero", default=False, action="store_true", help="Flag for shallow decoding")
    add("--shallow-order", dest="sh_order", default=1, type="int", help="Shallow decoding order")
    add("--free-glue", dest="free_glue", default=True, action="store_true", help="Glue rules can freely combine any X")
    add("--index", dest="sentindex", default=0, type="int", help="Sentence index")
    add("--skip-sents", dest="skip_sents", default=None, type="int", help="Skip sentences (usefel to resume decoding mid-way)")
    add("--sentperfile", dest="sent_per_file", default=500, type="int", help="Sentences per file")
    add("--fr-rule-terms", dest="fr_rule_terms", default=5, type="int", help="Terms in French side of Hiero rules")
    add("--inputfile", dest="inFile", type="string", help="Input data file")
    add("--outputfile", dest="outFile", type="string", help="Output file")
    add("--glue-file", dest="glueFile", type="string", help="Glue rules file")
    add("--ttable-file", dest="ruleFile", type="string", help="SCFG rules file")
    add("--lmodel-file", dest="lmFile", type="string", help="LM file")
    add("--use-srilm", dest="use_srilm", default=False, action="store_true", help="Flag for using SRILM")
    add("--no-lm-state", dest="no_lm_state", default=False, action="store_true", help="Don't use LM state for KENLM")
    add("--no-dscnt-UNKlm", dest="no_dscnt_UNKlm", default=False, action="store_true", help="Don't discount LM penalty for UNK")
    add("--no-glue-penalty", dest="no_glue_penalty", default=False, action="store_true", help="Don't penalise glue rules")
    add("--tm-wgt-cnt", dest="tm_weight_cnt", default=5, type="int", help="# of TM weights")

    # Rule tracing, force decoding, local model copy and n-best extremum
    add("--trace-rules", dest="trace_rules", default=0, type="int", help="Trace the rules used in the k-best candidates as specified")
    add("--force-decode", dest="force_decode", default=False, action="store_true", help="Run the decoder in force decode mode")
    add("--reffile", dest="refFile", type="string", help="Reference file or prefix for multiple refs (for force decoding)")
    add("--use-local", dest="local_path", default="None", type="string", help="Local path to copy the models")
    add("--nbest-extremum", dest="nbest_extremum", default=0, type="int", help="Produce nbest_extremum entries if provided; default full nbest list")

    # Feature weights
    add("--lm", dest="weight_lm", default=1.0, type="float", help="Language model weight")
    add("--tm", dest="weight_tm", type="string", help="Translation model weights as a string")
    add("--tmf", dest="weight_tmf", default=1.0, type="float", help="Forward trans model weight")
    add("--tmr", dest="weight_tmr", default=1.0, type="float", help="Reverse trans model weight")
    add("--lwf", dest="weight_lwf", default=0.5, type="float", help="Forward lexical trans weight")
    add("--lwr", dest="weight_lwr", default=0.5, type="float", help="Reverse lexical trans weight")
    add("--pp", dest="weight_pp", default=-1.0, type="float", help="Phrase penalty weight")
    add("--wp", dest="weight_wp", default=-2.0, type="float", help="Word penalty weight")
    add("--wg", dest="weight_glue", default=0.0, type="float", help="Glue rule weight")

    # Search / pruning parameters
    add("--cbp", dest="cbp", default=250, type="int", help="Cube pruning pop limit")
    add("--cbp-diversity", dest="cbp_diversity", default=0, type="int", help="Stack diversity in Cube pruning")
    add("--ttl", dest="ttl", default=20, type="int", help="# of translations for each source span")
    # The beam thresholds default to 0.001, so they are parsed as floats; with
    # type="int" any fractional value given on the command line was rejected.
    add("--btx", dest="beta_x", default=0.001, type="float", help="Beam threshold for X cells")
    add("--bts", dest="beta_s", default=0.001, type="float", help="Beam threshold for S cells")
    add("--eps", dest="eps", default=0.1, type="float", help="Beam search margin")

    # Output format
    add("--1b", dest="one_best", default=False, action="store_true", help="Just do the best derivation")
    add("--zmert-nbest", dest="zmert_nbest", default=False, action="store_true", help="N-best list should be in zmert format")
    add("--ng", dest="n_gram_size", default=3, type="int", help="n-gram size")

    return parser


def args():
    """Parse the command line and initialise the decoder-wide settings.

    Fills the module-level globals `opts` (the parsed options plus a set of
    hard-coded defaults) and `feat` (a Features object carrying the TM and word
    penalty weights), loads the config file through loadConfig(), initialises
    LanguageModelManager and FeatureManager and finally calls copyModels() when a
    local path was given with --use-local.

    Exits with status 1 when no config file is specified, or when force decoding
    is requested without a reference file.
    """
    global opts, feat
    (opts, _positional) = _buildOptParser().parse_args()

    # Default flags & thresholds
    # NOTE(review): this overwrites whatever was passed via --fr-rule-terms, so the
    # option currently has no effect here; confirm whether that is intended.
    opts.fr_rule_terms = 5
    opts.max_phr_len = 10
    opts.nbest_limit = 100
    opts.use_unique_nbest = True
    opts.nbest_format = True
    opts.score_diff_threshold = 0.01
    opts.elider = '*__*'
    opts.lmTupLst = []
    opts.weightLM = []
    opts.weightTM = []

    if opts.configFile is None:
        sys.stderr.write('ERROR: Please specify a Config file. Exiting!!')
        sys.exit(1)
    loadConfig()

    if opts.force_decode and not opts.refFile:
        sys.stderr.write("ERROR: Forced decoding requires at least one reference file.\n")
        sys.stderr.write("       But, no reference file has been specified. Exiting!!\n\n")
        sys.exit(1)

    if (not opts.no_lm_state) and opts.use_srilm:
        sys.stderr.write("INFO: lm_state and srilm are mutually exclusive; no_lm_state can only be used with KENLM.\n")
        sys.stderr.write("      Setting no_lm_state to True and using SRILM\n")
        opts.no_lm_state = True

    # SRILM is always switched off here, so the KenLM wrapper is used from now on
    if opts.use_srilm:
        sys.stderr.write("WARNING: SRILM wrapper is not included with Kriya and needs to be build separately.\n")
        sys.stderr.write("         Falling back to use KenLM wrapper.\n")
        sys.stderr.write("** If you would like to use SRILM, comment out/remove the lines: 94-98 in Kriya-Decoder/settings.py **\n")
        opts.use_srilm = False

    sys.stderr.write("INFO: Using the N-gram size      : %d\n" % (opts.n_gram_size))
    sys.stderr.write("INFO: Run decoder in 1NT mode    : %s\n" % (opts.one_nt_decode))
    sys.stderr.write("INFO: Use X freely in Glue rules : %s\n" % (opts.free_glue))
    sys.stderr.write("INFO: # of rule terms in Fr side : %d\n" % (opts.fr_rule_terms))
    sys.stderr.write("INFO: Generating unique N-best   : %s\n" % (opts.use_unique_nbest))
    sys.stderr.write("INFO: Use state info for KENLM   : %s\n" % (not opts.no_lm_state))
    sys.stderr.write("INFO: Discount LM penalty 4 UNK  : %s\n" % (not opts.no_dscnt_UNKlm))
    sys.stderr.write("INFO: Glue rules penalty applied : %s\n" % (not opts.no_glue_penalty))
    sys.stderr.write("INFO: Cube pruning diversity     : %d\n" % (opts.cbp_diversity))

    sys.stderr.write("INFO: Force decoding status      : %s\n" % (opts.force_decode))
    sys.stderr.write("INFO: Reference file             : %s\n" % (opts.refFile))

    if opts.nbest_extremum > 0:
        if opts.nbest_extremum * 2 >= opts.nbest_limit:
            opts.nbest_extremum = 20
            sys.stderr.write("INFO: Nbest extremum must be less than half the nbest size. Using default nbest extremum of 20.\n")
        else:
            sys.stderr.write("INFO: Nbest extremum set: will produce top-%d and bottom-%d entries as nbest-list\n" % (opts.nbest_extremum, opts.nbest_extremum))

    # Default weights for different features
    feat = Features()
    if opts.weight_tm:
        # Build a real list (map() is a one-shot iterator on Python 3) and split on
        # any run of whitespace so repeated/leading spaces don't yield empty fields.
        feat.tm = [float(wgt) for wgt in opts.weight_tm.split()]
    else:
        feat.tm = [opts.weight_tmf, opts.weight_tmr, opts.weight_lwf,
                   opts.weight_lwr, opts.weight_pp]
    feat.wp = opts.weight_wp

    # Set the nbest_format to 'False' & nbest_limit to '1', if one_best option is set
    if opts.one_best:
        opts.nbest_format = False
        opts.nbest_limit = 1
        sys.stderr.write("INFO: one-best option specified. Option nbest-format will be turned off and nbest_limit set to 1.\n")
    sys.stderr.write("INFO: cbp/ Nbest limit : %d/ %d\n" % (opts.cbp, opts.nbest_limit))

    if opts.shallow_hiero:
        sys.stderr.write("INFO: Shallow decoding hiero with order : %d...\n" % (opts.sh_order))
    else:
        sys.stderr.write("INFO: Shallow decoding hiero turned off; decoding as full hiero ...\n")

    if opts.use_srilm:
        sys.stderr.write("INFO: Using SRILM language model wrapper ...\n")
    else:
        sys.stderr.write("INFO: Using KenLM language model wrapper ...\n")

    # Initialize the language models
    LanguageModelManager.initLMs(len(opts.weightLM), opts.lmTupLst, opts.use_srilm)

    # Set weights for the features
    FeatureManager.glue_wgt = opts.weight_glue
    FeatureManager.wp_wgt = opts.weight_wp
    FeatureManager.lmWgt = opts.weightLM[:]
    FeatureManager.tmWgt = opts.weightTM[:]
    FeatureManager.setFeatureWeights(len(opts.weightLM), len(opts.weightTM), opts.tm_weight_cnt)

    # Compare by value: identity comparison against a string literal is unreliable
    # (and a SyntaxWarning on Python 3.8+); 'None' is the option's "not set" default.
    if opts.local_path != 'None':
        sys.stderr.write("About to copy language model locally ...\n")
        copyModels()
コード例 #9
0
ファイル: ruleItem.py プロジェクト: ChenxiCui/Kriya
 def scoreRule(self):
     '''Score this rule and store the results on the instance.

     self.lm_heu receives LanguageModelManager.scoreLMFeat(self.tgt); self.score
     receives that value plus FeatureManager.scorePTEntry(self.sl_feat).
     '''
     pt_part = FeatureManager.scorePTEntry(self.sl_feat)
     self.lm_heu = lm_part = LanguageModelManager.scoreLMFeat(self.tgt)
     self.score = pt_part + lm_part
コード例 #10
0
 def stringifyMembers(self, cand_hyp):
     '''Delegate to lmm.adjustUNKLMScore with cand_hyp and self.lmFVec; return its result.'''
     lm_feat_vec = self.lmFVec
     return lmm.adjustUNKLMScore(cand_hyp, lm_feat_vec)
コード例 #11
0
def _buildOptParser():
    """Create the optparse parser that declares every decoder command-line option."""
    import optparse

    parser = optparse.OptionParser(usage="usage: cat input | %prog [options]")
    add = parser.add_option

    # General decoding flags and input/output files
    add("--debug", dest="debug", default=False, action="store_true", help="Debug flag")
    add("--config", dest="configFile", type="string", help="Configuration file")
    add("--one-nt-decode", dest="one_nt_decode", default=False, action="store_true", help="Run decoder in 1NT mode (ignore 2NT rules)")
    add("--shallow-hiero", dest="shallow_hiero", default=False, action="store_true", help="Flag for shallow decoding")
    add("--shallow-order", dest="sh_order", default=1, type="int", help="Shallow decoding order")
    add("--free-glue", dest="free_glue", default=True, action="store_true", help="Glue rules can freely combine any X")
    add("--index", dest="sentindex", default=0, type="int", help="Sentence index")
    add("--skip-sents", dest="skip_sents", default=None, type="int", help="Skip sentences (usefel to resume decoding mid-way)")
    add("--sentperfile", dest="sent_per_file", default=500, type="int", help="Sentences per file")
    add("--fr-rule-terms", dest="fr_rule_terms", default=5, type="int", help="Terms in French side of Hiero rules")
    add("--inputfile", dest="inFile", type="string", help="Input data file")
    add("--outputfile", dest="outFile", type="string", help="Output file")
    add("--glue-file", dest="glueFile", type="string", help="Glue rules file")
    add("--ttable-file", dest="ruleFile", type="string", help="SCFG rules file")
    add("--lmodel-file", dest="lmFile", type="string", help="LM file")
    add("--use-srilm", dest="use_srilm", default=False, action="store_true", help="Flag for using SRILM")
    add("--no-lm-state", dest="no_lm_state", default=False, action="store_true", help="Don't use LM state for KENLM")
    add("--no-dscnt-UNKlm", dest="no_dscnt_UNKlm", default=False, action="store_true", help="Don't discount LM penalty for UNK")
    add("--no-glue-penalty", dest="no_glue_penalty", default=False, action="store_true", help="Don't penalise glue rules")
    add("--tm-wgt-cnt", dest="tm_weight_cnt", default=5, type="int", help="# of TM weights")

    # Rule tracing, force decoding, local model copy and n-best extremum
    add("--trace-rules", dest="trace_rules", default=0, type="int", help="Trace the rules used in the k-best candidates as specified")
    add("--force-decode", dest="force_decode", default=False, action="store_true", help="Run the decoder in force decode mode")
    add("--reffile", dest="refFile", type="string", help="Reference file or prefix for multiple refs (for force decoding)")
    add("--use-local", dest="local_path", default="None", type="string", help="Local path to copy the models")
    add("--nbest-extremum", dest="nbest_extremum", default=0, type="int", help="Produce nbest_extremum entries if provided; default full nbest list")

    # Feature weights
    add("--lm", dest="weight_lm", default=1.0, type="float", help="Language model weight")
    add("--tm", dest="weight_tm", type="string", help="Translation model weights as a string")
    add("--tmf", dest="weight_tmf", default=1.0, type="float", help="Forward trans model weight")
    add("--tmr", dest="weight_tmr", default=1.0, type="float", help="Reverse trans model weight")
    add("--lwf", dest="weight_lwf", default=0.5, type="float", help="Forward lexical trans weight")
    add("--lwr", dest="weight_lwr", default=0.5, type="float", help="Reverse lexical trans weight")
    add("--pp", dest="weight_pp", default=-1.0, type="float", help="Phrase penalty weight")
    add("--wp", dest="weight_wp", default=-2.0, type="float", help="Word penalty weight")
    add("--wg", dest="weight_glue", default=0.0, type="float", help="Glue rule weight")

    # Search / pruning parameters
    add("--cbp", dest="cbp", default=250, type="int", help="Cube pruning pop limit")
    add("--cbp-diversity", dest="cbp_diversity", default=0, type="int", help="Stack diversity in Cube pruning")
    add("--ttl", dest="ttl", default=20, type="int", help="# of translations for each source span")
    # The beam thresholds default to 0.001, so they are parsed as floats; with
    # type="int" any fractional value given on the command line was rejected.
    add("--btx", dest="beta_x", default=0.001, type="float", help="Beam threshold for X cells")
    add("--bts", dest="beta_s", default=0.001, type="float", help="Beam threshold for S cells")
    add("--eps", dest="eps", default=0.1, type="float", help="Beam search margin")

    # Output format
    add("--1b", dest="one_best", default=False, action="store_true", help="Just do the best derivation")
    add("--zmert-nbest", dest="zmert_nbest", default=False, action="store_true", help="N-best list should be in zmert format")
    add("--ng", dest="n_gram_size", default=3, type="int", help="n-gram size")

    return parser


def args():
    """Parse the command line and initialise the decoder-wide settings.

    Fills the module-level globals `opts` (the parsed options plus a set of
    hard-coded defaults) and `feat` (a Features object carrying the TM and word
    penalty weights), loads the config file through loadConfig(), initialises
    LanguageModelManager and FeatureManager and finally calls copyModels() when a
    local path was given with --use-local.

    Exits with status 1 when no config file is specified, or when force decoding
    is requested without a reference file.
    """
    global opts, feat
    (opts, _positional) = _buildOptParser().parse_args()

    # Default flags & thresholds
    # NOTE(review): this overwrites whatever was passed via --fr-rule-terms, so the
    # option currently has no effect here; confirm whether that is intended.
    opts.fr_rule_terms = 5
    opts.max_phr_len = 10
    opts.nbest_limit = 100
    opts.use_unique_nbest = True
    opts.nbest_format = True
    opts.score_diff_threshold = 0.01
    opts.elider = '*__*'
    opts.lmTupLst = []
    opts.weightLM = []
    opts.weightTM = []

    if opts.configFile is None:
        sys.stderr.write('ERROR: Please specify a Config file. Exiting!!')
        sys.exit(1)
    loadConfig()

    if opts.force_decode and not opts.refFile:
        sys.stderr.write(
            "ERROR: Forced decoding requires at least one reference file.\n")
        sys.stderr.write(
            "       But, no reference file has been specified. Exiting!!\n\n")
        sys.exit(1)

    if (not opts.no_lm_state) and opts.use_srilm:
        sys.stderr.write(
            "INFO: lm_state and srilm are mutually exclusive; no_lm_state can only be used with KENLM.\n"
        )
        sys.stderr.write("      Setting no_lm_state to True and using SRILM\n")
        opts.no_lm_state = True

    # SRILM is always switched off here, so the KenLM wrapper is used from now on
    if opts.use_srilm:
        sys.stderr.write(
            "WARNING: SRILM wrapper is not included with Kriya and needs to be build separately.\n"
        )
        sys.stderr.write("         Falling back to use KenLM wrapper.\n")
        sys.stderr.write(
            "** If you would like to use SRILM, comment out/remove the lines: 94-98 in Kriya-Decoder/settings.py **\n"
        )
        opts.use_srilm = False

    sys.stderr.write("INFO: Using the N-gram size      : %d\n" % (opts.n_gram_size))
    sys.stderr.write("INFO: Run decoder in 1NT mode    : %s\n" % (opts.one_nt_decode))
    sys.stderr.write("INFO: Use X freely in Glue rules : %s\n" % (opts.free_glue))
    sys.stderr.write("INFO: # of rule terms in Fr side : %d\n" % (opts.fr_rule_terms))
    sys.stderr.write("INFO: Generating unique N-best   : %s\n" % (opts.use_unique_nbest))
    sys.stderr.write("INFO: Use state info for KENLM   : %s\n" % (not opts.no_lm_state))
    sys.stderr.write("INFO: Discount LM penalty 4 UNK  : %s\n" % (not opts.no_dscnt_UNKlm))
    sys.stderr.write("INFO: Glue rules penalty applied : %s\n" % (not opts.no_glue_penalty))
    sys.stderr.write("INFO: Cube pruning diversity     : %d\n" % (opts.cbp_diversity))

    sys.stderr.write("INFO: Force decoding status      : %s\n" % (opts.force_decode))
    sys.stderr.write("INFO: Reference file             : %s\n" % (opts.refFile))

    if opts.nbest_extremum > 0:
        if opts.nbest_extremum * 2 >= opts.nbest_limit:
            opts.nbest_extremum = 20
            sys.stderr.write(
                "INFO: Nbest extremum must be less than half the nbest size. Using default nbest extremum of 20.\n"
            )
        else:
            sys.stderr.write(
                "INFO: Nbest extremum set: will produce top-%d and bottom-%d entries as nbest-list\n"
                % (opts.nbest_extremum, opts.nbest_extremum))

    # Default weights for different features
    feat = Features()
    if opts.weight_tm:
        # Build a real list (map() is a one-shot iterator on Python 3) and split on
        # any run of whitespace so repeated/leading spaces don't yield empty fields.
        feat.tm = [float(wgt) for wgt in opts.weight_tm.split()]
    else:
        feat.tm = [opts.weight_tmf, opts.weight_tmr, opts.weight_lwf,
                   opts.weight_lwr, opts.weight_pp]
    feat.wp = opts.weight_wp

    # Set the nbest_format to 'False' & nbest_limit to '1', if one_best option is set
    if opts.one_best:
        opts.nbest_format = False
        opts.nbest_limit = 1
        sys.stderr.write(
            "INFO: one-best option specified. Option nbest-format will be turned off and nbest_limit set to 1.\n"
        )
    sys.stderr.write("INFO: cbp/ Nbest limit : %d/ %d\n" % (opts.cbp, opts.nbest_limit))

    if opts.shallow_hiero:
        sys.stderr.write("INFO: Shallow decoding hiero with order : %d...\n" % (opts.sh_order))
    else:
        sys.stderr.write(
            "INFO: Shallow decoding hiero turned off; decoding as full hiero ...\n"
        )

    if opts.use_srilm:
        sys.stderr.write("INFO: Using SRILM language model wrapper ...\n")
    else:
        sys.stderr.write("INFO: Using KenLM language model wrapper ...\n")

    # Initialize the language models
    LanguageModelManager.initLMs(len(opts.weightLM), opts.lmTupLst,
                                 opts.use_srilm)

    # Set weights for the features
    FeatureManager.glue_wgt = opts.weight_glue
    FeatureManager.wp_wgt = opts.weight_wp
    FeatureManager.lmWgt = opts.weightLM[:]
    FeatureManager.tmWgt = opts.weightTM[:]
    FeatureManager.setFeatureWeights(len(opts.weightLM), len(opts.weightTM),
                                     opts.tm_weight_cnt)

    # Compare by value: identity comparison against a string literal is unreliable
    # (and a SyntaxWarning on Python 3.8+); 'None' is the option's "not set" default.
    if opts.local_path != 'None':
        sys.stderr.write("About to copy language model locally ...\n")
        copyModels()