def getFeatVec(self):
    '''Return the feature values of the Hypothesis as a vector'''

    cand_hyp = self.getHypothesis()
    agg_sl_feat, agg_sf_feat = self.computeFeatures()
    feat_str = FeatureManager.formatFeatureVals(cand_hyp, agg_sl_feat, agg_sf_feat)
    return feat_str
def printEntry(self):
    '''Return the elements of the result (hypothesis string and feature values) for printing'''

    cand_hyp = self.getHypothesis()
    agg_sl_feat, agg_sf_feat = self.computeFeatures()
    feat_str = FeatureManager.formatFeatureVals(cand_hyp, agg_sl_feat, agg_sf_feat)
    return cand_hyp, feat_str
def args():
    import optparse
    optparser = optparse.OptionParser(usage="usage: cat input | %prog [options]")
    optparser.add_option("", "--debug", dest="debug", default=False, action="store_true", help="Debug flag")
    optparser.add_option("", "--config", dest="configFile", type="string", help="Configuration file")
    optparser.add_option("", "--one-nt-decode", dest="one_nt_decode", default=False, action="store_true", help="Run decoder in 1NT mode (ignore 2NT rules)")
    optparser.add_option("", "--shallow-hiero", dest="shallow_hiero", default=False, action="store_true", help="Flag for shallow decoding")
    optparser.add_option("", "--shallow-order", dest="sh_order", default=1, type="int", help="Shallow decoding order")
    optparser.add_option("", "--free-glue", dest="free_glue", default=True, action="store_true", help="Glue rules can freely combine any X")
    optparser.add_option("", "--index", dest="sentindex", default=0, type="int", help="Sentence index")
    optparser.add_option("", "--skip-sents", dest="skip_sents", default=None, type="int", help="Skip sentences (useful to resume decoding mid-way)")
    optparser.add_option("", "--sentperfile", dest="sent_per_file", default=500, type="int", help="Sentences per file")
    optparser.add_option("", "--fr-rule-terms", dest="fr_rule_terms", default=5, type="int", help="Terms in French side of Hiero rules")
    optparser.add_option("", "--inputfile", dest="inFile", type="string", help="Input data file")
    optparser.add_option("", "--outputfile", dest="outFile", type="string", help="Output file")
    optparser.add_option("", "--glue-file", dest="glueFile", type="string", help="Glue rules file")
    optparser.add_option("", "--ttable-file", dest="ruleFile", type="string", help="SCFG rules file")
    optparser.add_option("", "--lmodel-file", dest="lmFile", type="string", help="LM file")
    optparser.add_option("", "--use-srilm", dest="use_srilm", default=False, action="store_true", help="Flag for using SRILM")
    optparser.add_option("", "--no-lm-state", dest="no_lm_state", default=False, action="store_true", help="Don't use LM state for KENLM")
    optparser.add_option("", "--no-dscnt-UNKlm", dest="no_dscnt_UNKlm", default=False, action="store_true", help="Don't discount LM penalty for UNK")
    optparser.add_option("", "--no-glue-penalty", dest="no_glue_penalty", default=False, action="store_true", help="Don't penalise glue rules")
    optparser.add_option("", "--tm-wgt-cnt", dest="tm_weight_cnt", default=5, type="int", help="# of TM weights")
    optparser.add_option("", "--trace-rules", dest="trace_rules", default=0, type="int", help="Trace the rules used in the k-best candidates as specified")
    optparser.add_option("", "--force-decode", dest="force_decode", default=False, action="store_true", help="Run the decoder in force decode mode")
    optparser.add_option("", "--reffile", dest="refFile", type="string", help="Reference file or prefix for multiple refs (for force decoding)")
    optparser.add_option("", "--use-local", dest="local_path", default="None", type="string", help="Local path to copy the models")
    optparser.add_option("", "--nbest-extremum", dest="nbest_extremum", default=0, type="int", help="Produce nbest_extremum entries if provided; default full nbest list")
    optparser.add_option("", "--lm", dest="weight_lm", default=1.0, type="float", help="Language model weight")
    optparser.add_option("", "--tm", dest="weight_tm", type="string", help="Translation model weights as a string")
    optparser.add_option("", "--tmf", dest="weight_tmf", default=1.0, type="float", help="Forward trans model weight")
    optparser.add_option("", "--tmr", dest="weight_tmr", default=1.0, type="float", help="Reverse trans model weight")
    optparser.add_option("", "--lwf", dest="weight_lwf", default=0.5, type="float", help="Forward lexical trans weight")
    optparser.add_option("", "--lwr", dest="weight_lwr", default=0.5, type="float", help="Reverse lexical trans weight")
    optparser.add_option("", "--pp", dest="weight_pp", default=-1.0, type="float", help="Phrase penalty weight")
    optparser.add_option("", "--wp", dest="weight_wp", default=-2.0, type="float", help="Word penalty weight")
    optparser.add_option("", "--wg", dest="weight_glue", default=0.0, type="float", help="Glue rule weight")
    optparser.add_option("", "--cbp", dest="cbp", default=250, type="int", help="Cube pruning pop limit")
    optparser.add_option("", "--cbp-diversity", dest="cbp_diversity", default=0, type="int", help="Stack diversity in Cube pruning")
    optparser.add_option("", "--ttl", dest="ttl", default=20, type="int", help="# of translations for each source span")
    optparser.add_option("", "--btx", dest="beta_x", default=0.001, type="float", help="Beam threshold for X cells")
    optparser.add_option("", "--bts", dest="beta_s", default=0.001, type="float", help="Beam threshold for S cells")
    optparser.add_option("", "--eps", dest="eps", default=0.1, type="float", help="Beam search margin")
    optparser.add_option("", "--1b", dest="one_best", default=False, action="store_true", help="Just do the best derivation")
    optparser.add_option("", "--zmert-nbest", dest="zmert_nbest", default=False, action="store_true", help="N-best list should be in zmert format")
    optparser.add_option("", "--ng", dest="n_gram_size", default=3, type="int", help="n-gram size")

    global opts, feat
    (opts, args) = optparser.parse_args()

    # Default flags & thresholds
    opts.fr_rule_terms = 5
    opts.max_phr_len = 10
    opts.nbest_limit = 100
    opts.use_unique_nbest = True
    opts.nbest_format = True
    opts.score_diff_threshold = 0.01
    opts.elider = '*__*'
    opts.lmTupLst = []
    opts.weightLM = []
    opts.weightTM = []

    if opts.configFile is None:
        sys.stderr.write('ERROR: Please specify a Config file. Exiting!!\n')
        sys.exit(1)
    if opts.configFile is not None:
        loadConfig()

    if opts.force_decode and not opts.refFile:
        sys.stderr.write("ERROR: Forced decoding requires at least one reference file.\n")
        sys.stderr.write("    But, no reference file has been specified. Exiting!!\n\n")
        sys.exit(1)

    if (not opts.no_lm_state) and opts.use_srilm:
        sys.stderr.write("INFO: lm_state and srilm are mutually exclusive; no_lm_state can only be used with KENLM.\n")
        sys.stderr.write("    Setting no_lm_state to True and using SRILM\n")
        opts.no_lm_state = True

    if opts.use_srilm:
        sys.stderr.write("WARNING: SRILM wrapper is not included with Kriya and needs to be built separately.\n")
        sys.stderr.write("    Falling back to use KenLM wrapper.\n")
        sys.stderr.write("** If you would like to use SRILM, comment out/remove the lines: 94-98 in Kriya-Decoder/settings.py **\n")
        opts.use_srilm = False

    sys.stderr.write( "INFO: Using the N-gram size : %d\n" % (opts.n_gram_size) )
    sys.stderr.write( "INFO: Run decoder in 1NT mode : %s\n" % (opts.one_nt_decode) )
    sys.stderr.write( "INFO: Use X freely in Glue rules : %s\n" % (opts.free_glue) )
    sys.stderr.write( "INFO: # of rule terms in Fr side : %d\n" % (opts.fr_rule_terms) )
    sys.stderr.write( "INFO: Generating unique N-best : %s\n" % (opts.use_unique_nbest) )
    sys.stderr.write( "INFO: Use state info for KENLM : %s\n" % (not opts.no_lm_state) )
    sys.stderr.write( "INFO: Discount LM penalty 4 UNK : %s\n" % (not opts.no_dscnt_UNKlm) )
    sys.stderr.write( "INFO: Glue rules penalty applied : %s\n" % (not opts.no_glue_penalty) )
    sys.stderr.write( "INFO: Cube pruning diversity : %d\n" % (opts.cbp_diversity) )
    sys.stderr.write( "INFO: Force decoding status : %s\n" % (opts.force_decode) )
    sys.stderr.write( "INFO: Reference file : %s\n" % (opts.refFile) )

    if opts.nbest_extremum > 0:
        if opts.nbest_extremum * 2 >= opts.nbest_limit:
            opts.nbest_extremum = 20
            sys.stderr.write( "INFO: Nbest extremum must be less than half the nbest size. Using default nbest extremum of 20.\n" )
        else:
            sys.stderr.write( "INFO: Nbest extremum set: will produce top-%d and bottom-%d entries as nbest-list\n" % (opts.nbest_extremum, opts.nbest_extremum) )

    # Default weights for different features
    feat = Features()
    if opts.weight_tm:
        feat.tm = [float(x) for x in opts.weight_tm.split(' ')]
    else:
        feat.tm = [opts.weight_tmf, opts.weight_tmr, opts.weight_lwf,
                   opts.weight_lwr, opts.weight_pp]
    feat.wp = opts.weight_wp

    # Set the nbest_format to 'False' & nbest_limit to '1', if one_best option is set
    if opts.one_best:
        opts.nbest_format = False
        opts.nbest_limit = 1
        sys.stderr.write("INFO: one-best option specified. Option nbest-format will be turned off and nbest_limit set to 1.\n")

    sys.stderr.write( "INFO: cbp/ Nbest limit : %d/ %d\n" % (opts.cbp, opts.nbest_limit) )
    if opts.shallow_hiero:
        sys.stderr.write( "INFO: Shallow decoding hiero with order : %d...\n" % (opts.sh_order) )
    else:
        sys.stderr.write( "INFO: Shallow decoding hiero turned off; decoding as full hiero ...\n" )
    if opts.use_srilm:
        sys.stderr.write( "INFO: Using SRILM language model wrapper ...\n" )
    else:
        sys.stderr.write( "INFO: Using KenLM language model wrapper ...\n" )

    # Initialize the language models
    LanguageModelManager.initLMs(len(opts.weightLM), opts.lmTupLst, opts.use_srilm)

    # Set weights for the features
    FeatureManager.glue_wgt = opts.weight_glue
    FeatureManager.wp_wgt = opts.weight_wp
    FeatureManager.lmWgt = opts.weightLM[:]
    FeatureManager.tmWgt = opts.weightTM[:]
    FeatureManager.setFeatureWeights(len(opts.weightLM), len(opts.weightTM), opts.tm_weight_cnt)

    if opts.local_path != 'None':
        sys.stderr.write( "About to copy language model locally ...\n" )
        copyModels()
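# ---------------------------------------------------------------------------
# Minimal illustrative sketch (not part of the decoder): args() above either
# splits the --tm option string into individual translation-model weights or
# falls back to the five per-flag defaults (tmf, tmr, lwf, lwr, pp). The
# helper and the sample weight string below are hypothetical and only
# demonstrate that parsing step.
# ---------------------------------------------------------------------------
def _parse_tm_weights(weight_tm_str):
    """Split a space-separated --tm weight string into floats.

    >>> _parse_tm_weights('1.0 1.0 0.5 0.5 -1.0')
    [1.0, 1.0, 0.5, 0.5, -1.0]
    """
    return [float(x) for x in weight_tm_str.split(' ')]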
def scoreRule(self):
    p_score = FeatureManager.scorePTEntry(self.sl_feat)
    lm_score = LanguageModelManager.scoreLMFeat(self.tgt)
    self.lm_heu = lm_score
    self.score = p_score + lm_score
def turnOffGlue(self):
    FeatureManager.turnOffGlue(self.sl_feat)
def getScore4TTL(self):
    return FeatureManager.getScore4TTL(self.sl_feat)
def initGlue(cls, src, tgt, glue_val):
    return RuleItem(src, tgt, FeatureManager.buildGlueFeats(glue_val))
def initRule(cls, src, tgt, probs):
    term_count = 0
    for tgt_term in tgt.split():
        if tgt_term == 'X__1' or tgt_term == 'X__2':
            continue
        term_count += 1
    return RuleItem(src, tgt, FeatureManager.buildRuleFeats(probs, term_count))
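# ---------------------------------------------------------------------------
# Minimal illustrative sketch (not part of the decoder): how initRule() above
# arrives at its terminal count. The target-side non-terminal placeholders
# X__1 and X__2 are skipped; every other token is a terminal and feeds the
# word-penalty feature. The sample target string below is hypothetical.
# ---------------------------------------------------------------------------
def _count_target_terminals(tgt):
    """Count target-side terminals, ignoring X__1/X__2 non-terminals.

    >>> _count_target_terminals('ne X__1 pas')
    2
    """
    return sum(1 for tok in tgt.split() if tok not in ('X__1', 'X__2'))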