# --- Input files -----------------------------------------------------------
optparser.add_option("--fr", dest="fr",
                     default=os.path.join("data", "train.fr"),
                     help="French file")
optparser.add_option("--testnbest", dest="testnbest",
                     default=os.path.join("data", "test.nbest"),
                     help="test N-best file")
optparser.add_option("--testfr", dest="testfr",
                     default=os.path.join("data", "test.fr"),
                     help="test French file")

# --- Numeric hyper-parameters ----------------------------------------------
# optparse stores command-line values as strings unless `type` is given, so
# without it `-t 100` would yield the str "100" while the default stays an
# int.  Declaring the type keeps both the default and the CLI path numeric.
optparser.add_option("-t", "--tau", dest="tau", type="int", default=5000,
                     help="samples generated from n-best list per input sentence (default 5000)")
optparser.add_option("-a", "--alpha", dest="alpha", type="float", default=0.1,
                     help="sampler acceptance cutoff (default 0.1)")
optparser.add_option("-x", "--xi", dest="xi", type="int", default=100,
                     help="training data generated from the samples tau (default 100)")
optparser.add_option("-e", "--eta", dest="eta", type="float", default=0.1,
                     help="perceptron learning rate (default 0.1)")
optparser.add_option("-p", "--epo", dest="epo", type="int", default=5,
                     help="number of epochs for perceptron training (default 5)")

# Positional arguments are ignored; only the parsed options are kept.
opts, _ = optparser.parse_args()

# Record type for one n-best candidate.
# Previous field layout, kept for reference:
# entry = namedtuple("entry", "sentence, bleu_score, smoothed_bleu, feature_list")
entry = namedtuple("entry", "sentence, smoothed_bleu, feature_list")

# Pre-process the training files, then the test files.
pre_process(opts.fr, opts.nbest)
pre_process(opts.testfr, opts.testnbest)


def get_sample(nbest):
    """Build a shuffled list of index pairs over one n-best list.

    nbest is described by the original author as a list of
    [sentence, bleu_score, smoothed_bleu_score, feature_list] records, of
    which only bleu_score and smoothed_bleu_score are used here.

    NOTE(review): the `entry` namedtuple declared just above has no
    bleu_score field -- confirm which record layout callers actually pass.
    """
    # Generate every unordered pair of candidate indices:
    # len(pairs) == len(nbest) * (len(nbest) - 1) / 2
    pairs = list(itertools.combinations(range(len(nbest)), 2))
    # Randomise the order in which pairs are visited.
    random.shuffle(pairs)
# --- Numeric hyper-parameters ----------------------------------------------
optparser.add_option(
    "-x", "--xi",
    dest="xi",
    type="int",
    default=100,
    help="training data generated from the samples tau (default 100)",
)
optparser.add_option(
    "-e", "--eta",
    dest="eta",
    type="float",
    default=0.1,
    help="perceptron learning rate (default 0.1)",
)
optparser.add_option(
    "-p", "--epo",
    dest="epo",
    type="int",
    default=5,
    help="number of epochs for perceptron training (default 5)",
)

# --- Input / output files ---------------------------------------------------
optparser.add_option(
    "--fr",
    dest="fr",
    default=os.path.join("data", "train.fr"),
    help="train French file",
)
optparser.add_option(
    "--testnbest",
    dest="testnbest",
    default=os.path.join("data", "test.nbest"),
    help="test N-best file",
)
optparser.add_option(
    "--testfr",
    dest="testfr",
    default=os.path.join("data", "test.fr"),
    help="test French file",
)
optparser.add_option(
    "--nbestDic",
    dest="nbestDS",
    default=os.path.join("data", "nbest.ds.gz"),
    help="dumping file of the data structure that storing scores for nbestDic",
)
optparser.add_option(
    "--testen",
    dest="testen",
    default=os.path.join("data", "test.en"),
    help="test en",
)

# Only the parsed options are used; positional arguments are discarded.
opts, _ = optparser.parse_args()

# Record type for one n-best candidate.
# Previous field layout, kept for reference:
# entry = namedtuple("entry", "sentence, bleu_score, smoothed_bleu, feature_list")
entry = namedtuple("entry", "sentence, smoothed_bleu, feature_list")

# Hand the training triple and the test triple to pre_process in one call.
pre_process([
    (opts.fr, opts.nbest, opts.en),
    (opts.testfr, opts.testnbest, opts.testen),
])


def get_sample(nbest):
    """Build a shuffled list of index pairs over one n-best list.

    nbest is described by the original author as a list of
    [sentence, bleu_score, smoothed_bleu_score, feature_list] records, of
    which only bleu_score and smoothed_bleu_score are used here.

    NOTE(review): the `entry` namedtuple declared just above has no
    bleu_score field -- confirm which record layout callers actually pass.
    """
    # Every unordered pair of candidate indices:
    # len(pairs) == len(nbest) * (len(nbest) - 1) / 2
    pairs = list(itertools.combinations(range(len(nbest)), 2))
    # Visit the pairs in random order.
    random.shuffle(pairs)
    samples = []
help="test French file") optparser.add_option( "--nbestDic", dest="nbestDS", default=os.path.join("data", "nbest.ds.gz"), help="dumping file of the data structure that storing scores for nbestDic") optparser.add_option("--testen", dest="testen", default=os.path.join("data", "test.en"), help="test en") (opts, _) = optparser.parse_args() # entry = namedtuple("entry", "sentence, bleu_score, smoothed_bleu, feature_list") entry = namedtuple("entry", "sentence, smoothed_bleu, feature_list") pre_process([(opts.fr, opts.nbest, opts.en), (opts.testfr, opts.testnbest, opts.testen)]) def get_sample(nbest): ''' nbest is a list of [setence, bleu_score, smoothed_bleu_score, featrue_list] We only use bleu_socre and smoothed_bleu_score here ''' # generate all the pairs combinations # len(pairs) = len(nbest) * (len(nbest) - 1) / 2 pairs = list(itertools.combinations(range(0, len(nbest)), 2)) random.shuffle(pairs) samples = [] for pair in pairs: