예제 #1
0
# Input file locations; each default points into the local "data" directory.
optparser.add_option("--fr", dest="fr", default=os.path.join("data", "train.fr"), help="French file")
optparser.add_option("--testnbest", dest="testnbest", default=os.path.join("data", "test.nbest"), help="test N-best file")
optparser.add_option("--testfr", dest="testfr", default=os.path.join("data", "test.fr"), help="test French file")

# Numeric hyper-parameters.
# An explicit ``type`` is given for each one: without it optparse stores a
# value supplied on the command line as a plain string, while the default
# stays an int/float, so the option's type would depend on whether the user
# passed the flag.
optparser.add_option("-t", "--tau", dest="tau", type="int", default=5000, help="samples generated from n-best list per input sentence (default 5000)")
optparser.add_option("-a", "--alpha", dest="alpha", type="float", default=0.1, help="sampler acceptance cutoff (default 0.1)")
optparser.add_option("-x", "--xi", dest="xi", type="int", default=100, help="training data generated from the samples tau (default 100)")
optparser.add_option("-e", "--eta", dest="eta", type="float", default=0.1, help="perceptron learning rate (default 0.1)")
optparser.add_option("-p", "--epo", dest="epo", type="int", default=5, help="number of epochs for perceptron training (default 5)")


# Parse the command line; leftover positional arguments are ignored.
opts, _ = optparser.parse_args()

# Record type with three fields. An earlier layout also carried a raw
# "bleu_score" field between "sentence" and "smoothed_bleu".
entry = namedtuple(
    "entry",
    "sentence, smoothed_bleu, feature_list",
)

# Preprocess the --fr / n-best inputs first, then the test inputs.
pre_process(opts.fr, opts.nbest)
pre_process(opts.testfr, opts.testnbest)


def get_sample(nbest):
    '''
    nbest is a list of [sentence, bleu_score, smoothed_bleu_score, feature_list]
    We only use bleu_score and smoothed_bleu_score here
    '''

    # generate all the pairs combinations
    # len(pairs) = len(nbest) * (len(nbest) - 1) / 2
    pairs = list(itertools.combinations(range(0, len(nbest)), 2))
    random.shuffle(pairs)

예제 #2
0
# Typed numeric hyper-parameters.
optparser.add_option(
    "-x", "--xi",
    dest="xi",
    type="int",
    default=100,
    help="training data generated from the samples tau (default 100)",
)
optparser.add_option(
    "-e", "--eta",
    dest="eta",
    type="float",
    default=0.1,
    help="perceptron learning rate (default 0.1)",
)
optparser.add_option(
    "-p", "--epo",
    dest="epo",
    type="int",
    default=5,
    help="number of epochs for perceptron training (default 5)",
)

# File locations; every default points into the local "data" directory.
optparser.add_option(
    "--fr",
    dest="fr",
    default=os.path.join("data", "train.fr"),
    help="train French file",
)
optparser.add_option(
    "--testnbest",
    dest="testnbest",
    default=os.path.join("data", "test.nbest"),
    help="test N-best file",
)
optparser.add_option(
    "--testfr",
    dest="testfr",
    default=os.path.join("data", "test.fr"),
    help="test French file",
)
# Note: the flag is spelled --nbestDic but the value is stored as opts.nbestDS.
optparser.add_option(
    "--nbestDic",
    dest="nbestDS",
    default=os.path.join("data", "nbest.ds.gz"),
    help="dumping file of the data structure that storing scores for nbestDic",
)
optparser.add_option(
    "--testen",
    dest="testen",
    default=os.path.join("data", "test.en"),
    help="test en",
)

# Read the options; positional arguments are not used.
opts, _ = optparser.parse_args()

# Three-field record type. A previous version additionally had a raw
# "bleu_score" field after "sentence".
entry = namedtuple(
    "entry",
    "sentence, smoothed_bleu, feature_list",
)

# Hand both data sets to pre_process in one call, as a list of
# (french, n-best, english) path triples.
pre_process([
    (opts.fr, opts.nbest, opts.en),
    (opts.testfr, opts.testnbest, opts.testen),
])


def get_sample(nbest):
    '''
    nbest is a list of [sentence, bleu_score, smoothed_bleu_score, feature_list]
    We only use bleu_score and smoothed_bleu_score here
    '''

    # generate all the pairs combinations
    # len(pairs) = len(nbest) * (len(nbest) - 1) / 2
    pairs = list(itertools.combinations(range(0, len(nbest)), 2))
    random.shuffle(pairs)


    samples = [];
예제 #3
0
                     help="test French file")
# The command-line flag is --nbestDic, but the parsed value lands in
# opts.nbestDS.
optparser.add_option(
    "--nbestDic",
    dest="nbestDS",
    default=os.path.join("data", "nbest.ds.gz"),
    help="dumping file of the data structure that storing scores for nbestDic",
)
# Defaults to data/test.en.
optparser.add_option(
    "--testen",
    dest="testen",
    default=os.path.join("data", "test.en"),
    help="test en",
)

# Only the option values are kept; positional arguments are discarded.
opts, _ = optparser.parse_args()

# Record type with the fields sentence / smoothed_bleu / feature_list.
# (An older variant also included a raw "bleu_score" field.)
entry = namedtuple(
    "entry",
    "sentence, smoothed_bleu, feature_list",
)

# Single pre_process call covering both sets of
# (french, n-best, english) paths.
pre_process(
    [
        (opts.fr, opts.nbest, opts.en),
        (opts.testfr, opts.testnbest, opts.testen),
    ]
)


def get_sample(nbest):
    '''
    nbest is a list of [sentence, bleu_score, smoothed_bleu_score, feature_list]
    We only use bleu_score and smoothed_bleu_score here
    '''

    # generate all the pairs combinations
    # len(pairs) = len(nbest) * (len(nbest) - 1) / 2
    pairs = list(itertools.combinations(range(0, len(nbest)), 2))
    random.shuffle(pairs)

    samples = []
    for pair in pairs: