Ejemplo n.º 1
0
def main(outputFilename):
  """Write tables of BLEU scores for the system and the baseline.

  Results are loaded from the module-level ``SysSemiFile``,
  ``SysNonSemiFile``, ``BaseSemiFile`` and ``BaseNonSemiFile`` paths.  Three
  tables are written to *outputFilename*, separated by blank lines: one for
  the "semi" results, one for the "non-semi" results and one for both
  concatenated.  Every table has one row for each n in 1..3 with the "best"
  score and the averaged "random" score of each side.

  Each score is rendered as the first six characters of its ``str()`` form.
  """
  floatLen = 6      # characters kept from each score's string form
  randomRuns = 100  # calls averaged for the non-deterministic scores

  sysSemis = loadResults(SysSemiFile)
  sysNonSemis = loadResults(SysNonSemiFile)
  baseSemis = loadResults(BaseSemiFile)
  baseNonSemis = loadResults(BaseNonSemiFile)

  tables = []
  # The loop variables deliberately avoid the name ``sys`` so the standard
  # module of that name is not shadowed.
  for (sysResults, baseResults) in [(sysSemis, baseSemis),
                                    (sysNonSemis, baseNonSemis),
                                    (sysSemis + sysNonSemis,
                                     baseSemis + baseNonSemis)]:
    # Drop the first field of every result triple.
    sysPairs = [(y, z) for (_, y, z) in sysResults]
    basePairs = [(y, z) for (_, y, z) in baseResults]
    table = "$n$ & System Best & Baseline Best & System Random & Baseline Random\n"
    for n in [1, 2, 3]:
      sysBest = bleu(sysPairs, n, True)
      baseBest = bleu(basePairs, n, True)
      # since the random results vary from one call to another, we average
      # over many calls
      sysRand = mean([bleu(sysPairs, n, False) for _ in range(randomRuns)])
      baseRand = mean([bleu(basePairs, n, False) for _ in range(randomRuns)])
      table += "%d & %s & %s & %s & %s\n" \
               % (n, str(sysBest)[:floatLen], str(baseBest)[:floatLen],
                     str(sysRand)[:floatLen], str(baseRand)[:floatLen])
    tables.append(table)

  # One blank line between tables, none after the last one.
  output = "\n".join(tables)
  with open(outputFilename, "w") as fl:
    fl.write(output)
Ejemplo n.º 2
0
def evaluate(batch_idx, epoch):
    """Decode the validation set with beam search and record BLEU scores.

    For every batch of ``valid_iter`` the top beam-search hypothesis is kept
    together with the first reference of each remaining batch field.  The
    hypotheses are written to a temporary file that is scored by
    ``bleu_script``; in addition ``bleu`` is evaluated for n = 1..4 with
    smoothing.  All five scores are sent to ``writer``, logged as an ASCII
    table and appended to ``opt.score_list``.

    Args:
        batch_idx: batch counter, used only for the log line and the
            ``opt.score_list`` record.
        epoch: epoch number, used as the step of every scalar and in the
            log line / record.
    """
    import os

    model.eval()
    hyp_list = []
    ref_list = []
    for batch in valid_iter:
        src_raw = batch[0]
        trg_raw = batch[1:]
        src, src_mask = convert_data(
            src_raw, src_vocab, device, True, UNK, PAD, SOS, EOS
        )
        with torch.no_grad():
            output = model.beamsearch(src, src_mask, opt.beam_size, normalize=True)
            # Only the first (top-ranked) beam entry is used.
            best_hyp, _ = output[0]
            best_hyp = convert_str([best_hyp], trg_vocab)
            hyp_list.append(best_hyp[0])
            ref_list.append([x[0] for x in trg_raw])

    hyp_list = [" ".join(x) for x in hyp_list]

    # mkstemp creates the file atomically (mktemp only returns a name, which
    # is race-prone); the file is removed once the script has scored it.
    fd, p_tmp = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as f_tmp:
            f_tmp.write("\n".join(hyp_list))
        bleu2 = bleu_script(p_tmp)
    finally:
        os.remove(p_tmp)

    bleu_1_gram = bleu(hyp_list, ref_list, smoothing=True, n=1)
    bleu_2_gram = bleu(hyp_list, ref_list, smoothing=True, n=2)
    bleu_3_gram = bleu(hyp_list, ref_list, smoothing=True, n=3)
    bleu_4_gram = bleu(hyp_list, ref_list, smoothing=True, n=4)
    writer.add_scalar("./bleu_1_gram", bleu_1_gram, epoch)
    writer.add_scalar("./bleu_2_gram", bleu_2_gram, epoch)
    writer.add_scalar("./bleu_3_gram", bleu_3_gram, epoch)
    writer.add_scalar("./bleu_4_gram", bleu_4_gram, epoch)
    writer.add_scalar("./multi-bleu", bleu2, epoch)
    bleu_result = [
        ["multi-bleu", "bleu_1-gram", "bleu_2-gram", "bleu_3-gram", "bleu_4-gram"],
        [bleu2, bleu_1_gram, bleu_2_gram, bleu_3_gram, bleu_4_gram],
    ]
    bleu_table = AsciiTable(bleu_result)
    logger.info(
        "BLEU score for Epoch-{}-batch-{}: ".format(epoch, batch_idx)
        + "\n"
        + bleu_table.table
    )
    opt.score_list.append(
        (bleu2, bleu_1_gram, bleu_2_gram, bleu_3_gram, bleu_4_gram, batch_idx, epoch)
    )
Ejemplo n.º 3
0
def main():
    """Train six feature weights from an n-best list and print them.

    Reads reference sentences from ``opts.en`` (one per line) and n-best
    entries from ``opts.nbest`` (``index ||| sentence ||| features``).  Every
    candidate is stored as ``(sentence, bleu, smoothed_bleu, features)``.
    For ``opts.epo`` epochs, the pairs returned by ``get_sample`` for each
    sentence are sorted by decreasing smoothed-BLEU difference and the first
    ``opts.xi`` of them drive a per-feature update scaled by ``opts.eta``.
    The number of updates per epoch goes to stderr; the final weights are
    printed one per line.
    """
    num_features = 6  # the update loop assumes exactly six features

    # File handles are closed deterministically by the ``with`` blocks.
    with open(opts.en) as ref_file:
        references = dict(enumerate(ref_file))

    nbests = defaultdict(list)
    with open(opts.nbest) as nbest_file:
        for line in nbest_file:
            (sent_id, sentence, features) = line.strip().split("|||")
            sent_id = int(sent_id)
            stats = list(bleu_stats(sentence, references[sent_id]))
            bleu_score = bleu(stats)
            smoothed_bleu_score = smoothed_bleu(stats)
            # making the feature string to float list
            feature_list = [float(x) for x in features.split()]
            nbests[sent_id].append(
                (sentence, bleu_score, smoothed_bleu_score, feature_list))

    theta = [1.0 / num_features] * num_features  # uniform initialization

    for _ in range(opts.epo):
        mistake = 0
        for sent_id in nbests:
            sample = get_sample(nbests[sent_id])
            # Largest smoothed-BLEU gap (tuple index 2) first.
            sample.sort(key=lambda pair: pair[0][2] - pair[1][2], reverse=True)
            for k in range(min(len(sample), opts.xi)):
                first_feats = sample[k][0][3]
                second_feats = sample[k][1][3]
                for j in range(num_features):
                    if theta[j] * first_feats[j] <= theta[j] * second_feats[j]:
                        mistake += 1
                        theta[j] += opts.eta * (first_feats[j] - second_feats[j])
        sys.stderr.write("Mistake:  %s\n" % (mistake,))
    print("\n".join([str(weight) for weight in theta]))
Ejemplo n.º 4
0
def gold_score(hyp_line, ref_line):
    """Score one n-best entry against its reference with ``bleu.bleu``.

    *hyp_line* must consist of exactly three `` ||| ``-separated fields; only
    the middle one (the hypothesis text) is used.  The hypothesis and
    *ref_line* are both split on whitespace before the statistics are
    collected.
    """
    _index, hypothesis, _features = hyp_line.split(" ||| ")
    stats = tuple(bleu.bleu_stats(hypothesis.split(), ref_line.split()))
    return bleu.bleu(stats)
Ejemplo n.º 5
0
def main():
    """Learn six feature weights from an n-best list and print them.

    ``opts.en`` supplies one reference sentence per line and ``opts.nbest``
    supplies ``index ||| sentence ||| features`` entries.  Each candidate is
    kept as ``(sentence, bleu, smoothed_bleu, features)``.  During each of
    the ``opts.epo`` epochs the pairs produced by ``get_sample`` are ordered
    by decreasing smoothed-BLEU difference, and the first ``opts.xi`` pairs
    trigger per-feature updates scaled by ``opts.eta``.  The update count of
    every epoch is written to stderr and the final weights are printed one
    per line.
    """
    feature_count = 6  # the update loop assumes exactly six features

    # ``with`` guarantees both inputs are closed, even on a parse error.
    with open(opts.en) as en_file:
        references = dict(enumerate(en_file))

    nbests = defaultdict(list)
    with open(opts.nbest) as nbest_file:
        for line in nbest_file:
            (index, sentence, features) = line.strip().split("|||")
            index = int(index)
            stats = list(bleu_stats(sentence, references[index]))
            bleu_score = bleu(stats)
            smoothed_bleu_score = smoothed_bleu(stats)
            # making the feature string to float list
            feature_list = [float(x) for x in features.split()]
            nbests[index].append(
                (sentence, bleu_score, smoothed_bleu_score, feature_list))

    theta = [1.0 / feature_count] * feature_count  # uniform initialization

    for _ in range(opts.epo):
        mistake = 0
        for index in nbests:
            sample = get_sample(nbests[index])
            # Tuple index 2 is the smoothed BLEU score stored above.
            sample.sort(key=lambda pair: pair[0][2] - pair[1][2], reverse=True)
            for k in range(min(len(sample), opts.xi)):
                left = sample[k][0][3]
                right = sample[k][1][3]
                for j in range(feature_count):
                    if theta[j] * left[j] <= theta[j] * right[j]:
                        mistake += 1
                        theta[j] += opts.eta * (left[j] - right[j])
        sys.stderr.write("Mistake:  %s\n" % (mistake, ))
    print("\n".join([str(weight) for weight in theta]))
Ejemplo n.º 6
0
def bleu_score(input_string):
    """Return ``bleu.bleu`` over *input_string* scored against ``opts.reference``.

    *input_string* holds one hypothesis per line.  Hypotheses and references
    are whitespace-tokenized and paired in order; if the two sides differ in
    length the surplus lines of the longer one are ignored.  The ten
    per-sentence statistics are summed before the score is computed.
    """
    # Close the reference file as soon as it has been read.
    with open(opts.reference) as ref_file:
        ref = [line.strip().split() for line in ref_file]
    hyp = [line.strip().split() for line in input_string.split('\n')]

    stats = [0] * 10
    for (r, h) in zip(ref, hyp):
        stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(h, r))]
    return bleu.bleu(stats)
def main(opts, sysstdin):
    """Return ``bleu.bleu`` of the lines of *sysstdin* against ``opts.en``.

    Args:
        opts: object whose ``en`` attribute is the path of the reference
            file (one sentence per line).
        sysstdin: iterable of system output lines.

    Lines are whitespace-tokenized and paired in order; surplus lines on the
    longer side are ignored.  The ten per-sentence statistics are summed
    before scoring.
    """
    # Close the reference file as soon as it has been read.
    with open(opts.en) as ref_file:
        ref = [line.strip().split() for line in ref_file]
    system = [line.strip().split() for line in sysstdin]

    stats = [0] * 10
    for (r, s) in zip(ref, system):
        stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(s, r))]

    return bleu.bleu(stats)
Ejemplo n.º 8
0
def score(predicted, reference):
    """Return ``bleu.bleu`` of *predicted* against the file *reference*.

    Args:
        predicted: iterable of hypothesis lines.
        reference: path of the reference file, one sentence per line.

    Lines are whitespace-tokenized and paired in order; surplus lines on the
    longer side are ignored.  The ten per-sentence statistics are summed
    before scoring.
    """
    # Close the reference file as soon as it has been read.
    with open(reference) as ref_file:
        ref = [line.strip().split() for line in ref_file]
    system = [line.strip().split() for line in predicted]

    stats = [0] * 10
    for (r, s) in zip(ref, system):
        stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(s, r))]

    return bleu.bleu(stats)
Ejemplo n.º 9
0
def choose_best_interval(interval_ends, sentence_dict):
    """Return the interval whose end point yields the highest BLEU.

    For every entry of *interval_ends* the statistics returned by
    ``collect_BLEU_stats(sentence_dict, end)`` are scored with ``bleu.bleu``
    and scaled by 100.  The result is ``(best_interval, best_score)`` where
    *best_interval* is ``(previous_end, end)`` for the first end point that
    reaches the maximum.  If no score exceeds 0 the result is ``((0, 0), 0)``.
    """
    best_interval = (0, 0)
    current_best = 0
    for position, end in enumerate(interval_ends):
        candidate_bleu = 100 * bleu.bleu(collect_BLEU_stats(sentence_dict, end))
        if candidate_bleu > current_best:
            current_best = candidate_bleu
            # NOTE(review): for position 0 the index -1 wraps around to the
            # last end point -- confirm that this is intended.
            best_interval = (interval_ends[position - 1], end)
    return best_interval, current_best
Ejemplo n.º 10
0
def sum_bleu_scores_per_range(range_marker_dict):
    """Sum sentence-level BLEU scores for every key of *range_marker_dict*.

    Args:
        range_marker_dict: mapping from a range key to an iterable of
            ``(hypothesis, reference)`` pairs.

    Returns:
        A dict with the same keys; each value is the sum of ``bleu.bleu``
        over that key's pairs (``0.0`` when the key has no pairs).
    """
    range_bleu_scores = {}
    # The original body read an undefined ``range_markers_dict``; iterate
    # the parameter instead.
    for k, v in range_marker_dict.items():
        sum_bs = 0.0
        for h, r in v:
            # Materialize the statistics before scoring, as the other
            # sentence-level callers of bleu.bleu do.
            b_stats = list(bleu.bleu_stats(h, r))
            sum_bs += bleu.bleu(b_stats)
        range_bleu_scores[k] = sum_bs
    return range_bleu_scores
Ejemplo n.º 11
0
 def corpus_bleu_thumt(self, hyp_in):
     """Return ``bleu`` of the file *hyp_in* against the tokenized reference, times 100.

     The reference path is ``self.reference_path`` with ``'.tok'`` inserted
     before its last three characters.  Every hypothesis line becomes a
     token list; every reference line becomes a one-element list holding
     its token list.
     """
     with open(hyp_in, 'r') as hyp_file:
         trans = [line.strip().split() for line in hyp_file]
     tok_path = self.reference_path[:-3] + '.tok' + self.reference_path[-3:]
     with open(tok_path, 'r') as ref_file:
         refs = [[line.strip().split()] for line in ref_file]
     return bleu(trans, refs) * 100
def compute_bleu(hypo, ref="data/dev.ref"):
    """Return 100 times ``bleu.bleu`` of the file *hypo* against the file *ref*.

    Args:
        hypo: path of the hypothesis file, one sentence per line.
        ref: path of the reference file, one sentence per line.

    Lines are whitespace-tokenized and paired in order; surplus lines on the
    longer side are ignored.  The ten per-sentence statistics are summed
    before scoring.
    """
    # ``with`` closes each file even if reading raises.
    with open(ref, 'r') as f_ref:
        ref_sents = [line.strip().split() for line in f_ref]
    with open(hypo, 'r') as f_hypo:
        hyp_sents = [line.strip().split() for line in f_hypo]

    stats = [0] * 10
    for (r, h) in zip(ref_sents, hyp_sents):
        stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(h, r))]
    return (100 * bleu.bleu(stats))
Ejemplo n.º 13
0
def bleu_score(mt_para_corpus, si_para_corpus, N=4):
    '''
    Return ``bleu.bleu`` of one corpus's target side against another's.

    Sentence pairs of the two corpora are aligned in order.  The target
    tokens of *mt_para_corpus* act as the reference and those of
    *si_para_corpus* as the output being scored.  *N* is accepted but not
    used by this function.
    '''
    totals = [0 for _ in xrange(10)]
    aligned = zip(mt_para_corpus.sent_pairs, si_para_corpus.sent_pairs)
    for mt_pair, si_pair in aligned:
        reference = [word.tok for word in mt_pair.tgt_sent.words]
        hypothesis = [word.tok for word in si_pair.tgt_sent.words]
        pair_stats = bleu.bleu_stats(hypothesis, reference)
        totals = [sum(column) for column in zip(totals, pair_stats)]
    return bleu.bleu(totals)
Ejemplo n.º 14
0
def main():
    """Train feature weights from an n-best list and print them.

    Reads reference sentences from ``opts.en`` and n-best entries
    (``index ||| sentence ||| features``) from ``opts.nbest``, printing
    progress dots to stderr.  Each candidate becomes an ``entry`` carrying
    its BLEU, smoothed BLEU and feature list.  For ``opts.epo`` epochs, the
    pairs returned by ``get_sample`` for each sentence are sorted by
    decreasing smoothed-BLEU difference; for the first ``opts.xi`` pairs the
    weights are updated through ``vector_plus`` whenever the first member
    does not score strictly higher than the second under the current
    weights.  The final weights are printed one per line.
    """
    sys.stderr.write("Reading English Sentences")
    references = []
    with open(opts.en) as ref_file:
        for line_no, line in enumerate(ref_file):
            references.append(line)
            if line_no % 100 == 0:
                sys.stderr.write(".")

    sys.stderr.write("\nReading ndests")
    nbests = []
    with open(opts.nbest) as nbest_file:
        for line_no, line in enumerate(nbest_file):
            (sent_id, sentence, features) = line.strip().split("|||")
            sent_id = int(sent_id)
            stats = list(bleu_stats(sentence, references[sent_id]))
            bleu_score = bleu(stats)
            smoothed_bleu_score = smoothed_bleu(stats)
            # making the feature string to float list
            feature_list = [float(x) for x in features.split()]
            # Pad up to sent_id so a skipped sentence index cannot cause an
            # IndexError on the append below.
            while len(nbests) <= sent_id:
                nbests.append([])
            nbests[sent_id].append(
                entry(sentence, bleu_score, smoothed_bleu_score, feature_list))
            if line_no % 5000 == 0:
                sys.stderr.write(".")

    arg_num = len(nbests[0][0].feature_list)
    theta = [1.0 / arg_num] * arg_num  # uniform initialization

    sys.stderr.write("\nTraining...\n")
    for _ in range(opts.epo):
        mistake = 0
        for nbest in nbests:
            if not nbest:
                # Padding slot for a sentence without candidates.
                continue
            sample = get_sample(nbest)
            sample.sort(
                key=lambda pair: pair[0].smoothed_bleu - pair[1].smoothed_bleu,
                reverse=True)
            for k in range(min(len(sample), opts.xi)):
                v1 = sample[k][0].feature_list
                v2 = sample[k][1].feature_list
                if dot_product(theta, v1) <= dot_product(theta, v2):
                    mistake += 1
                    theta = vector_plus(theta, vector_plus(v1, v2, -1),
                                        opts.eta)
        sys.stderr.write("Mistake:  %s\n" % (mistake, ))
    print("\n".join([str(weight) for weight in theta]))
Ejemplo n.º 15
0
def test():
    """Print ``cdscore`` for n-gram orders 1..4 and ``bleu`` for one fixed sentence pair."""
    # Alternative sample inputs, kept for manual experiments:
    # hypotheses = ["The brown fox jumps over the dog 笑"]
    # references = ["The quick brown fox jumps over the lazy dog 笑"]
    # hypotheses = ["It is a guide to action which ensures that the military always obeys the commands of the party."]
    # references = ["It is a guide to action that ensures that the military will forever heed Party commands.","It is the guiding principle which guarantees the military forces always being under the command of the Party.","It is the practical guide for the army always to heed the directions of the party."]
    hypotheses = "我 的 的 买 次 天 , 的 个 还 天 段 没 点 个 点"
    references = "我 们 刚 一 周 二 段 转 的 三 段 , 二 段 还 有 一 点 点"

    def as_corpus(text):
        # A fresh one-sentence corpus of whitespace tokens on every call.
        return [text.strip().split()]

    gram = 5
    for order in range(1, gram):
        cd_score = cdscore(as_corpus(hypotheses), as_corpus(references), order)
        print("".join([" cdscore  ngram:", str(order), "-->", str(cd_score)]))

    bleu_score = bleu(as_corpus(hypotheses), as_corpus(references))
    print("".join([" bleu  score-->", str(bleu_score)]))
Ejemplo n.º 16
0
def main():
    """Learn feature weights from an n-best list and print them.

    ``opts.en`` supplies the reference sentences and ``opts.nbest`` the
    ``index ||| sentence ||| features`` entries; progress dots go to stderr.
    Every candidate is stored as an ``entry`` with its BLEU, smoothed BLEU
    and feature list.  In each of the ``opts.epo`` epochs the pairs from
    ``get_sample`` are ordered by decreasing smoothed-BLEU difference and,
    for the first ``opts.xi`` pairs, the weights are moved via
    ``vector_plus`` whenever the first member fails to score strictly
    higher than the second.  The final weights are printed one per line.
    """
    sys.stderr.write("Reading English Sentences")
    references = []
    with open(opts.en) as en_file:
        for ref_no, line in enumerate(en_file):
            references.append(line)
            if ref_no % 100 == 0:
                sys.stderr.write(".")

    sys.stderr.write("\nReading ndests")
    nbests = []
    with open(opts.nbest) as nbest_file:
        for entry_no, line in enumerate(nbest_file):
            (index, sentence, features) = line.strip().split("|||")
            index = int(index)
            stats = list(bleu_stats(sentence, references[index]))
            bleu_score = bleu(stats)
            smoothed_bleu_score = smoothed_bleu(stats)
            # making the feature string to float list
            feature_list = [float(x) for x in features.split()]
            # Pad every missing slot so a gap in the sentence indices does
            # not raise IndexError on the append below.
            while len(nbests) <= index:
                nbests.append([])
            nbests[index].append(entry(sentence, bleu_score, smoothed_bleu_score, feature_list))
            if entry_no % 5000 == 0:
                sys.stderr.write(".")

    arg_num = len(nbests[0][0].feature_list)
    theta = [1.0 / arg_num] * arg_num  # uniform initialization

    sys.stderr.write("\nTraining...\n")
    for _ in range(opts.epo):
        mistake = 0
        for nbest in nbests:
            if not nbest:
                # Padding slot for a sentence without candidates.
                continue
            sample = get_sample(nbest)
            sample.sort(key=lambda pair: pair[0].smoothed_bleu - pair[1].smoothed_bleu, reverse=True)
            for k in range(min(len(sample), opts.xi)):
                v1 = sample[k][0].feature_list
                v2 = sample[k][1].feature_list
                if dot_product(theta, v1) <= dot_product(theta, v2):
                    mistake += 1
                    theta = vector_plus(theta, vector_plus(v1, v2, -1), opts.eta)
        sys.stderr.write("Mistake:  %s\n" % (mistake,))
    print("\n".join([str(weight) for weight in theta]))
Ejemplo n.º 17
0
def sum_scores_per_range(rd, method):
    """Group the keys of *rd* by their aggregated score.

    *rd* maps a range key to an iterable of ``(score, (hyp, ref))`` items.
    When *method* is ``'bleu'`` each score is a sequence whose first ten
    entries are summed position-wise and passed to ``bleu.bleu``; for any
    other *method* the scores are plain numbers and are added up.  The
    result maps every aggregated value to the list of range keys that
    produced it.
    """
    ranges_in_val = {}
    aggregate_bleu = method == 'bleu'
    for range_key, items in rd.items():
        if aggregate_bleu:
            running = [0.0] * 10
            for hr_score, (_hyp, _ref) in items:
                running = [running[pos] + hr_score[pos] for pos in xrange(10)]
            value = bleu.bleu(running)
        else:
            value = 0.0
            for hr_score, (_hyp, _ref) in items:
                value += hr_score
        ranges_in_val.setdefault(value, []).append(range_key)
    return ranges_in_val
Ejemplo n.º 18
0
def compute_score(weights, refs, hyps):
    """Return ``bleu.bleu`` for the hypotheses that *weights* rank highest.

    The module-level ``all_hyps`` is read in slices of 100 entries per
    sentence for ``num_sents`` sentences.  Every entry is a
    ``(num, hyp, feats)`` triple whose *feats* string holds space-separated
    ``name=value`` items; an entry's model score is the sum of
    ``weights[name] * value``.  The first highest-scoring hypothesis of each
    slice, with a trailing newline, is paired with *refs* in order.

    NOTE(review): *hyps* is never read; the candidates come from the global
    ``all_hyps`` -- confirm that this is intended.
    """
    selected = []
    for sent_idx in xrange(0, num_sents):
        offset = sent_idx * 100
        best_score = -1e300
        best = ''
        for (_num, hyp, feats) in all_hyps[offset:offset + 100]:
            score = 0.0
            for item in feats.split(' '):
                (name, value) = item.split('=')
                score += weights[name] * float(value)
            if score > best_score:
                best_score = score
                best = hyp
        selected.append("%s\n" % best)

    tot_stats = [0 for _ in xrange(10)]
    for (r, h) in zip(refs, selected):
        # NOTE(review): the reference is passed first and the hypothesis is
        # an untokenized string here -- verify against bleu.bleu_stats.
        tot_stats = [sum(column) for column in zip(tot_stats, bleu.bleu_stats(r, h))]
    return bleu.bleu(tot_stats)
Ejemplo n.º 19
0
           'p(e|f)'     : float(opts.tm1),
           'p_lex(f|e)' : float(opts.tm2)}

# Whitespace-tokenized reference sentences, one per line of opts.reference.
with open(opts.reference) as ref_file:
    ref = [line.strip().split() for line in ref_file]
# Raw n-best entries, each line split on ' ||| '.
with open(opts.input) as hyp_file:
    all_hyps = [pair.split(' ||| ') for pair in hyp_file]
# Equivalent to len(zip(ref, all_hyps)) but also valid where zip() is lazy.
num_sents = min(len(ref), len(all_hyps))

# Calculate the "gold" scoring function G
# which is just the local BLEU score here.
# all_scores[s] collects (score, hyp, feats) for the 100 entries of sentence s.
all_scores = defaultdict(list)
for s in range(num_sents):
    hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100]
    for (num, hyp, feats) in hyps_for_one_sent:
        stats = [0 for i in range(10)]
        stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(hyp.split(" "), ref[s]))]
        score = bleu.bleu(stats)
        all_scores[s].append( (score, hyp, feats) )

def tune():
    '''Collect pairwise training samples for tuning the weight vector w.

    NOTE(review): the visible body only builds X and y on each of the five
    iterations; w and binary_classifier are created but not used in these
    lines -- the function looks truncated, confirm against the full source.
    '''
    # Initial weights as a 1x3 array taken from opts.lm, opts.tm1, opts.tm2.
    w = array([[float(opts.lm), float(opts.tm1), float(opts.tm2)]])
    binary_classifier = svm.SVC(kernel="linear")

    for _ in range(0,5): # for desired number of iterations
        # Fresh sample set per iteration: X holds the feature entries and y
        # the matching signs yielded by sampler().
        X, y = [], []
        for s in xrange(0, num_sents):
            samples = sampler(s, 5000, 50, 0.05)
            for (feats, sign) in samples:
                X.append(feats)
                y.append(sign)
Ejemplo n.º 20
0
  while len(nbests) <= i:
    nbests.append([])
  scores = tuple(bleu.bleu_stats(sentence.split(), ref[i]))
  inverse_scores = tuple([-x for x in scores])
  nbests[i].append(translation_candidate(sentence, scores, inverse_scores))
  if n % 2000 == 0:
    sys.stderr.write(".")

# Start from the first candidate of every n-best list.
oracle = [nbest[0] for nbest in nbests]

# Sum the per-candidate statistics of the initial oracle position by position.
stats = [0 for i in xrange(10)]
for candidate in oracle:
  stats = [sum(scores) for scores in zip(stats, candidate.scores)]

prev_score = 0
score = bleu.bleu(stats)

# greedy search for better oracle. For each sentence, choose the
# candidate translation that improves BLEU the most.
# The sweep over all sentences repeats for as long as the previous sweep
# raised the corpus score.
while score > prev_score:
  prev_score = score
  for i, nbest in enumerate(nbests): 
    for candidate in nbest:
      # Swap sentence i's current oracle entry for this candidate: add the
      # candidate's statistics and the (negated) inverse_scores of the entry
      # it would replace.
      new_stats = [sum(scores) for scores in zip(stats, candidate.scores, oracle[i].inverse_scores)]
      new_score = bleu.bleu(new_stats)
      # Keep the swap only on a strict improvement of the corpus score.
      if new_score > score:
        score = new_score
        stats = new_stats
        oracle[i] = candidate

sys.stderr.write("\n")
Ejemplo n.º 21
0
def main():
    """Train averaged weights per epoch and print the best-scoring set.

    Reads reference sentences from ``opts.en`` and n-best entries
    (``index ||| sentence ||| features``) from ``opts.nbest``, printing
    progress dots to stderr.  Each candidate becomes an ``entry`` carrying
    its smoothed BLEU and feature list.

    For every one of the ``opts.epo`` epochs, the pairs returned by
    ``get_sample`` are sorted by decreasing smoothed-BLEU difference and the
    first ``opts.xi`` pairs update the weights through ``vector_plus`` when
    the first member does not score strictly higher than the second.  The
    weights seen after each processed pair are averaged and stored for the
    epoch.

    Finally each epoch's averaged weights are used to pick the
    highest-scoring candidate of every sentence, the resulting output is
    scored with ``bleu.bleu``, and the weights of the first epoch with the
    maximum score are reported on stderr and printed one per line.
    """
    sys.stderr.write("Reading English Sentences")
    references = []
    with open(opts.en) as ref_file:
        for line_no, line in enumerate(ref_file):
            references.append(line)
            if line_no % 100 == 0:
                sys.stderr.write(".")

    sys.stderr.write("\nReading ndests")
    nbests = []
    with open(opts.nbest) as nbest_file:
        for line_no, line in enumerate(nbest_file):
            (sent_id, sentence, features) = line.strip().split("|||")
            sent_id = int(sent_id)
            stats = list(bleu.bleu_stats(sentence, references[sent_id]))
            smoothed_bleu_score = bleu.smoothed_bleu(stats)
            # making the feature string to float list
            feature_list = [float(x) for x in features.split()]
            # Pad up to sent_id so a skipped sentence index cannot cause an
            # IndexError on the append below.
            while len(nbests) <= sent_id:
                nbests.append([])
            nbests[sent_id].append(
                entry(sentence, smoothed_bleu_score, feature_list))
            if line_no % 5000 == 0:
                sys.stderr.write(".")

    arg_num = len(nbests[0][0].feature_list)
    theta = [1.0 / arg_num] * arg_num  # uniform initialization

    weights = []  # one averaged weight vector per epoch
    sys.stderr.write("\nTraining...\n")
    for _ in range(opts.epo):
        avg_theta = [0.0] * arg_num
        avg_cnt = 0
        mistake = 0
        for nbest in nbests:
            if not nbest:
                # Padding slot for a sentence without candidates.
                continue
            sample = get_sample(nbest)
            sample.sort(
                key=lambda pair: pair[0].smoothed_bleu - pair[1].smoothed_bleu,
                reverse=True)
            for k in range(min(len(sample), opts.xi)):
                v1 = sample[k][0].feature_list
                v2 = sample[k][1].feature_list
                if dot_product(theta, v1) <= dot_product(theta, v2):
                    mistake += 1
                    theta = vector_plus(theta, vector_plus(v1, v2, -1),
                                        opts.eta)

                avg_theta = vector_plus(avg_theta, theta)
                avg_cnt += 1

        sys.stderr.write("Mistake:  %s\n" % (mistake, ))
        # Fall back to uniform weights when no pair was processed.
        weights.append([
            avg / avg_cnt if avg_cnt != 0 else 1 / float(arg_num)
            for avg in avg_theta
        ])

    sys.stderr.write("Computing best BLEU score and outputing...\n")
    # instead of print the averaged-out weights, print the weights that
    # maximize the BLEU score
    bleu_score = []
    for w in weights:
        # Exactly one output sentence per n-best list, so that ``system``
        # stays aligned with ``references``.  (The original appended the
        # best translation of every sentence seen so far on each iteration,
        # which misaligned the two lists.)
        system = []
        for nbest in nbests:
            if not nbest:
                # No candidates: score an empty output for this sentence.
                system.append("")
                continue
            model_scores = [
                sum([x * y for x, y in zip(w, et.feature_list)])
                for et in nbest
            ]
            # First candidate with the maximum model score.
            best_idx = model_scores.index(max(model_scores))
            system.append(nbest[best_idx].sentence)

        stats = [0] * 10
        for (r, s) in zip(references, system):
            stats = [
                sum(scores) for scores in zip(stats, bleu.bleu_stats(s, r))
            ]

        bleu_score.append(bleu.bleu(stats))

    best_bleu = max(bleu_score)
    idx = bleu_score.index(best_bleu)  # first epoch reaching the maximum
    sys.stderr.write("Maximum BLEU score of training data is: {}\n".format(
        best_bleu))
    sys.stderr.write("Corresponding weights are: {}\n".format(" ".join(
        [str(w) for w in weights[idx]])))
    print("\n".join([str(weight) for weight in weights[idx]]))
Ejemplo n.º 22
0
def train(args):
    """Train an ``rnnsearch`` model, autosaving and tracking the best BLEU.

    Options start from ``default_option()``, are replaced by those stored in
    ``args.model`` when that file exists, and are then overridden from
    *args*.  Training runs from the stored epoch up to ``option["maxepoch"]``
    over the text stream built from ``option["corpus"]``.  The option dict
    is serialized to ``<model>.autosave.pkl`` every ``option["freq"]``
    updates and at the end of each epoch, and to ``<model>.best.pkl``
    whenever validation BLEU exceeds the best score so far.

    Attributes of *args* read here: ``model``, ``initialize``, ``reset``,
    ``gpuid`` and ``finetune`` (plus whatever ``override`` reads).
    """
    option = default_option()

    # predefined model names
    pathname, basename = os.path.split(args.model)
    modelname = get_filename(basename)
    autoname = os.path.join(pathname, modelname + ".autosave.pkl")
    bestname = os.path.join(pathname, modelname + ".best.pkl")

    # load models
    # init stays True when there is no existing model file to resume from
    if os.path.exists(args.model):
        option, params = load_model(args.model)
        init = False
    else:
        init = True

    # optional parameters from another model; only element [1] of what
    # load_model returns is kept
    if args.initialize:
        init_params = load_model(args.initialize)
        init_params = init_params[1]
        restore = True
    else:
        restore = False

    override(option, args)
    print_option(option)

    # load references
    if option["references"]:
        references = load_references(option["references"])
    else:
        references = None

    # input corpus
    batch = option["batch"]
    sortk = option["sort"] or 1
    # the seed doubles as the shuffle argument of the reader
    shuffle = option["seed"] if option["shuffle"] else None
    reader = textreader(option["corpus"], shuffle)
    processor = [data_length, data_length]
    stream = textiterator(reader, [batch, batch * sortk], processor,
                          option["limit"], option["sort"])

    if shuffle and option["indices"] is not None:
        reader.set_indices(option["indices"])

    # args.reset discards the stored progress counters
    if args.reset:
        option["count"] = [0, 0]
        option["epoch"] = 0
        option["cost"] = 0.0

    # fast-forward the reader by the stored option["count"][1]
    skip_stream(reader, option["count"][1])
    epoch = option["epoch"]
    maxepoch = option["maxepoch"]

    # create session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    if args.gpuid >= 0:
        config.gpu_options.visible_device_list = "%d" % args.gpuid

    with tf.Session(config=config):
        # set seed
        np.random.seed(option["seed"])
        tf.set_random_seed(option["seed"])

        # create model
        initializer = tf.random_uniform_initializer(-0.08, 0.08)
        model = rnnsearch(initializer=initializer, **option)

        print "parameters:", count_parameters(tf.trainable_variables())

        # None is passed to the optimizer unless fine-tuning narrows it to
        # the variables that were not matched against init_params
        variables = None

        if restore:
            matched, not_matched = match_variables(tf.trainable_variables(),
                                                   init_params)
            if args.finetune:
                variables = not_matched

        # create optimizer
        constraint = ["norm", option["norm"]]
        optim = optimizer(model, algorithm=option["optimizer"], norm=True,
                          constraint=constraint, variables=variables)

        tf.global_variables_initializer().run()

        # resume: overwrite the freshly initialized variables
        if not init:
            set_variables(tf.trainable_variables(), params)

        if restore:
            restore_variables(matched, not_matched)

        # beamsearch option
        search_opt = {}
        search_opt["beamsize"] = option["beamsize"]
        search_opt["normalize"] = option["normalize"]
        search_opt["maxlen"] = option["maxlen"]
        search_opt["minlen"] = option["minlen"]

        # vocabulary and special symbol
        svocabs, tvocabs = option["vocabulary"]
        svocab, isvocab = svocabs
        tvocab, itvocab = tvocabs
        unk_sym = option["unk"]
        eos_sym = option["eos"]

        # summary
        count = option["count"][0]
        totcost = option["cost"]
        best_score = option["bleu"]
        alpha = option["alpha"]

        for i in range(epoch, maxepoch):
            for data in stream:
                xdata, xlen = convert_data(data[0], svocab, unk_sym, eos_sym)
                ydata, ylen = convert_data(data[1], tvocab, unk_sym, eos_sym)

                # time one optimization step
                t1 = time.time()
                cost, norm = optim.optimize(xdata, xlen, ydata, ylen)
                optim.update(alpha=alpha)
                t2 = time.time()

                count += 1
                # rescale the cost by len(ylen) / sum(ylen) and accumulate
                # it divided by log(2)
                cost = cost * len(ylen) / sum(ylen)
                totcost += cost / math.log(2)

                print i + 1, count, cost, norm, t2 - t1

                # save model
                if count % option["freq"] == 0:
                    option["indices"] = reader.get_indices()
                    option["bleu"] = best_score
                    option["cost"] = totcost
                    option["count"] = [count, reader.count]
                    serialize(autoname, option)

                # periodic validation; a new best BLEU is saved to bestname
                if count % option["vfreq"] == 0:
                    if option["validation"] and references:
                        trans = translate(model, option["validation"],
                                          **search_opt)
                        bleu_score = bleu(trans, references)
                        print "bleu: %2.4f" % bleu_score
                        if bleu_score > best_score:
                            best_score = bleu_score
                            option["indices"] = reader.get_indices()
                            option["bleu"] = best_score
                            option["cost"] = totcost
                            option["count"] = [count, reader.count]
                            serialize(bestname, option)

                # periodically decode one random sentence of the current
                # batch and print it
                if count % option["sfreq"] == 0:
                    batch = len(data[0])
                    ind = np.random.randint(0, batch)
                    sdata = data[0][ind]
                    tdata = data[1][ind]
                    xdata = xdata[:, ind : ind + 1]
                    xlen = xlen[ind : ind + 1]
                    hls = beamsearch(model, xdata, xlen, **search_opt)
                    best, score = hls[0]
                    print sdata
                    print tdata
                    print "search score:", score
                    print "translation:", " ".join(best[:-1])

            print "--------------------------------------------------"

            # end-of-epoch validation
            # NOTE(review): unlike the in-loop check, this tests
            # option["vfreq"] rather than option["validation"] before
            # translating -- confirm that this is intended.
            if option["vfreq"] and references:
                trans = translate(model, option["validation"], **search_opt)
                bleu_score = bleu(trans, references)
                print "iter: %d, bleu: %2.4f" % (i + 1, bleu_score)
                if bleu_score > best_score:
                    best_score = bleu_score
                    option["indices"] = reader.get_indices()
                    option["bleu"] = best_score
                    option["cost"] = totcost
                    option["count"] = [count, reader.count]
                    serialize(bestname, option)

            # NOTE(review): option["count"] is refreshed only when a
            # checkpoint is written and is reset to [0, 0] at the end of
            # every epoch, so this divisor can be stale or zero; the local
            # `count` may be what was meant -- confirm.
            print "averaged cost: ", totcost / option["count"][0]
            print "--------------------------------------------------"

            # from epoch index option["stop"] onward, scale alpha by
            # option["decay"] after every epoch
            if i >= option["stop"]:
                alpha = alpha * option["decay"]

            count = 0
            totcost = 0.0
            stream.reset()

            # update autosave
            option["epoch"] = i + 1
            option["alpha"] = alpha
            option["indices"] = reader.get_indices()
            option["bleu"] = best_score
            option["cost"] = totcost
            option["count"] = [0, 0]
            serialize(autoname, option)

        print "best(bleu): %2.4f" % best_score

    stream.close()
Ejemplo n.º 23
0
# Whitespace-tokenized reference sentences, one per line of opts.reference.
with open(opts.reference) as ref_file:
    ref = [line.strip().split() for line in ref_file]
# Raw n-best entries, each line split on ' ||| '.
with open(opts.input) as hyp_file:
    all_hyps = [pair.split(' ||| ') for pair in hyp_file]
# Equivalent to len(zip(ref, all_hyps)) but also valid where zip() is lazy.
num_sents = min(len(ref), len(all_hyps))

# Calculate the "gold" scoring function G
# which is just the local BLEU score here.
# all_scores[s] collects (score, hyp, feats) for the 100 entries of sentence s.
all_scores = defaultdict(list)
for s in range(num_sents):
    hyps_for_one_sent = all_hyps[s * 100:s * 100 + 100]
    for (num, hyp, feats) in hyps_for_one_sent:
        stats = [0 for i in range(10)]
        stats = [
            sum(scores)
            for scores in zip(stats, bleu.bleu_stats(hyp.split(" "), ref[s]))
        ]
        score = bleu.bleu(stats)
        all_scores[s].append((score, hyp, feats))


def tune():
    '''Collect pairwise training samples for tuning the weight vector w.

    NOTE(review): the visible body only builds X and y on each of the five
    iterations; w and binary_classifier are created but not used in these
    lines -- the function looks truncated, confirm against the full source.
    '''
    # Initial weights as a 1x3 array taken from opts.lm, opts.tm1, opts.tm2.
    w = array([[float(opts.lm), float(opts.tm1), float(opts.tm2)]])
    binary_classifier = svm.SVC(kernel="linear")

    for _ in range(0, 5):  # for desired number of iterations
        # Fresh sample set per iteration: X holds the feature entries and y
        # the matching signs yielded by sampler().
        X, y = [], []
        for s in xrange(0, num_sents):
            samples = sampler(s, 5000, 50, 0.05)
            for (feats, sign) in samples:
                X.append(feats)
                y.append(sign)
Ejemplo n.º 24
0
    num2 = random.randint(0, 99)
    if num1 == num2:
      if num1 == 0:
        num2 = num2 + 1
      else:
        num2 = num2 - 1 #just making sure they arent the same example
    hyp1 = hyps_for_one_sent[num1][1]
    #print(hyp1)
    hyp2 = hyps_for_one_sent[num2][1]
    #print(hyp2)
    feats1 = hyps_for_one_sent[num1][2]
    feats2 = hyps_for_one_sent[num2][2]

    #calculate bleu score for each example
    s1=list(bleu.bleu_stats(hyp1, ref))
    bs1=bleu.bleu(s1)
    s2=list(bleu.bleu_stats(hyp2, ref))
    bs2=bleu.bleu(s2)
    #print(bs1, bs2)
    #make training vector with difference in values of feats and indicator
    if bs1 > bs2:
      indic = 1
    else:
      if bs1 < bs2:
        indic = -1
      else:
          continue
      #ignore the ones that have same bleu score?
    #get feat values for each pair of features and subtract
    trainfeats = []
    for f1, f2 in zip(feats1.split(), feats2.split()):
Ejemplo n.º 25
0
def train(nbest_candidates,
          reference_files,
          init_weights=None,
          epochs=5,
          alpha=0.04,
          tau=100,
          xi=20,
          eta=0.0001):
    """Learn reranking weights with an averaged pairwise perceptron.

    Args:
        nbest_candidates: iterable of lines "index ||| sentence ||| features",
            where features is a whitespace-separated list of floats.
        reference_files: paths of tokenised reference files; line i of each
            file is a reference for sentence index i.
        init_weights: optional starting weights, one per feature.  A copy is
            taken, so the caller's object is never modified.  Defaults to
            uniform weights 1/len(features).
        epochs: number of passes over all n-best lists.
        alpha: minimum smoothed-BLEU gap for a sampled pair to be kept.
        tau: number of pairs sampled per n-best list and epoch.
        xi: number of kept pairs (largest gap first) used for updates.
        eta: perceptron learning rate.

    Returns:
        numpy array with the mean of the weight vectors recorded at the end
        of each epoch (the initial weights when epochs < 1).

    Raises:
        ValueError: if no reference file or no candidate is supplied.
    """
    # initialization
    sys.stderr.write("Initializing training data\n")
    candidate = namedtuple("candidate",
                           "sentence, features, bleu, smoothed_bleu")
    if not reference_files:
        raise ValueError("at least one reference file is required")
    refs = []
    for reference_file in reference_files:
        # close every reference file as soon as it has been read
        with open(reference_file) as ref_handle:
            refs.append([line.strip().split() for line in ref_handle])

    nbests = []
    num_candidates = 0
    for n, line in enumerate(nbest_candidates):
        (i, sentence, features) = line.strip().split("|||")
        i = int(i)
        sentence = sentence.strip()
        features = np.array([float(h) for h in features.strip().split()])

        # calculate bleu score and smoothed bleu score
        # smoothed_bleu keeps the best value over all references, while
        # bleu_score is left at the value for the last reference file
        max_bleu_score = -float('inf')
        for ref in refs:
            stats = tuple(bleu.bleu_stats(sentence.split(), ref[i]))
            bleu_score = bleu.bleu(stats)
            smoothed_bleu_score = bleu.smoothed_bleu(stats)
            max_bleu_score = max(max_bleu_score, smoothed_bleu_score)

        # grow the table so that sentence index i is addressable
        while len(nbests) <= i:
            nbests.append([])
        nbests[i].append(
            candidate(sentence, features, bleu_score, max_bleu_score))
        num_candidates += 1

        if n % 2000 == 0:
            sys.stderr.write(".")
    # report the number of candidates, not the index of the last one
    sys.stderr.write("\nRetrieved %d candidates for %d sentences\n" % (
        num_candidates, len(nbests)))
    if num_candidates == 0:
        raise ValueError("no n-best candidates were provided")

    # set weights to default
    num_features = len(nbests[0][0].features)
    if init_weights is None:
        w = np.array([1.0 / num_features] * num_features)
    else:
        # copy as float: the update below is in place and would otherwise
        # overwrite the caller's array (or extend a plain list)
        w = np.array(init_weights, dtype=float)
    assert len(w) == num_features
    if epochs < 1:
        # nothing to average; avoid the 0/0 division below
        return w
    w_sum = np.zeros(num_features)

    # training
    random.seed()
    for i in range(epochs):
        sys.stderr.write("Training epoch %d:\n" % i)
        mistakes = 0
        for nbest in nbests:
            # a pair cannot be drawn from fewer than two candidates
            if len(nbest) < 2:
                continue

            sample = []
            for _ in range(tau):
                first, second = random.sample(range(len(nbest)), 2)
                s1, s2 = nbest[first], nbest[second]
                if fabs(s1.smoothed_bleu - s2.smoothed_bleu) <= alpha:
                    continue
                # store every pair as (better, worse)
                if s1.smoothed_bleu > s2.smoothed_bleu:
                    sample.append((s1, s2))
                else:
                    sample.append((s2, s1))

            # update only on the xi pairs with the largest BLEU gap
            sample.sort(key=lambda s: s[0].smoothed_bleu - s[1].smoothed_bleu,
                        reverse=True)
            for (s1, s2) in sample[:xi]:
                if np.dot(w, s1.features) <= np.dot(w, s2.features):
                    mistakes += 1
                    # this is vector addition!
                    w += eta * (s1.features - s2.features)

        w_sum += w
        sys.stderr.write("Number of mistakes: %d\n" % mistakes)

    w = w_sum / float(epochs)
    return w
Ejemplo n.º 26
0
    while len(nbests) <= i:
        nbests.append([])
    scores = tuple(bleu.bleu_stats(sentence.split(), ref[i]))
    inverse_scores = tuple([-x for x in scores])
    nbests[i].append(translation_candidate(sentence, scores, inverse_scores))
    if n % 2000 == 0:
        sys.stderr.write(".")

# Initial oracle: the first candidate of every n-best list.
oracle = [nbest[0] for nbest in nbests]

# Sum the statistics of the current oracle selection, position by position.
stats = [0 for i in xrange(10)]
for candidate in oracle:
    stats = [sum(scores) for scores in zip(stats, candidate.scores)]

prev_score = 0
score = bleu.bleu(stats)

# greedy search for better oracle. For each sentence, choose the
# candidate translation that improves BLEU the most.
# Full passes over all sentences repeat for as long as the previous pass
# raised the score.
while score > prev_score:
    prev_score = score
    for i, nbest in enumerate(nbests): 
        for candidate in nbest:
            # Statistics after swapping sentence i's current oracle for this
            # candidate: add the candidate's scores and the current oracle's
            # inverse_scores.
            new_stats = [sum(scores) for scores in zip(stats, candidate.scores, oracle[i].inverse_scores)]
            new_score = bleu.bleu(new_stats)
            if new_score > score:
                # Accept immediately; later candidates of this sentence are
                # then compared against the newly chosen oracle[i].
                score = new_score
                stats = new_stats
                oracle[i] = candidate

sys.stderr.write("\n")
#!/usr/bin/env python
import optparse, sys, os
import bleu

# Command-line options: the reference file and the decoder output to score.
optparser = optparse.OptionParser()
optparser.add_option("-r", "--reference", dest="reference", default=os.path.join("/usr/shared/CMPT/nlp-class/project/test/", "all.cn-en.en0"), help="English reference sentences")
optparser.add_option("-i", "--input", dest="input", default=os.path.join("/home/yongyiw/Documents/Github/final-project/Code", "output_1"), help="decoder output")
(opts, _) = optparser.parse_args()

# Read both files as lists of token lists; the context managers close the
# handles instead of leaving that to the garbage collector.
with open(opts.reference) as ref_file:
    ref = [line.strip().split() for line in ref_file]
with open(opts.input) as system_file:
    system = [line.strip().split() for line in system_file]

# Accumulate the per-sentence statistics position by position.  zip() stops
# at the shorter of the two files, so surplus lines are not scored.
stats = [0] * 10
for (r, s) in zip(ref, system):
    stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(s, r))]
# Single-argument call form prints the same text on Python 2 and Python 3.
print(bleu.bleu(stats))
Ejemplo n.º 28
0
def main(args):
    """
    Train the encoder/decoder and validate the model after every epoch.

    Each epoch calls `train`, then `validation`, and scores the validation
    predictions with the mean of the per-caption `bleu(..., mode="4-gram")`
    values.  Both state dicts are saved whenever the score is at least as
    good as the best one so far.  Training stops early once
    `args.stop_count` consecutive epochs bring no improvement, and the
    learning rate is scaled by 0.8 after every 10 such epochs.

    Args:
        args: parsed options.  Attributes read here: vocab_path,
            fine_tune_encoder, encoder_lr, decoder_lr, vis_dim, vis_num,
            embed_dim, hidden_dim, vocab_size, num_layers, dropout_ratio,
            resize, crop_size, root_img_dirc, train_data_path,
            val_data_path, batch_size, shuffle, num_workers, num_epochs,
            beam_size, stop_count, save_encoder_path, save_decoder_path.
    """

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logger.debug("DEVICE: {}".format(device))

    # load vocabulary
    # NOTE: pickle.load can execute arbitrary code; only point vocab_path at
    # a trusted file.
    with open(args.vocab_path, "rb") as f:
        vocab = pickle.load(f)

    # encoder model setting
    # the encoder only gets an optimizer when it is being fine-tuned
    encoder = EncoderResNet()
    encoder_optimizer = torch.optim.Adam(
        params=filter(lambda p: p.requires_grad, encoder.parameters()),
        lr=args.encoder_lr) if args.fine_tune_encoder else None

    # decoder model setting
    decoder = Decoder(vis_dim=args.vis_dim,
                      vis_num=args.vis_num,
                      embed_dim=args.embed_dim,
                      hidden_dim=args.hidden_dim,
                      vocab_size=args.vocab_size,
                      num_layers=args.num_layers,
                      dropout_ratio=args.dropout_ratio)
    decoder_optimizer = torch.optim.Adam(params=filter(
        lambda p: p.requires_grad, decoder.parameters()),
                                         lr=args.decoder_lr)

    # move to GPU
    encoder = nn.DataParallel(encoder).to(device)
    decoder = nn.DataParallel(decoder).to(device)

    # loss function
    criterion = nn.CrossEntropyLoss()

    # data loader
    transform = set_transform(args.resize,
                              args.crop_size,
                              horizontal_flip=True,
                              normalize=True)
    train_img_dirc = os.path.join(args.root_img_dirc, "train2014")
    train_loader = get_image_loader(train_img_dirc, args.train_data_path,
                                    vocab, transform, args.batch_size,
                                    args.shuffle, args.num_workers)
    val_img_dirc = os.path.join(args.root_img_dirc, "val2014")
    # validation uses a batch size of 1
    val_loader = get_image_loader(val_img_dirc, args.val_data_path, vocab,
                                  transform, 1, args.shuffle, args.num_workers)

    # initialization
    best_bleu_score = -100
    not_improved_cnt = 0

    # Epochs are numbered 1..num_epochs inclusive, matching the
    # "[epoch/num_epochs]" log line; range(1, num_epochs) skipped the last.
    for epoch in range(1, args.num_epochs + 1):
        # training
        train(encoder, decoder, encoder_optimizer, decoder_optimizer,
              train_loader, criterion, epoch)

        # validation
        pred_df = validation(encoder, decoder, val_loader, criterion, epoch,
                             vocab, args.beam_size)

        # calculate BLEU-4 score
        pred_cap_lst = decode_caption(pred_df["pred"], vocab.idx2word)
        ans_cap_lst = decode_caption(pred_df["ans"], vocab.idx2word)
        assert len(pred_cap_lst) == len(ans_cap_lst)
        bleu_score_lst = [
            bleu(pred_cap, ans_cap, mode="4-gram")
            for pred_cap, ans_cap in zip(pred_cap_lst, ans_cap_lst)
        ]
        bleu_score = np.mean(bleu_score_lst)

        # early stopping
        if bleu_score < best_bleu_score:
            not_improved_cnt += 1
        else:
            # learning is going well
            best_bleu_score = bleu_score
            not_improved_cnt = 0

            # save best params model
            torch.save(encoder.state_dict(), args.save_encoder_path)
            torch.save(decoder.state_dict(), args.save_decoder_path)

        # logging status
        logger.debug(
            "\n************************ VAL ************************\n"
            "EPOCH          : [{0}/{1}]\n"
            "BLEU-4         : {2}\n"
            "EARLY STOPPING : [{3}/{4}]\n"
            "*****************************************************\n".format(
                epoch, args.num_epochs, bleu_score, not_improved_cnt,
                args.stop_count))

        if not_improved_cnt == args.stop_count:
            logger.debug("Early Stopping")
            break

        # decay learning rate if there is no improvement for 10 consecutive
        # epochs.  The counter is 0 right after an improvement and
        # 0 % 10 == 0, so without the "> 0" guard the rate was also decayed
        # on every improving epoch.
        if not_improved_cnt > 0 and not_improved_cnt % 10 == 0:
            if args.fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)
            adjust_learning_rate(decoder_optimizer, 0.8)
Ejemplo n.º 29
0
def get_validation_bleu(hypotheses):
    """Return BLEU of `hypotheses` against `dev_tgt`, formatted as "%.2f".

    Hypotheses and references are paired in order, stripped and split on
    whitespace; their statistics are summed before `bleu` is applied once
    and the result is scaled by 100.
    """
    totals = numpy.zeros(10)
    for hyp_line, ref_line in zip(hypotheses, dev_tgt):
        hyp_tokens = hyp_line.strip().split()
        ref_tokens = ref_line.strip().split()
        # in-place add keeps one running vector for the whole set
        totals += numpy.array(bleu_stats(hyp_tokens, ref_tokens))
    percentage = 100 * bleu(totals)
    return "%.2f" % percentage
import googletrans
from fairseq.models.lightconv import LightConvModel
import jieba
from bleu import bleu

# Round trip: Chinese source -> English (LightConv model) -> Chinese
# (Google Translate), then score the result against the original text.
google_transer = googletrans.Translator()
# Load the pretrained zh->en model from the local
# 'wmt17.zh-en.dynamicconv-glu' directory, with subword-nmt BPE codes.
zh2en = LightConvModel.from_pretrained(
    'wmt17.zh-en.dynamicconv-glu/',
    checkpoint_file='model.pt',
    data_name_or_path='wmt17.zh-en.dynamicconv-glu',
    bpe='subword_nmt',
    bpe_codes='wmt17.zh-en.dynamicconv-glu/en.code',
    sampling=1)
s_text = "湯姆指出湯姆事件爆發後, 湯姆針對進口口罩逐批查驗,從8月10日至9月3日,湯姆查獲577件共83萬0832片偽標台灣製造的非醫用口罩,全數扣押在湯姆 "
# Disabled alternative: convert zh-tw to zh-cn with Google Translate first.
#text = str(google_transer.translate(s_text, src="zh-tw", dest="zh-cn").text)
# Segment the source with jieba and join the pieces with single spaces.
text = " ".join(jieba.lcut(s_text))
print(text)

# zh -> en with the LightConv model
text = zh2en.translate(text)
print(text)

# en -> zh-tw with Google Translate (needs network access)
zh = google_transer.translate(text, src="en", dest="zh-tw").text
# NOTE(review): the keyword is spelled `translated_txet`; presumably this
# matches the parameter name of the project's bleu() -- confirm before
# renaming.
print(bleu(ans=s_text, translated_txet=zh), zh)
Ejemplo n.º 31
0
def get_validation_bleu(hypotheses):
    """Score `hypotheses` against `dev_tgt` and return BLEU as a string.

    Each hypothesis/reference pair is stripped and whitespace-tokenised, the
    per-pair statistics are summed, and 100 times the `bleu` of the summed
    vector is formatted with two decimals.
    """
    running = numpy.zeros(10)
    for raw_hyp, raw_ref in zip(hypotheses, dev_tgt):
        tokens_hyp = raw_hyp.strip().split()
        tokens_ref = raw_ref.strip().split()
        # in-place add keeps one running vector for the whole set
        running += numpy.array(bleu_stats(tokens_hyp, tokens_ref))
    scaled = 100 * bleu(running)
    return "%.2f" % scaled
Ejemplo n.º 32
0
                        '--references',
                        dest='r',
                        required=True,
                        nargs='+',
                        help='Reads the reference_[0, 1, ...]')
    parser.add_argument('-lc', help='Lowercase', action='store_true')
    parser.add_argument('-v', help='print log', action='store_true')

    args = parser.parse_args()

    hypo_b = open(args.b, 'r').read().strip()
    hypo_m = open(args.m, 'r').read().strip()
    refs = [open(ref_fpath, 'r').read().strip() for ref_fpath in args.r]

    cased = (not args.lc)
    bleu_b = bleu(hypo_b, refs, 4, cased=cased)
    bleu_m = bleu(hypo_m, refs, 4, cased=cased)
    wlog('Baseline BLEU: {:4.2f}'.format(bleu_b))
    wlog('Model BLEU   : {:4.2f}'.format(bleu_m))

    list_hypo_b, list_hypo_m = hypo_b.split('\n'), hypo_m.split('\n')
    better = worse = 0
    fake = list_hypo_b[:]
    assert len(list_hypo_b) == len(list_hypo_m), 'Length mismatch ... '

    num = len(list_hypo_b)
    point_every, number_every = int(math.ceil(num / 100)), int(
        math.ceil(num / 10))

    for i in xrange(len(fake)):
Ejemplo n.º 33
0
def get_bleu_score(tgt, src):
    """Return the BLEU score of the single pair (tgt, src).

    The statistics from bleu.bleu_stats are added to a vector of 10 zeros,
    so at most the first 10 values are passed on to bleu.bleu.
    """
    zeros = [0] * 10
    pair_stats = bleu.bleu_stats(tgt, src)
    stats = [base + count for base, count in zip(zeros, pair_stats)]
    return bleu.bleu(stats)
Ejemplo n.º 34
0
def main():
    """Train reranker weights and print the set with the best training BLEU.

    Reads the references from opts.en and the n-best list from opts.nbest,
    runs opts.epo epochs of a pairwise perceptron and records the averaged
    weights of each epoch.  Every recorded weight vector is then used to
    pick the top candidate of each sentence, and the vector with the
    highest BLEU is written to stdout, one weight per line.
    """
    nbests = []
    references = []
    sys.stderr.write("Reading English Sentences")
    with open(opts.en) as en_file:
        for i, line in enumerate(en_file):
            # Initialize references to correct english sentences
            references.append(line)
            if i % 100 == 0:
                sys.stderr.write(".")

    sys.stderr.write("\nReading ndests")
    with open(opts.nbest) as nbest_file:
        for j, line in enumerate(nbest_file):
            (i, sentence, features) = line.strip().split("|||")
            i = int(i)
            # NOTE(review): sentence and reference are passed as raw strings,
            # not token lists; confirm that is what bleu_stats expects.
            stats = list(bleu.bleu_stats(sentence, references[i]))
            smoothed_bleu_score = bleu.smoothed_bleu(stats)
            # making the feature string to float list
            feature_list = [float(x) for x in features.split()]
            if len(nbests) <= i:
                nbests.append([])
            nbests[i].append(entry(sentence, smoothed_bleu_score, feature_list))

            if j % 5000 == 0:
                sys.stderr.write(".")

    arg_num = len(nbests[0][0].feature_list)
    theta = [1.0 / arg_num for _ in range(arg_num)]  # initialization

    weights = [[] for _ in range(opts.epo)]
    sys.stderr.write("\nTraining...\n")
    for j in range(opts.epo):
        avg_theta = [0.0 for _ in range(arg_num)]
        avg_cnt = 0
        mistake = 0
        for nbest in nbests:
            sample = get_sample(nbest)
            # largest smoothed-BLEU gap first; only the top opts.xi are used
            sample.sort(key=lambda pair: pair[0].smoothed_bleu - pair[1].smoothed_bleu,
                        reverse=True)
            for i in range(min(len(sample), opts.xi)):
                v1 = sample[i][0].feature_list
                v2 = sample[i][1].feature_list
                if dot_product(theta, v1) <= dot_product(theta, v2):
                    mistake += 1
                    theta = vector_plus(theta, vector_plus(v1, v2, -1), opts.eta)

                # theta is accumulated after every pair, updated or not
                avg_theta = vector_plus(avg_theta, theta)
                avg_cnt += 1

        sys.stderr.write("Mistake:  %s\n" % (mistake,))
        # fall back to uniform weights when no pair was seen in this epoch
        weights[j] = [avg / avg_cnt if avg_cnt != 0 else 1 / float(arg_num)
                      for avg in avg_theta]

    sys.stderr.write("Computing best BLEU score and outputing...\n")
    # instead of print the averaged-out weights, print the weights that
    # maximize the BLEU score

    bleu_score = [0 for _ in weights]
    for j, w in enumerate(weights):
        # Exactly one hypothesis per sentence, in sentence order.  The
        # previous code appended the best of *every sentence seen so far*
        # on each sentence, so `system` grew cumulatively and no longer
        # lined up with `references`.
        system = []
        for nbest in nbests:
            # max() returns the first maximal candidate, the same tie-break
            # as taking element 0 of a stable sort on the negated score
            best = max(nbest,
                       key=lambda et: sum([x * y for x, y in zip(w, et.feature_list)]))
            system.append(best.sentence)

        stats = [0] * 10
        for (r, s) in zip(references, system):
            stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(s, r))]

        bleu_score[j] = bleu.bleu(stats)

    # first epoch that reaches the maximum score
    best_bleu = max(bleu_score)
    idx = bleu_score.index(best_bleu)
    sys.stderr.write("Maximum BLEU score of training data is: {}\n".format(best_bleu))
    sys.stderr.write("Corresponding weights are: {}\n".format(" ".join([str(w) for w in weights[idx]])))
    # single-argument call form prints the same text on Python 2 and 3
    print("\n".join([str(weight) for weight in weights[idx]]))
Ejemplo n.º 35
0
def score_bleu_stats(bleu_stats):
  """Sum a collection of statistics vectors and return bleu.bleu of the sum.

  The running total starts as 10 zeros (floats); each vector is added to it
  position by position, truncated to the shorter of the two.
  """
  totals = [0.0] * 10
  for sentence_stats in bleu_stats:
    totals = [current + running
              for current, running in zip(sentence_stats, totals)]
  return bleu.bleu(totals)
Ejemplo n.º 36
0
#!/usr/bin/env python
import optparse
import sys
import bleu

# Command-line options: only the reference file; hypotheses come from stdin.
optparser = optparse.OptionParser()
optparser.add_option("-r", "--reference", dest="reference", default="data/test.en", help="English reference sentences")
(opts, _) = optparser.parse_args()

# Read the references as token lists and close the file right away.
with open(opts.reference) as ref_file:
    ref = [line.strip().split() for line in ref_file]
system = [line.strip().split() for line in sys.stdin]

# Accumulate the per-sentence statistics position by position.  zip() stops
# at the shorter input, so surplus lines on either side are not scored.
stats = [0] * 10
for (r, s) in zip(ref, system):
    stats = [sum(scores) for scores in zip(stats, bleu.bleu_stats(s, r))]
# Single-argument call form prints the same text on Python 2 and Python 3.
print(bleu.bleu(stats))
Ejemplo n.º 37
0
          " ".join(map(lambda x: i2w_trg[x], generate(sample_dev))))
    # Evaluate on dev set
    dev_words, dev_loss = 0, 0.0
    start_time = time.time()
    for sent_id, (start, length) in enumerate(dev_order):
        dev_batch = dev[start:start + length]
        my_loss, num_words = calc_loss(dev_batch, 0.0)
        dev_loss += my_loss.item()
        dev_words += num_words
    print("[DEV] iter %r: dev loss/word=%.4f, ppl=%.4f, time=%.2fs" %
          (ITER, dev_loss / dev_words, math.exp(
              dev_loss / dev_words), time.time() - start_time))
    if best_dev > dev_loss:
        print("[DEV] Best model so far, saving snapshot.")
        torch.save(model, "batched_enc_dec_model.pt")
        best_dev = dev_loss

    # this is how you generate, can replace with desired sentenced to generate
# Reload the saved snapshot, decode every test sentence and accumulate the
# BLEU statistics over the whole test set.
model = torch.load("batched_enc_dec_model.pt")
sentences = []
stats = np.zeros(10)
for sent in test:
    hyp = generate(sent)
    sentences.append(hyp)
    # NOTE(review): hypothesis and reference (sent[1]) are handed to
    # bleu_stats as space-joined strings -- confirm it expects strings
    # rather than token lists.
    stats += np.array(
        bleu_stats(" ".join(i2w_trg[x] for x in hyp),
                   " ".join(i2w_trg[x] for x in sent[1])))
print("Corpus BLEU: %.2f" % (100 * bleu(stats)))
# Show the first ten generated sentences as words.
for sent in sentences[:10]:
    print(" ".join(i2w_trg[x] for x in sent))
Ejemplo n.º 38
0
        rand_idx = np.random.randint(low=1, high=batch_size)
        print_str_predict = ' '
        for w in np.concatenate(du_labels,
                                axis=0)[rand_idx::batch_size].tolist():
            print_str_predict += tgt_reverse_dictionary[w] + ' '
            if tgt_reverse_dictionary[w] == '</s>':
                break
        print(print_str_predict)

        print_str_reference = ' '
        for w in tr_pred[rand_idx::batch_size].tolist():
            print_str_reference += tgt_reverse_dictionary[w] + ' '
            if tgt_reverse_dictionary[w] == '</s>':
                break
        print(print_str_reference)
        print("bleu score", bleu(print_str_predict, print_str_reference, [1]))

    avg_loss += l

    if (step + 1) % 200 == 0:
        print('============= Step ', str(step + 1), ' =============')
        print('\t Loss: ', avg_loss / 500.0)
        # save_path = saver.save(sess, "/logs/testmodel")
        # print("Model saved in path: %s" % save_path)
        avg_file.write(str(avg_loss))

        avg_loss = 0.0
        sess.run(inc_gstep)

avg_file.close()
Ejemplo n.º 39
0
def writeFeatureVector():
    """Print pairwise training vectors built from the dev n-best list.

    Reads opts.ref (tokenised references), opts.src_dev (field 1 of each
    "|||"-separated line) and opts.dev (n-best list, 100 hypotheses per
    sentence, features written as name=value tokens).  Each hypothesis gets
    two extra features: its number of target words and the value returned
    by calcNotTranslatedWords.  For every sentence, opts.tau random pairs
    are drawn, those whose BLEU gap exceeds opts.alpha are kept, and the
    opts.xi pairs with the largest gap each produce two comma-separated
    lines on stdout: the feature difference followed by the sign of the
    BLEU difference, once per direction.
    """
    hypothesis_sentences = namedtuple("hyp", "features, bleu")
    # read each input inside a context manager so the handle is closed
    with open(opts.ref) as ref_file:
        ref = [line.strip().split() for line in ref_file]
    with open(opts.src_dev) as src_file:
        src_dev = [line.strip().split("|||")[1] for line in src_file]

    sys.stderr.write("reading dev data...")
    nbests = [[] for _ in ref]
    with open(opts.dev) as dev_file:
        all_hyps = [pair.split(' ||| ') for pair in dev_file]
    # floor division keeps this an int on Python 2 and Python 3
    num_sents = len(all_hyps) // 100
    for s in range(num_sents):
        for (_, hyp, feats) in all_hyps[s * 100:s * 100 + 100]:
            hyp_words = hyp.strip().split()
            feats = [float(h.split('=')[1]) for h in feats.strip().split()]
            stats = tuple(bleu.bleu_stats(hyp_words, ref[s]))
            # extra feature 1: number of target words
            feats.append(len(hyp_words))
            # extra feature 2: number of untranslated source words
            feats.append(calcNotTranslatedWords(src_dev[s], hyp))

            nbests[s].append(hypothesis_sentences(feats, bleu.bleu(stats)))

    # pairwise sampling. Figure 4 of the paper
    random.seed(0)
    sampling_hypothesis = namedtuple("sample", "hyp1, hyp2, gDiff")

    def sign(a, b):
        # -1, 0 or 1: same result as the Python 2 builtin cmp(a, b)
        return (a > b) - (a < b)

    def sampling(nbest):
        # Draw opts.tau random pairs from one n-best list and keep those
        # whose BLEU gap exceeds opts.alpha.  The list is an explicit
        # argument instead of being read from the enclosing loop variable.
        V = []
        if not nbest:
            # sentence without hypotheses: nothing to sample
            return V
        for _ in range(opts.tau):
            c1 = random.choice(nbest)
            c2 = random.choice(nbest)
            gap = math.fabs(c1.bleu - c2.bleu)
            if c1 != c2 and gap > opts.alpha:
                V.append(sampling_hypothesis(c1, c2, gap))
        return V

    x = []
    for nbest in nbests:
        V = sampling(nbest)
        sortedV = sorted(V, key=lambda h: h.gDiff, reverse=True)[:opts.xi]
        for sample in sortedV:
            f1 = sample.hyp1.features
            f2 = sample.hyp2.features
            # one training vector per direction of the pair
            tmp = [c1j - c2j for c1j, c2j in zip(f1, f2)]
            tmp.append(sign(sample.hyp1.bleu, sample.hyp2.bleu))
            x.append(tmp)
            tmp = [c2j - c1j for c1j, c2j in zip(f1, f2)]
            tmp.append(sign(sample.hyp2.bleu, sample.hyp1.bleu))
            x.append(tmp)

        # report sentences that yielded a different number of pairs than xi
        if len(sortedV) != opts.xi:
            sys.stderr.write("%d\n" % len(sortedV))

    # writing feature vector
    for f in x:
        print(",".join(str(f0) for f0 in f))
Ejemplo n.º 40
0
 def Score(self, hyp, ref):
   """Return bleu.bleu of the statistics for one (hyp, ref) pair.

   The values from bleu.bleu_stats are added to 10 zeros, so at most the
   first 10 statistics are scored.
   """
   zeros = [0] * 10
   pair_stats = bleu.bleu_stats(hyp, ref)
   totals = [base + count for base, count in zip(zeros, pair_stats)]
   return bleu.bleu(totals)
def cal_store(ref, system):
	"""Return bleu.bleu over the summed statistics of all (ref, system) pairs.

	References and system outputs are paired in order; pairing stops at the
	shorter of the two sequences.
	"""
	totals = [0] * 10
	for reference, hypothesis in zip(ref, system):
		pair_stats = bleu.bleu_stats(hypothesis, reference)
		totals = [running + count for running, count in zip(totals, pair_stats)]
	return bleu.bleu(totals)