Ejemplo n.º 1
0
def get_ed_score(h, r):
    """Score two strings by passing their whitespace tokens to ``ed.edratio``.

    Args:
        h: String to tokenize with ``str.split()`` (first argument to
            ``ed.edratio``; presumably the hypothesis -- confirm with caller).
        r: String to tokenize with ``str.split()`` (second argument to
            ``ed.edratio``; presumably the reference -- confirm with caller).

    Returns:
        Whatever ``ed.edratio`` returns for the two token lists.
    """
    # Tokenize both inputs first, in argument order, then delegate.
    first_tokens, second_tokens = h.split(), r.split()
    return ed.edratio(first_tokens, second_tokens)
Ejemplo n.º 2
0
        )  # making a set of all possible target tokens that appear with source
        n_e.update(
            tokens_target
        )  # adding all tokens of the target sentence a potential translations for source token 'e'
        initial_translation[e] = n_e  #saving in a map
if method == 'uniform':
    for k, v in initial_translation.iteritems(
    ):  # walking through the map and setting initial translation probability uniformaly.
        for v_es in v:
            translations[v_es, k] = 1.0 / len(v)
            #print 'initial t:'
            #pp(translations)
else:
    """
    What if we dont set the initial translation  probabilities uniformly?
    look at: http://research.microsoft.com/pubs/150581/acl11.pdf
    """
    add_delta = 1.0
    for k, v in initial_translation.iteritems():
        print 'setting intial for ', k
        edr_k = map(lambda t: ed.edratio(t, k) + add_delta, v)
        sum_edr = sum(edr_k)
        for v_es, edr_es in zip(v, edr_k):
            translations[v_es, k] = edr_es / sum_edr

writer = open(save, 'w')
for k, v in translations.iteritems():
    writer.write(str(' '.join(k)) + '\t' + str(v) + '\n')
writer.flush()
writer.close()
    corpus_target[k] = tokens_target
    for e in tokens_source:
        n_e = initial_translation.get(e, set()) # making a set of all possible target tokens that appear with source
        n_e.update(tokens_target) # adding all tokens of the target sentence a potential translations for source token 'e'
        initial_translation[e] = n_e  #saving in a map
if method == 'uniform':
    for k, v in initial_translation.iteritems():  # walking through the map and setting initial translation probability uniformaly.
        for v_es in v:
            translations[v_es, k] = 1.0 / len(v)
            #print 'initial t:'
            #pp(translations)
else:
    """
    What if we dont set the initial translation  probabilities uniformly?
    look at: http://research.microsoft.com/pubs/150581/acl11.pdf
    """
    add_delta = 1.0
    for k, v in initial_translation.iteritems():
        print 'setting intial for ', k
        edr_k = map(lambda t: ed.edratio(t, k) + add_delta, v)
        sum_edr = sum(edr_k)
        for v_es, edr_es in zip(v, edr_k):
            translations[v_es, k] = edr_es / sum_edr

writer = open(save, 'w')
for k, v in translations.iteritems():
    writer.write(str(' '.join(k)) + '\t' + str(v) + '\n')
writer.flush()
writer.close()