Example #1
def backtrack_k(bins, paths):
    '''follow the father links along each k-best path, rebuilding the
    per-feature partial scores and the translated phrase sequences'''
    partial_scores = []
    translated_sentences = []
    tags = heap_lm_k.FT.tags
    for path in paths:
        # start from the initial (empty) state in the first bin
        partial_score = bins[0].data[0].partial_score
        translated_sentence = []
        father_key = bins[0].data[0].getKey()
        for i in xrange(len(path)):
            bin_id = path[i][0]
            state_id = path[i][1]
            ft_id = path[i][2]
            state = bins[bin_id].data[state_id]
            father_tuple = state.fathers[ft_id]
            # the recorded father must be the state we arrived from
            assert father_key == father_tuple[tags.FATHER_KEY].getKey()
            e_phrase = father_tuple[tags.E_PHRASE]
            ps = father_tuple[tags.PARTIAL_SCORE]
            partial_score = array_plus(partial_score, ps)
            translated_sentence.append(e_phrase)
            father_key = state.getKey()
        partial_scores.append(partial_score)
        translated_sentences.append(translated_sentence)
    return partial_scores, translated_sentences
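Both examples lean on small element-wise vector helpers (array_plus here, array_minus in decode_k below) and on the project's heap_lm_k module for Heap and State. A minimal sketch of what the vector helpers are assumed to do, not the project's actual definitions:

def array_plus(a, b):
    # element-wise sum of two equal-length feature vectors
    return [x + y for x, y in zip(a, b)]

def array_minus(a, b):
    # element-wise difference of two equal-length feature vectors
    return [x - y for x, y in zip(a, b)]

Each per-feature score is kept as a plain list of floats, so accumulating scores along a path is just repeated element-wise addition.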
Example #2
def decode_k(f_sentence, phrase_table, seen_words, lm_model,
             lm_weight, d_weight, d_limit, beam_size, num_feature,
             tempFilePath, debug=False, k_best=1):
    '''generate the k-best translations

    returns partial_scores, translated_sentences
    partial_scores: [[feature_value] * num_feature] * k_best
    translated_sentences: [[e_phrase] * num_phrases] * k_best
    '''

    start = datetime.now()

    # use a monotone estimate to generate the future cost table and the translation options
    fcost, options, new_phrase_table = future_cost_and_options(
        f_sentence, phrase_table, seen_words, num_feature)

    new_phrase_table = lmize(lm_model, lm_weight, new_phrase_table)

    if debug:
        for key in new_phrase_table:
            s = new_phrase_table[key][0]
            print key, s
        print sorted(options)

    # beam search

    # one bin (beam) per number of covered source words
    bins = []
    for i in xrange(len(f_sentence) + 1):
        b = heap_lm_k.Heap(beam_size, k_best)
        bins.append(b)

    # initial state: nothing covered, LM context '<s>', all features zero
    empty = heap_lm_k.State((), 0, '<s>', 0, 0, [0.0] * num_feature)
    bins[0].add(empty)

    for i in xrange(len(f_sentence)):
        b = bins[i]

        # expand every surviving state in the bin with i covered words
        for k in xrange(b.size):
            state = b.data[k]

            for option in options:
                cover = state.cover
                j = state.j
                d = option[0] - j  # distortion: jump from the end of the last phrase
                last_e = state.last_e

                # extend only if the option does not overlap the current
                # coverage and stays within the distortion limit
                if (not cross(cover, option)) and (abs(d) <= d_limit):

                    # whether this option covers the last untranslated part of the sentence
                    is_end = False
                    new_cover = merge(cover, option)
                    if len(new_cover) == len(f_sentence):
                        is_end = True

                    # rank the candidate e_phrases by LM score given last_e
                    f_phrase = tuple(f_sentence[option[0]:option[1]])
                    items = new_phrase_table[f_phrase]
                    items = lm_order_list(
                        lm_model, items, lm_weight, last_e, is_end, limit=10)

                    # add the expanded states into the bins

                    for item in items:
                        t_score = item[0]  # translation score; LM score already folded in
                        d_score = -abs(d) * d_weight  # distortion penalty
                        score = d_score + t_score + state.s

                        # per-feature scores of this option plus the distortion feature
                        partial_score = list(item[2])
                        partial_score.append(-abs(d))
                        new_partial_score = array_plus(
                            partial_score, state.partial_score)

                        e_phrase = item[1]
                        new_last_e = e_phrase[-1]
                        new_j = option[1]
                        new_h = get_future_cost(
                            fcost, new_cover, len(f_sentence))

                        child_state = heap_lm_k.State(
                            new_cover, new_j, new_last_e, score, new_h,
                            new_partial_score)
                        child_state.e_phrase = e_phrase
                        # record the edge back to the father state (used by backtrack_k):
                        # f value, score delta, e_phrase, per-feature score delta, father
                        delta = (child_state.f, child_state.s - state.s,
                                 child_state.e_phrase,
                                 array_minus(child_state.partial_score,
                                             state.partial_score), state)
                        child_state.fathers = [delta]
                        n_cover = len(new_cover)
                        bins[n_cover].add(child_state)


    if debug:
        for b in bins:
            print b.data

    paths = _mcr.get_k_best_paths(bins, k_best, tempFilePath)
    partial_scores, translated_sentences = _mcr.backtrack_k(bins, paths)

    end = datetime.now()
    print end - start

    return partial_scores, translated_sentences
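A hypothetical call sketch, assuming phrase_table, seen_words, and lm_model have already been built by the surrounding project; the toy sentence, weights, and file path below are illustrative placeholders, not the project's defaults:

f_sentence = ['das', 'Haus', 'ist', 'klein']  # placeholder source sentence

partial_scores, translated_sentences = decode_k(
    f_sentence, phrase_table, seen_words, lm_model,
    lm_weight=1.0, d_weight=0.5, d_limit=3,
    beam_size=100, num_feature=5,
    tempFilePath='/tmp/kbest.tmp', k_best=10)

# one per-feature score vector and one phrase sequence per hypothesis
for score_vector, sentence in zip(partial_scores, translated_sentences):
    print score_vector, sentence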