Example #1
    def search(self, seq, n_samples, ignore_unk=False, minlen=1):
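        # Beam search over the decoder. `n_samples` is the beam width,
        # `c` the encoder representation of the source sequence `seq`,
        # and `states` the initial decoder hidden states (one per level).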
        c = self.comp_repr(seq)[0]
        states = map(lambda x : x[None, :], self.comp_init_states(c))
        dim = states[0].shape[1]

        num_levels = len(states)

        fin_trans = []
        fin_costs = []

        trans = [[]]
        costs = [0.0]

        for k in range(3 * len(seq)):
            if n_samples == 0:
                break

            # Compute probabilities of the next words for
            # all the elements of the beam.
            beam_size = len(trans)
            last_words = (numpy.array(map(lambda t : t[-1], trans))
                    if k > 0
                    else numpy.zeros(beam_size, dtype="int64"))
            log_probs = numpy.log(self.comp_next_probs(c, k, last_words, *states)[0])

            # Adjust log probs according to search restrictions
            if ignore_unk:
                log_probs[:,self.unk_id] = -numpy.inf
            # TODO: report me in the paper!!!
            if k < minlen:
                log_probs[:,self.eos_id] = -numpy.inf

            # Find the best options by calling argpartition on the flattened array
            next_costs = numpy.array(costs)[:, None] - log_probs
            flat_next_costs = next_costs.flatten()
            best_costs_indices = argpartition(
                    flat_next_costs,
                    n_samples)[:n_samples]

            # Decipher the flattened indices
            voc_size = log_probs.shape[1]
            trans_indices = best_costs_indices / voc_size
            word_indices = best_costs_indices % voc_size
            costs = flat_next_costs[best_costs_indices]

            # Form a beam for the next iteration
            new_trans = [[]] * n_samples
            new_costs = numpy.zeros(n_samples)
            new_states = [numpy.zeros((n_samples, dim), dtype="float32") for level
                    in range(num_levels)]
            inputs = numpy.zeros(n_samples, dtype="int64")
            for i, (orig_idx, next_word, next_cost) in enumerate(
                    zip(trans_indices, word_indices, costs)):
                new_trans[i] = trans[orig_idx] + [next_word]
                new_costs[i] = next_cost
                for level in range(num_levels):
                    new_states[level][i] = states[level][orig_idx]
                inputs[i] = next_word
            new_states = self.comp_next_states(c, k, inputs, *new_states)

            # Filter the sequences that end with end-of-sequence character
            trans = []
            costs = []
            indices = []
            for i in range(n_samples):
                if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                    trans.append(new_trans[i])
                    costs.append(new_costs[i])
                    indices.append(i)
                else:
                    n_samples -= 1
                    fin_trans.append(new_trans[i])
                    fin_costs.append(new_costs[i])
            states = map(lambda x : x[indices], new_states)

        # Dirty tricks to obtain any translation
        if not len(fin_trans):
            if ignore_unk:
                logger.warning("Did not manage without UNK")
                return self.search(seq, n_samples, False, minlen)
            elif n_samples < 500:
                logger.warning("Still no translations: try beam size {}".format(n_samples * 2))
                return self.search(seq, n_samples * 2, False, minlen)
            else:
                logger.error("Translation failed")

        fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
        fin_costs = numpy.array(sorted(fin_costs))
        return fin_trans, fin_costs
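
The core selection step above recurs in every example below: subtract the next-word log-probabilities from the running hypothesis costs, flatten the resulting (beam_size, voc_size) matrix, and use argpartition to pick the n_samples cheapest (hypothesis, word) pairs without a full sort. A minimal, self-contained sketch of just that step, using numpy.argpartition directly (the examples rely on an equivalent argpartition helper imported elsewhere) and toy probabilities:

    import numpy

    beam_size, voc_size, n_samples = 3, 5, 4
    costs = numpy.array([0.5, 1.2, 0.9])   # cumulative cost per hypothesis
    # toy next-word distributions, one row per hypothesis
    log_probs = numpy.log(numpy.random.dirichlet(numpy.ones(voc_size), beam_size))

    next_costs = costs[:, None] - log_probs          # shape (beam_size, voc_size)
    flat_next_costs = next_costs.flatten()
    best = numpy.argpartition(flat_next_costs, n_samples)[:n_samples]

    trans_indices = best // voc_size   # which hypothesis each winner extends
    word_indices = best % voc_size     # which word extends it
    print trans_indices, word_indices, flat_next_costs[best]
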
Example #2
    def search(self, seq, n_samples, ignore_unk=False, minlen=1, compute_alignment=False, have_source = False):
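        # Multi-input variant: `seq` concatenates the outputs of
        # `num_systems` systems, separated by `split_id`; split it back into
        # one sequence per system and terminate each with the source <eos>.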

        x = []
        last_split = -1
        for i in xrange(len(seq)):
            if seq[i] == self.split_id:
                tmp = copy.deepcopy(seq[last_split+1:i+1])
                x.append(tmp)
                last_split = i
        assert self.num_systems == len(x)
        for i in xrange(self.num_systems):
            x[i][-1]=self.source_eos_id
        c = self.comp_repr(*x)#[0]
        '''
        print len(c)
        for i in c:
            print i.shape
        '''
        #print self.get_sample(1,5,1,*x)
        states = map(lambda x : x[None, :], self.comp_init_states(*c))
        #c = numpy.concatenate(c, axis=0)
        dim = states[0].shape[1]

        num_levels = len(states)

        fin_trans = []
        fin_costs = []
        

        trans = [[]]
        costs = [0.0]

        if have_source:
            minlen = (len(x[0])-1)/2
            #print minlen
        else:
            minlen = (len(seq)-self.num_systems)/self.num_systems/2
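        # heuristic: require roughly half of the (average) source length before <eos>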

        if compute_alignment:
            fin_aligns = []
            aligns = [[]]

        for k in range(6 * minlen):
            if n_samples == 0:
                break

            # Compute probabilities of the next words for
            # all the elements of the beam.
            beam_size = len(trans)
            last_words = (numpy.array(map(lambda t : t[-1], trans))
                    if k > 0
                    else numpy.zeros(beam_size, dtype="int64"))

            if compute_alignment:
                align = self.comp_align(k, last_words, *(states+c))
            log_probs = numpy.log(self.comp_next_probs(k, last_words, *(states+c))[0])


            #print log_probs

            # Adjust log probs according to search restrictions
            if ignore_unk:
                log_probs[:,self.unk_id] = -numpy.inf
            # TODO: report me in the paper!!!
            if k < minlen:
                log_probs[:,self.eos_id] = -numpy.inf

            # Find the best options by calling argpartition on the flattened array
            next_costs = numpy.array(costs)[:, None] - log_probs
            flat_next_costs = next_costs.flatten()
            best_costs_indices = argpartition(
                    flat_next_costs,
                    n_samples)[:n_samples]

            #print best_costs_indices

            # Decipher the flattened indices
            voc_size = log_probs.shape[1]
            trans_indices = best_costs_indices / voc_size
            word_indices = best_costs_indices % voc_size
            costs = flat_next_costs[best_costs_indices]

            #print trans_indices
            #print word_indices

            # Form a beam for the next iteration
            new_trans = [[]] * n_samples
            new_costs = numpy.zeros(n_samples)
            new_states = [numpy.zeros((n_samples, dim), dtype="float32") for level
                    in range(num_levels)]
            if compute_alignment:
                new_aligns = [[]] * n_samples
            inputs = numpy.zeros(n_samples, dtype="int64")
            for i, (orig_idx, next_word, next_cost) in enumerate(
                    zip(trans_indices, word_indices, costs)):
                new_trans[i] = trans[orig_idx] + [next_word]
                new_costs[i] = next_cost
                if compute_alignment:
                    new_aligns[i] = aligns[orig_idx]+[align[:,orig_idx]]
                for level in range(num_levels):
                    new_states[level][i] = states[level][orig_idx]
                inputs[i] = next_word
            new_states = self.comp_next_states(k, inputs, *(new_states+c))

            # Filter the sequences that end with end-of-sequence character
            trans = []
            costs = []
            aligns = []
            indices = []
            for i in range(n_samples):
                if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                    trans.append(new_trans[i])
                    costs.append(new_costs[i])
                    if compute_alignment:
                        aligns.append(new_aligns[i])
                    indices.append(i)
                else:
                    n_samples -= 1
                    fin_trans.append(new_trans[i])
                    fin_costs.append(new_costs[i])
                    if compute_alignment:
                        fin_aligns.append(new_aligns[i])
            states = map(lambda x : x[indices], new_states)


        # Dirty tricks to obtain any translation
        if not len(fin_trans):
            if ignore_unk:
                logger.warning("Did not manage without UNK")
                return self.search(seq, n_samples, False, minlen, compute_alignment, have_source)
            elif n_samples < 100:
                logger.warning("Still no translations: try beam size {}".format(n_samples * 2))
                return self.search(seq, n_samples * 2, False, minlen, compute_alignment, have_source)
            else:
                logger.error("Translation failed")

        fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
        if compute_alignment:
            fin_aligns = numpy.array(fin_aligns)[numpy.argsort(fin_costs)]
        fin_costs = numpy.array(sorted(fin_costs))
        if compute_alignment:
            return fin_trans, fin_aligns, fin_costs
        else:
            return fin_trans, fin_costs
Example #3
    def search(self,
               seq,
               n_samples,
               eos_id,
               unk_id,
               ignore_unk=False,
               minlen=1,
               final=False):
        num_models = len(self.enc_decs)
        c = []
        for i in xrange(num_models):
            c.append(self.comp_repr[i](seq)[0])
        states = []
        for i in xrange(num_models):
            states.append(
                map(lambda x: x[None, :], self.comp_init_states[i](c[i])))
        dim = states[0][0].shape[1]

        num_levels = len(states[0])

        fin_trans = []
        fin_costs = []

        trans = [[]]
        costs = [0.0]

        for k in range(3 * len(seq)):
            if n_samples == 0:
                break

            # Compute probabilities of the next words for
            # all the elements of the beam.
            beam_size = len(trans)
            last_words = (numpy.array(map(lambda t: t[-1], trans))
                          if k > 0 else numpy.zeros(beam_size, dtype="int64"))
            #log_probs = (numpy.log(self.comp_next_probs_0(c, k, last_words, *states)[0]) +  numpy.log(self.comp_next_probs_1(c, k, last_words, *states)[0]))/2.
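            # Ensemble decoding: average the models' log-probabilities,
            # i.e. score with the geometric mean of their distributions.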
            log_probs = sum(
                numpy.log(self.comp_next_probs[i]
                          (c[i], k, last_words, *states[i])[0])
                for i in xrange(num_models)) / num_models

            # Adjust log probs according to search restrictions
            if ignore_unk:
                log_probs[:, unk_id] = -numpy.inf
            # TODO: report me in the paper!!!
            if k < minlen:
                log_probs[:, eos_id] = -numpy.inf

            # Find the best options by calling argpartition on the flattened array
            next_costs = numpy.array(costs)[:, None] - log_probs
            flat_next_costs = next_costs.flatten()
            best_costs_indices = argpartition(flat_next_costs,
                                              n_samples)[:n_samples]

            # Decipher the flattened indices
            voc_size = log_probs.shape[1]
            trans_indices = best_costs_indices / voc_size
            word_indices = best_costs_indices % voc_size
            costs = flat_next_costs[best_costs_indices]

            # Form a beam for the next iteration
            new_trans = [[]] * n_samples
            new_costs = numpy.zeros(n_samples)
            new_states = []
            for i in xrange(num_models):
                new_states.append([
                    numpy.zeros((n_samples, dim), dtype="float32")
                    for level in range(num_levels)
                ])
            inputs = numpy.zeros(n_samples, dtype="int64")
            for i, (orig_idx, next_word, next_cost) in enumerate(
                    zip(trans_indices, word_indices, costs)):
                new_trans[i] = trans[orig_idx] + [next_word]
                new_costs[i] = next_cost
                for level in range(num_levels):
                    for j in xrange(num_models):
                        new_states[j][level][i] = states[j][level][orig_idx]
                inputs[i] = next_word
            for i in xrange(num_models):
                new_states[i] = self.comp_next_states[i](c[i], k, inputs,
                                                         *new_states[i])

            # Filter the sequences that end with end-of-sequence character
            trans = []
            costs = []
            indices = []
            for i in range(n_samples):
                if new_trans[i][-1] != eos_id:
                    trans.append(new_trans[i])
                    costs.append(new_costs[i])
                    indices.append(i)
                else:
                    n_samples -= 1
                    fin_trans.append(new_trans[i])
                    fin_costs.append(new_costs[i])
            for i in xrange(num_models):
                states[i] = map(lambda x: x[indices], new_states[i])

        # Dirty tricks to obtain any translation
        if not len(fin_trans):
            if ignore_unk:
                logger.warning("Did not manage without UNK")
                return self.search(seq,
                                   n_samples,
                                   eos_id=eos_id,
                                   unk_id=unk_id,
                                   ignore_unk=False,
                                   minlen=minlen,
                                   final=final)
            elif not final:
                logger.warning(
                    "No appropriate translations: using larger vocabulary")
                raise RuntimeError
            else:
                logger.warning(
                    "No appropriate translation: return empty translation")
                fin_trans = [[]]
                fin_costs = [0.0]

        fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
        fin_costs = numpy.array(sorted(fin_costs))
        return fin_trans, fin_costs
Example #4
    def search_with_truth(self, seq, truth, n_samples, ignore_unk=False, minlen=1, idict=None):
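        # Debug variant: at each step, print the reference ("truth") token
        # alongside the beam's current last words via the inverse dictionary `idict`.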
        for ww in truth:
            print idict[ww],
        print ''
        c = self.comp_repr(seq)[0]
        # one representation at each encoding time step
        # c.shape[0] = len(seq)

        # states is a dim_dimensional vector, output of initialization unit
        states = map(lambda x : x[None, :], self.comp_init_states(c))
        # dimension of hidden layer
        dim = states[0].shape[1]

        # always 1 in case of non-deep GRU
        num_levels = len(states)

        fin_trans = []
        fin_costs = []

        trans = [[]]
        costs = [0.0]

        # maximum translation length allowed is 3*len(source)
        for k in range(3 * len(seq)):
            if n_samples == 0:
                # all translation ended
                break

            # Compute probabilities of the next words for
            # all the elements of the beam.
            beam_size = len(trans)
            last_words = (numpy.array(map(lambda t : t[-1], trans))
                    if k > 0
                    else numpy.zeros(beam_size, dtype="int64"))
            log_probs = numpy.log(self.comp_next_probs(c, k, last_words, *states)[0])

            print str(k) + '\t' + '|', 
            if k > 0 and k <= len(truth):
                if truth[k - 1] < 30000:
                    print idict[truth[k - 1]] + '\t' + '|',
                else:
                    print '<EOS>' + '\t',
                for ww in last_words:
                    print idict[ww] + ' ',
                print ''
            else:
                print last_words


            # Adjust log probs according to search restrictions
            if ignore_unk:
                log_probs[:,self.unk_id] = -numpy.inf
            # TODO: report me in the paper!!!
            if k < minlen:
                log_probs[:,self.eos_id] = -numpy.inf

            # Find the best options by calling argpartition on the flattened array
            next_costs = numpy.array(costs)[:, None] - log_probs
            flat_next_costs = next_costs.flatten()
            best_costs_indices = argpartition(
                    flat_next_costs,
                    n_samples)[:n_samples]

            # Decipher the flattened indices
            voc_size = log_probs.shape[1]
            # which beam?
            trans_indices = best_costs_indices / voc_size
            # which word?
            word_indices = best_costs_indices % voc_size
            costs = flat_next_costs[best_costs_indices]

            # Form a beam for the next iteration
            new_trans = [[]] * (n_samples)
            new_costs = numpy.zeros(n_samples)
            new_states = [numpy.zeros((n_samples, dim), dtype="float32") for level
                    in range(num_levels)]
            inputs = numpy.zeros(n_samples, dtype="int64")
            for i, (orig_idx, next_word, next_cost) in enumerate(
                    zip(trans_indices, word_indices, costs)):
                new_trans[i] = trans[orig_idx] + [next_word]
                new_costs[i] = next_cost
                for level in range(num_levels):
                    new_states[level][i] = states[level][orig_idx]
                inputs[i] = next_word

            new_states = self.comp_next_states(c, k, inputs, *new_states)

            # Filter the sequences that end with end-of-sequence character
            trans = []
            costs = []
            indices = []
            for i in range(n_samples):
                if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                    trans.append(new_trans[i])
                    costs.append(new_costs[i])
                    indices.append(i)
                else:
                    n_samples -= 1
                    fin_trans.append(new_trans[i])
                    fin_costs.append(new_costs[i])

            # the beam is naturally reduced when multiple of the best
            # new hypotheses come from the same beam entry
            states = map(lambda x : x[indices], new_states)

        # Dirty tricks to obtain any translation
        if not len(fin_trans):
            if ignore_unk:
                logger.warning("Did not manage without UNK")
                return self.search_with_truth(seq, truth, n_samples, False, minlen, idict)
            elif n_samples < 500:
                logger.warning("Still no translations: try beam size {}".format(n_samples * 2))
                return self.search_with_truth(seq, truth, n_samples * 2, False, minlen, idict)
            else:
                logger.error("Translation failed")

        fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
        fin_costs = numpy.array(sorted(fin_costs))
        return fin_trans, fin_costs
Example #5
    def search(self, seq, n_samples, ignore_unk=False, minlen=1):
        print seq
        rw, ww = self.enc_dec.view_encoder_weight()(seq)

        #print rw.shape
        #print ww.shape
        #print rw
        #print ww
        #visual2d(rw[:,:len(seq)])
        #visual2d(ww[:,:len(seq)])
        visual2d(rw)
        visual2d(ww)
        
        
        c, m = self.comp_repr(seq)
        #visual2d(c)
        #visual2d(m[0])
        visual2d(m[-1])
        #print c
        #print m
        print numpy.abs(c).sum(axis=1)
        print numpy.abs(m).sum(axis=2).sum(axis=1)
        states = map(lambda x : x[None, :], self.comp_init_states(c))

        #print states
        
        mem = m[-1:]
        dim = states[0].shape[1]

        num_levels = len(states)

        fin_trans = []
        fin_costs = []

        trans = [[]]
        costs = [0.0]
        derw = []
        deww = []

        for k in range(3 * len(seq)):
            #raw_input('press any key to continue')
            
            if n_samples == 0:
                break

            # Compute probabilities of the next words for
            # all the elements of the beam.
            beam_size = len(trans)
            last_words = (numpy.array(map(lambda t : t[-1], trans))
                    if k > 0
                    else numpy.zeros(beam_size, dtype="int64"))
            

            log_probs = numpy.log(self.comp_next_probs(c, k, last_words, mem, *states)[0])

            # Adjust log probs according to search restrictions
            if ignore_unk:
                log_probs[:,self.unk_id] = -numpy.inf
            # TODO: report me in the paper!!!
            if k < minlen:
                log_probs[:,self.eos_id] = -numpy.inf

            # Find the best options by calling argpartition on the flattened array
            next_costs = numpy.array(costs)[:, None] - log_probs
            flat_next_costs = next_costs.flatten()
            best_costs_indices = argpartition(
                    flat_next_costs,
                    n_samples)[:n_samples]

            # Decipher the flattened indices
            voc_size = log_probs.shape[1]
            trans_indices = best_costs_indices / voc_size
            word_indices = best_costs_indices % voc_size
            costs = flat_next_costs[best_costs_indices]

            # Form a beam for the next iteration
            new_trans = [[]] * n_samples
            new_costs = numpy.zeros(n_samples)
            new_states = [numpy.zeros((n_samples, dim), dtype="float32") for level
                    in range(num_levels)]
            new_mem = numpy.zeros((n_samples,mem.shape[1],mem.shape[2]), dtype="float32")
            inputs = numpy.zeros(n_samples, dtype="int64")
            for i, (orig_idx, next_word, next_cost) in enumerate(
                    zip(trans_indices, word_indices, costs)):
                new_trans[i] = trans[orig_idx] + [next_word]
                new_costs[i] = next_cost
                for level in range(num_levels):
                    new_states[level][i] = states[level][orig_idx]
                inputs[i] = next_word
                new_mem[i] = mem[orig_idx]
            # inspect the memory read/write weights for this step
            h, mem, rw, ww = self.comp_next_debug(c, k, inputs, mem, *states)
            print h.shape, mem.shape, rw.shape, ww.shape
            derw.append(rw[0])
            deww.append(ww[0])
            result = self.comp_next_states(c, k, inputs, new_mem,*new_states)
            new_states =[result[0]]
            new_mem = result[1]
            #print new_states
            print new_mem.shape
            #visual2d(new_mem[0])

            # Filter the sequences that end with end-of-sequence character
            trans = []
            costs = []
            indices = []
            for i in range(n_samples):
                if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                    trans.append(new_trans[i])
                    costs.append(new_costs[i])
                    indices.append(i)
                else:
                    n_samples -= 1
                    fin_trans.append(new_trans[i])
                    fin_costs.append(new_costs[i])
            states = map(lambda x : x[indices], new_states)
            mem = map(lambda x : x[indices], [new_mem])[0]

        print '--decoder weight--'
        #print derw
        #visual2d(numpy.asarray(derw)[:,:len(seq)])
        #visual2d(numpy.asarray(deww)[:,:len(seq)])
        visual2d(numpy.asarray(derw))
        visual2d(numpy.asarray(deww))
        # Dirty tricks to obtain any translation
        if not len(fin_trans):
            if ignore_unk:
                logger.warning("Did not manage without UNK")
                return self.search(seq, n_samples, False, minlen)
            elif n_samples < 500:
                logger.warning("Still no translations: try beam size {}".format(n_samples * 2))
                return self.search(seq, n_samples * 2, False, minlen)
            else:
                logger.error("Translation failed")

        fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
        fin_costs = numpy.array(sorted(fin_costs))
        return fin_trans, fin_costs
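Example #6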
    def search(self, sen, seq, n_samples, ignore_unk=False, minlen=1):
        src_seq = sen.split(' ')
        uni_trans_set = self.get_uni_trans(src_seq)
        
        #print >> sys.stderr, uni_trans_set

        c = self.comp_repr(seq)[0]
        states = map(lambda x : x[None, :], self.comp_init_states(c))
        dim = states[0].shape[1]

        num_levels = len(states)

        fin_trans = []
        fin_costs = []
        fin_str_trans = []
        #fin_infos = []

        fin_aligns = []
        fin_lm_costs = []
        fin_tm_costs = []
        fin_rnn_costs = []
        fin_unk_nums = []

        trans = [[]]
        costs = [0.0]
        str_trans = [[]]
        #infos = [[]]

        lm_costs = [[]]
        tm_costs = [[]]
        rnn_costs = [[]]
        unk_nums = [[]]
        aligns = [[]]

        for k in range(3 * len(seq)):
            if n_samples == 0:
                break

            # Compute probabilities of the next words for
            # all the elements of the beam.
            beam_size = len(trans)
            last_words = (numpy.array(map(lambda t : t[-1], trans))
                    if k > 0
                    else numpy.zeros(beam_size, dtype="int64"))

            next_probs, aln_score_mat = self.comp_next_probs(c, k, last_words, *states)
            log_probs = numpy.log(next_probs)

            # Adjust log probs according to search restrictions
            if ignore_unk:
                log_probs[:,self.unk_id] = -numpy.inf
            if k < minlen:
                log_probs[:,self.eos_id] = -numpy.inf

            # Find candidate options by calling argpartition on the flattened array

            next_costs = numpy.array(costs)[:, None] - log_probs * self.weight_rnn

            flat_next_costs = next_costs.flatten()
            cands_costs_indices = argpartition(
                    flat_next_costs,
                    n_samples * 100)[:n_samples * 100]

            # Decipher the flattened indices
            voc_size = log_probs.shape[1]
            cands_trans_indices = cands_costs_indices / voc_size
            cands_word_indices = cands_costs_indices % voc_size
            cands_costs = flat_next_costs[cands_costs_indices]
            cands_lm_costs = numpy.zeros(len(cands_costs))
            cands_tm_costs = numpy.zeros(len(cands_costs))
            cands_unk_nums = numpy.zeros(len(cands_costs))
            cands_rnn_costs = (-1 * log_probs).flatten()[cands_costs_indices]

            unk_trans = {}
            # Add SMT feature scores to the costs
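            # Log-linear scoring: the total cost combines the RNN cost
            # (already weighted by weight_rnn above) with weight_lm * LM cost,
            # weight_tm * TM cost, and a word penalty weight_wp. For an UNK
            # output, each candidate in uni_trans_set (presumably unigram
            # translations of the source words) is tried and the best-scoring
            # one is kept as the surface form.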
            for i, (orig_idx, next_word, next_cost) in enumerate(
                    zip(cands_trans_indices, cands_word_indices, cands_costs)):

                sorted_aln_idx = numpy.argsort(-aln_score_mat[:,orig_idx])[:3]
                aln_score_array = []
                for aln_idx in sorted_aln_idx:
                    aln_score_array.append([aln_idx, aln_score_mat[aln_idx, orig_idx]])

                lm_score = -self.get_lm_score(str_trans[orig_idx] + self.trg_i2w([next_word]))
                #tm_score, unk_tm_num = self.get_tm_score(src_seq, aln_score_mat[:,orig_idx], self.trg_i2w([next_word])[0])
                tm_score, unk_tm_num, _ = self.get_tm_score_new(src_seq, aln_score_array, self.trg_i2w([next_word])[0])
                tm_score = -tm_score

                if next_word == self.unk_id:
                    #lm_score = numpy.inf
                    #tm_score = numpy.inf
                    unk_trans[orig_idx] = 'UNK'
                    _unk_score = tm_score

                    for t in uni_trans_set:
                        _ls = -self.get_lm_score(str_trans[orig_idx] + [t])
                        #_ts, _unk_tm_num = self.get_tm_score(src_seq, aln_score_mat[:,orig_idx], t)
                        _ts, _unk_tm_num, match_idx = self.get_tm_score_new(src_seq, aln_score_array, t)
                        _ts = -_ts
                        if match_idx == 0 and _ls * self.weight_lm + _ts * self.weight_tm < lm_score * self.weight_lm + tm_score * self.weight_tm:
                            lm_score = _ls
                            tm_score = _ts
                            unk_tm_num = _unk_tm_num
                            unk_trans[orig_idx] = t

                #cands_rnn_costs[i] = cands_costs[i]
                cands_costs[i] += lm_score * self.weight_lm + tm_score * self.weight_tm + self.weight_wp
                cands_lm_costs[i] = lm_score
                cands_tm_costs[i] = tm_score
                cands_unk_nums[i] = unk_tm_num

            best_costs_indices = argpartition(cands_costs.flatten(), n_samples)[:n_samples]
            trans_indices = cands_trans_indices[best_costs_indices]
            word_indices = cands_word_indices[best_costs_indices]
            costs = cands_costs[best_costs_indices]
            _lm_costs = cands_lm_costs[best_costs_indices]
            _tm_costs = cands_tm_costs[best_costs_indices]
            _unk_nums = cands_unk_nums[best_costs_indices]
            _rnn_costs = cands_rnn_costs[best_costs_indices]

            # Form a beam for the next iteration
            new_trans = [[]] * n_samples
            new_str_trans = [[]] * n_samples
            new_costs = numpy.zeros(n_samples)
            new_lm_costs = [[]] * n_samples
            new_tm_costs = [[]] * n_samples
            new_rnn_costs = [[]] * n_samples
            new_unk_nums = [[]] * n_samples
            new_aligns = [[]] * n_samples
            new_states = [numpy.zeros((n_samples, dim), dtype="float32") for level
                    in range(num_levels)]
            inputs = numpy.zeros(n_samples, dtype="int64")
            for i, (orig_idx, next_word, next_cost, _lm_score, _tm_score, _unk_num, _rnn_cost) in enumerate(
                    zip(trans_indices, word_indices, costs, _lm_costs, _tm_costs, _unk_nums, _rnn_costs)):
                new_trans[i] = trans[orig_idx] + [next_word]
                if next_word == self.unk_id:
                    new_str_trans[i] = str_trans[orig_idx] + [unk_trans[orig_idx]]
                else:
                    new_str_trans[i] = str_trans[orig_idx] + self.trg_i2w([next_word])
                new_costs[i] = next_cost
                new_lm_costs[i] = lm_costs[orig_idx] + [_lm_score]
                new_tm_costs[i] = tm_costs[orig_idx] + [_tm_score]
                new_unk_nums[i] = unk_nums[orig_idx] + [_unk_num]
                new_rnn_costs[i] = rnn_costs[orig_idx] + [_rnn_cost]
                new_aligns[i] = aligns[orig_idx] + [aln_score_mat[:,orig_idx]]
                for level in range(num_levels):
                    new_states[level][i] = states[level][orig_idx]
                inputs[i] = next_word
            new_states = self.comp_next_states(c, k, inputs, *new_states)

            # Filter the sequences that end with end-of-sequence character
            trans = []
            costs = []
            str_trans = []
            #infos = []

            lm_costs = []
            tm_costs = []
            unk_nums = []
            rnn_costs = []
            aligns = []
            indices = []
            for i in range(n_samples):
                if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                    trans.append(new_trans[i])
                    costs.append(new_costs[i])
                    str_trans.append(new_str_trans[i])
                    #infos.append(new_infos[i])
                    lm_costs.append(new_lm_costs[i])
                    tm_costs.append(new_tm_costs[i])
                    rnn_costs.append(new_rnn_costs[i])
                    unk_nums.append(new_unk_nums[i])
                    aligns.append(new_aligns[i])
                    indices.append(i)
                else:
                    n_samples -= 1
                    fin_trans.append(new_trans[i])
                    fin_costs.append(new_costs[i])
                    fin_str_trans.append(new_str_trans[i])
                    #fin_infos.append(new_infos[i])
                    fin_lm_costs.append(new_lm_costs[i])
                    fin_tm_costs.append(new_tm_costs[i])
                    fin_rnn_costs.append(new_rnn_costs[i])
                    fin_unk_nums.append(new_unk_nums[i])
                    fin_aligns.append(new_aligns[i])

            states = map(lambda x : x[indices], new_states)

        if not len(fin_trans):
            if ignore_unk:
                logger.warning("Did not manage without UNK")
                return self.search(sen, seq, n_samples, False, minlen)
            elif n_samples < 50:
                logger.warning("Still no translations: try beam size {}".format(n_samples * 2))
                return self.search(sen, seq, n_samples * 2, False, minlen)
            elif n_samples < 100:
                logger.warning("Still no translations: try beam size {}, and --ignore UNK".format(n_samples))
                return self.search(sen, seq, n_samples, True, minlen)
            else:
                logger.error("Cannot find translations; returning an unreliable result")
                fin_trans = trans
                fin_str_trans = str_trans
                fin_costs = costs
                fin_lm_costs = lm_costs
                fin_tm_costs = tm_costs
                fin_rnn_costs = rnn_costs
                fin_unk_nums = unk_nums
                fin_aligns = aligns
 
            #else:
            #    logger.error("Translation failed")

        fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
        fin_aligns = numpy.array(fin_aligns)[numpy.argsort(fin_costs)]
        fin_str_trans = numpy.array(fin_str_trans)[numpy.argsort(fin_costs)]
        fin_tm_costs = numpy.array(fin_tm_costs)[numpy.argsort(fin_costs)]
        fin_lm_costs = numpy.array(fin_lm_costs)[numpy.argsort(fin_costs)]
        fin_unk_nums = numpy.array(fin_unk_nums)[numpy.argsort(fin_costs)]
        fin_rnn_costs = numpy.array(fin_rnn_costs)[numpy.argsort(fin_costs)]
        fin_costs = numpy.array(sorted(fin_costs))

        return fin_trans, fin_costs, fin_aligns, fin_lm_costs, fin_tm_costs, fin_str_trans, fin_unk_nums, fin_rnn_costs
Example #7
    def search(self, seq, n_samples, eos_id, unk_id, ignore_unk=False, minlen=1, final=False):
        num_models = len(self.enc_decs)
        c = []
        for i in xrange(num_models):
            c.append(self.comp_repr[i](seq)[0])
        states = []
        for i in xrange(num_models):
            states.append(map(lambda x : x[None, :], self.comp_init_states[i](c[i])))
        dim = states[0][0].shape[1]

        num_levels = len(states[0])

        fin_trans = []
        fin_costs = []

        trans = [[]]
        costs = [0.0]

        for k in range(3 * len(seq)):
            if n_samples == 0:
                break

            # Compute probabilities of the next words for
            # all the elements of the beam.
            beam_size = len(trans)
            last_words = (numpy.array(map(lambda t : t[-1], trans))
                    if k > 0
                    else numpy.zeros(beam_size, dtype="int64"))
            #log_probs = (numpy.log(self.comp_next_probs_0(c, k, last_words, *states)[0]) +  numpy.log(self.comp_next_probs_1(c, k, last_words, *states)[0]))/2.
            log_probs = sum(numpy.log(self.comp_next_probs[i](c[i], k, last_words, *states[i])[0]) for i in xrange(num_models))/num_models

            # Adjust log probs according to search restrictions
            if ignore_unk:
                log_probs[:,unk_id] = -numpy.inf
            # TODO: report me in the paper!!!
            if k < minlen:
                log_probs[:,eos_id] = -numpy.inf

            # Find the best options by calling argpartition on the flattened array
            next_costs = numpy.array(costs)[:, None] - log_probs
            flat_next_costs = next_costs.flatten()
            best_costs_indices = argpartition(
                    flat_next_costs,
                    n_samples)[:n_samples]

            # Decipher the flattened indices
            voc_size = log_probs.shape[1]
            trans_indices = best_costs_indices / voc_size
            word_indices = best_costs_indices % voc_size
            costs = flat_next_costs[best_costs_indices]

            # Form a beam for the next iteration
            new_trans = [[]] * n_samples
            new_costs = numpy.zeros(n_samples)
            new_states = []
            for i in xrange(num_models):
                new_states.append([numpy.zeros((n_samples, dim), dtype="float32") for level
                    in range(num_levels)])
            inputs = numpy.zeros(n_samples, dtype="int64")
            for i, (orig_idx, next_word, next_cost) in enumerate(
                    zip(trans_indices, word_indices, costs)):
                new_trans[i] = trans[orig_idx] + [next_word]
                new_costs[i] = next_cost
                for level in range(num_levels):
                    for j in xrange(num_models):
                        new_states[j][level][i] = states[j][level][orig_idx]
                inputs[i] = next_word
            for i in xrange(num_models):
                new_states[i]=self.comp_next_states[i](c[i], k, inputs, *new_states[i])

            # Filter the sequences that end with end-of-sequence character
            trans = []
            costs = []
            indices = []
            for i in range(n_samples):
                if new_trans[i][-1] != eos_id:
                    trans.append(new_trans[i])
                    costs.append(new_costs[i])
                    indices.append(i)
                else:
                    n_samples -= 1
                    fin_trans.append(new_trans[i])
                    fin_costs.append(new_costs[i])
            for i in xrange(num_models):
                states[i]=map(lambda x : x[indices], new_states[i])

        # Dirty tricks to obtain any translation
        if not len(fin_trans):
            if ignore_unk:
                logger.warning("Did not manage without UNK")
                return self.search(seq, n_samples, eos_id=eos_id, unk_id=unk_id, ignore_unk=False, minlen=minlen, final=final)
            elif not final:
                logger.warning("No appropriate translations: using larger vocabulary")
                raise RuntimeError
            else:
                logger.warning("No appropriate translation: return empty translation")
                fin_trans=[[]]
                fin_costs = [0.0]
                

        fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
        fin_costs = numpy.array(sorted(fin_costs))
        return fin_trans, fin_costs
Example #8
    def search_with_truth(self,
                          seq,
                          truth,
                          n_samples,
                          ignore_unk=False,
                          minlen=1,
                          idict=None):
        for ww in truth:
            print idict[ww],
        print ''
        c = self.comp_repr(seq)[0]
        # one representation at each encoding time step
        # c.shape[0] = len(seq)

        # states is a dim_dimensional vector, output of initialization unit
        states = map(lambda x: x[None, :], self.comp_init_states(c))
        # dimension of hidden layer
        dim = states[0].shape[1]

        # always 1 in case of non-deep GRU
        num_levels = len(states)

        fin_trans = []
        fin_costs = []

        trans = [[]]
        costs = [0.0]

        # maximum translation length allowed is 3*len(source)
        for k in range(3 * len(seq)):
            if n_samples == 0:
                # all translation ended
                break

            # Compute probabilities of the next words for
            # all the elements of the beam.
            beam_size = len(trans)
            last_words = (numpy.array(map(lambda t: t[-1], trans))
                          if k > 0 else numpy.zeros(beam_size, dtype="int64"))
            log_probs = numpy.log(
                self.comp_next_probs(c, k, last_words, *states)[0])

            print str(k) + '\t' + '|',
            if k > 0 and k <= len(truth):
                if truth[k - 1] < 30000:
                    print idict[truth[k - 1]] + '\t' + '|',
                else:
                    print '<EOS>' + '\t',
                for ww in last_words:
                    print idict[ww] + ' ',
                print ''
            else:
                print last_words

            # Adjust log probs according to search restrictions
            if ignore_unk:
                log_probs[:, self.unk_id] = -numpy.inf
            # TODO: report me in the paper!!!
            if k < minlen:
                log_probs[:, self.eos_id] = -numpy.inf

            # Find the best options by calling argpartition on the flattened array
            next_costs = numpy.array(costs)[:, None] - log_probs
            flat_next_costs = next_costs.flatten()
            best_costs_indices = argpartition(flat_next_costs,
                                              n_samples)[:n_samples]

            # Decipher the flattened indices
            voc_size = log_probs.shape[1]
            # which beam?
            trans_indices = best_costs_indices / voc_size
            # which word?
            word_indices = best_costs_indices % voc_size
            costs = flat_next_costs[best_costs_indices]

            # Form a beam for the next iteration
            new_trans = [[]] * (n_samples)
            new_costs = numpy.zeros(n_samples)
            new_states = [
                numpy.zeros((n_samples, dim), dtype="float32")
                for level in range(num_levels)
            ]
            inputs = numpy.zeros(n_samples, dtype="int64")
            for i, (orig_idx, next_word, next_cost) in enumerate(
                    zip(trans_indices, word_indices, costs)):
                new_trans[i] = trans[orig_idx] + [next_word]
                new_costs[i] = next_cost
                for level in range(num_levels):
                    new_states[level][i] = states[level][orig_idx]
                inputs[i] = next_word

            new_states = self.comp_next_states(c, k, inputs, *new_states)

            # Filter the sequences that end with end-of-sequence character
            trans = []
            costs = []
            indices = []
            for i in range(n_samples):
                if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                    trans.append(new_trans[i])
                    costs.append(new_costs[i])
                    indices.append(i)
                else:
                    n_samples -= 1
                    fin_trans.append(new_trans[i])
                    fin_costs.append(new_costs[i])

            # the beam is naturally reduced when multiple of the best
            # new hypotheses come from the same beam entry
            states = map(lambda x: x[indices], new_states)

        # Dirty tricks to obtain any translation
        if not len(fin_trans):
            if ignore_unk:
                logger.warning("Did not manage without UNK")
                return self.search_with_truth(seq, truth, n_samples, False,
                                              minlen, idict)
            elif n_samples < 500:
                logger.warning(
                    "Still no translations: try beam size {}".format(
                        n_samples * 2))
                return self.search_with_truth(seq, truth, n_samples * 2, False,
                                              minlen, idict)
            else:
                logger.error("Translation failed")

        fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
        fin_costs = numpy.array(sorted(fin_costs))
        return fin_trans, fin_costs
Example #9
    def search(self, seq, n_samples, ignore_unk=False, minlen=1, getRep=False):
        cdata = self.comp_repr(seq)
        #print len(cdata)
        c = cdata[0]

        forward_rester = cdata[1]
        forward_updater = cdata[2]
        backward_rester = cdata[3]
        backward_updater = cdata[4]
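
        # "rester"/"updater" presumably hold the reset- and update-gate
        # activations of the bidirectional GRU encoder; the code below prints
        # their max- and mean-pooled step-to-step trends in both directions.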

        max_forward_rester = numpy.amax(forward_rester, axis=1)
        max_backward_rester = numpy.amax(backward_rester, axis=1)

        max_forward_updater = numpy.amax(forward_updater, axis=1)
        max_backward_updater = numpy.amax(backward_updater, axis=1)

        for_retend = []
        back_retend = []

        for_uptend = []
        back_uptend = []

        for i in range(0, max_forward_rester.shape[0] - 1):
            for_retend.append(max_forward_rester[i + 1] -
                              max_forward_rester[i])

        for i in range(0, max_backward_rester.shape[0] - 1):
            back_retend.append(max_backward_rester[i] -
                               max_backward_rester[i + 1])

        for_retend = numpy.array(for_retend)
        back_retend = numpy.array(back_retend)

        for i in range(0, max_forward_updater.shape[0] - 1):
            for_uptend.append(max_forward_updater[i + 1] -
                              max_forward_updater[i])

        for i in range(0, max_backward_updater.shape[0] - 1):
            back_uptend.append(max_backward_updater[i] -
                               max_backward_updater[i + 1])

        for_uptend = numpy.array(for_uptend)
        back_uptend = numpy.array(back_uptend)

        print (for_retend + back_retend) / 2.0
        print (for_uptend + back_uptend) / 2.0

        print("----------------------------------------------")

        avg_forward_rester = numpy.sum(forward_rester,
                                       axis=1) / forward_rester.shape[1]
        avg_backward_rester = numpy.sum(backward_rester,
                                        axis=1) / backward_rester.shape[1]

        avg_forward_updater = numpy.sum(forward_updater,
                                        axis=1) / forward_updater.shape[1]
        avg_backward_updater = numpy.sum(backward_updater,
                                         axis=1) / backward_updater.shape[1]

        for_retend = []
        back_retend = []

        for_uptend = []
        back_uptend = []

        for i in range(0, avg_forward_rester.shape[0] - 1):
            for_retend.append(avg_forward_rester[i + 1] -
                              avg_forward_rester[i])

        for i in range(0, avg_backward_rester.shape[0] - 1):
            back_retend.append(avg_backward_rester[i] -
                               avg_backward_rester[i + 1])

        for_retend = numpy.array(for_retend)
        back_retend = numpy.array(back_retend)

        for i in range(0, avg_forward_updater.shape[0] - 1):
            for_uptend.append(avg_forward_updater[i + 1] -
                              avg_forward_updater[i])

        for i in range(0, avg_backward_updater.shape[0] - 1):
            back_uptend.append(avg_backward_updater[i] -
                               avg_backward_updater[i + 1])

        for_uptend = numpy.array(for_uptend)
        back_uptend = numpy.array(back_uptend)

        print (for_retend + back_retend) / 2.0
        print (for_uptend + back_uptend) / 2.0

        if getRep:
            return c

        # print "c shape is %s " % (str(c.shape))
        states = map(lambda x: x[None, :], self.comp_init_states(c))
        dim = states[0].shape[1]

        num_levels = len(states)

        fin_trans = []
        fin_costs = []
        fin_align = []

        trans = [[]]
        costs = [0.0]

        dec_rester = [[]] * n_samples
        dec_updater = [[]] * n_samples

        fin_dec_rester = []
        fin_dec_updater = []

        align = []
        for i in range(n_samples):
            align.append(numpy.array([numpy.zeros(len(seq))]))

        for k in range(3 * len(seq)):
            if n_samples == 0:
                break

            # Compute probabilities of the next words for
            # all the elements of the beam.
            beam_size = len(trans)
            last_words = (numpy.array(map(lambda t: t[-1], trans))
                          if k > 0 else numpy.zeros(beam_size, dtype="int64"))

            ans = self.comp_next_probs(c, k, last_words, *states)

            probs = ans[0]
            alignments = ans[1]
            log_probs = numpy.log(probs)

            trester = ans[2]
            tupdater = ans[3]

            trester = numpy.sum(trester, axis=1) / trester.shape[1]
            tupdater = numpy.sum(tupdater, axis=1) / tupdater.shape[1]

            #print "___________________"
            #print tupdater

            # Adjust log probs according to search restrictions
            if ignore_unk:
                log_probs[:, self.unk_id] = -numpy.inf
            # TODO: report me in the paper!!!
            if k < minlen:
                log_probs[:, self.eos_id] = -numpy.inf

            # Find the best options by calling argpartition on the flattened array
            next_costs = numpy.array(costs)[:, None] - log_probs
            flat_next_costs = next_costs.flatten()
            best_costs_indices = argpartition(flat_next_costs,
                                              n_samples)[:n_samples]

            # Decipher the flattened indices
            voc_size = log_probs.shape[1]
            trans_indices = best_costs_indices / voc_size
            word_indices = best_costs_indices % voc_size
            costs = flat_next_costs[best_costs_indices]
            #print best_costs_indices

            # Form a beam for the next iteration

            new_rester = [[]] * n_samples
            new_updater = [[]] * n_samples
            new_align = [[]] * n_samples

            new_trans = [[]] * n_samples
            new_costs = numpy.zeros(n_samples)
            new_states = [
                numpy.zeros((n_samples, dim), dtype="float32")
                for level in range(num_levels)
            ]
            inputs = numpy.zeros(n_samples, dtype="int64")
            for i, (orig_idx, next_word, next_cost) in enumerate(
                    zip(trans_indices, word_indices, costs)):
                new_trans[i] = trans[orig_idx] + [next_word]
                new_costs[i] = next_cost

                #dec_rester[i] = dec_rester[i] + [trester[orig_idx]]
                #print orig_idx
                #align[i] = numpy.concatenate((align[i] , [alignments[:,orig_idx]]), axis=0)
                new_align[i] = numpy.concatenate(
                    (align[orig_idx], [alignments[:, orig_idx]]), axis=0)
                new_rester[i] = dec_rester[orig_idx] + [trester[orig_idx]]
                new_updater[i] = dec_updater[orig_idx] + [tupdater[orig_idx]]

                for level in range(num_levels):
                    new_states[level][i] = states[level][orig_idx]
                inputs[i] = next_word
            new_states = self.comp_next_states(c, k, inputs, *new_states)

            # Filter the sequences that end with end-of-sequence character
            trans = []
            costs = []
            indices = []
            align = []
            dec_rester = []
            dec_updater = []
            for i in range(n_samples):
                if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                    trans.append(new_trans[i])
                    costs.append(new_costs[i])
                    align.append(new_align[i])
                    dec_rester.append(new_rester[i])
                    dec_updater.append(new_updater[i])
                    indices.append(i)
                else:
                    n_samples -= 1
                    fin_trans.append(new_trans[i])
                    fin_costs.append(new_costs[i])
                    fin_align.append(new_align[i])
                    fin_dec_rester.append(new_rester[i])
                    fin_dec_updater.append(new_updater[i])
            states = map(lambda x: x[indices], new_states)

        for i in range(len(fin_align)):
            talign = fin_align[i]
            fin_align[i] = talign[1:, :]

        #print fin_align

        # Dirty tricks to obtain any translation
        if not len(fin_trans):
            if ignore_unk:
                logger.warning("Did not manage without UNK")
                return self.search(seq, n_samples, False, minlen)
            elif n_samples < 500:
                logger.warning(
                    "Still no translations: try beam size {}".format(
                        n_samples * 2))
                return self.search(seq, n_samples * 2, False, minlen)
            else:
                logger.error("Translation failed")

        tfin_align = []
        index = numpy.argsort(fin_costs)

        for i in range(0, len(index)):
            tfin_align.append(fin_align[index[i]])

        fin_dec_rester = numpy.array(fin_dec_rester)[numpy.argsort(fin_costs)]
        fin_dec_updater = numpy.array(fin_dec_updater)[numpy.argsort(
            fin_costs)]

        fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
        fin_costs = numpy.array(sorted(fin_costs))
        return fin_trans, fin_costs, tfin_align, fin_dec_rester, fin_dec_updater
Example #10
    def search(self, seq, n_samples, ignore_unk=False, minlen=1):
        c = self.comp_repr(seq)[0]
        states = map(lambda x: x[None, :], self.comp_init_states(c))
        dim = states[0].shape[1]
        # added by Zhaopeng Tu, 2015-11-02
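        # Coverage model: track how much attention each source word has
        # received. With the linguistic variant and a 'subtractive' update,
        # coverage starts at one and is consumed; a fertility model may
        # additionally predict how much attention each word should receive.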
        if self.enc_dec.state['maintain_coverage']:
            coverage_dim = self.enc_dec.state['coverage_dim']
            if self.enc_dec.state[
                    'use_linguistic_coverage'] and self.enc_dec.state[
                        'coverage_accumulated_operation'] == 'subtractive':
                coverages = numpy.ones((c.shape[0], 1, coverage_dim),
                                       dtype='float32')
            else:
                coverages = numpy.zeros((c.shape[0], 1, coverage_dim),
                                        dtype='float32')
            fin_coverages = []
        else:
            coverages = None

        if self.enc_dec.state['maintain_coverage'] and self.enc_dec.state[
                'use_linguistic_coverage'] and self.enc_dec.state[
                    'use_fertility_model']:
            fertility = self.comp_fert(c)
        else:
            fertility = None

        num_levels = len(states)

        fin_trans = []
        fin_costs = []
        fin_aligns = []

        trans = [[]]
        aligns = [[]]
        costs = [0.0]

        for k in range(3 * len(seq)):
            if n_samples == 0:
                break

            # Compute probabilities of the next words for
            # all the elements of the beam.
            beam_size = len(trans)
            last_words = (numpy.array(map(lambda t: t[-1], trans))
                          if k > 0 else numpy.zeros(beam_size, dtype="int64"))
            results = self.comp_next_probs(c,
                                           k,
                                           last_words,
                                           *states,
                                           coverage_before=coverages,
                                           fertility=fertility)
            log_probs = numpy.log(results[0])
            # alignment shape: (source_len, beam_size)
            alignment = results[1]

            # Adjust log probs according to search restrictions
            if ignore_unk:
                log_probs[:, self.unk_id] = -numpy.inf
            # TODO: report me in the paper!!!
            if k < minlen:
                log_probs[:, self.eos_id] = -numpy.inf

            # Find the best options by calling argpartition on the flattened array
            next_costs = numpy.array(costs)[:, None] - log_probs
            flat_next_costs = next_costs.flatten()
            best_costs_indices = argpartition(flat_next_costs,
                                              n_samples)[:n_samples]

            # Decipher the flattened indices
            voc_size = log_probs.shape[1]
            trans_indices = best_costs_indices / voc_size
            word_indices = best_costs_indices % voc_size
            costs = flat_next_costs[best_costs_indices]

            # Form a beam for the next iteration
            new_trans = [[]] * n_samples

            new_aligns = [[]] * n_samples
            new_costs = numpy.zeros(n_samples)
            new_states = [
                numpy.zeros((n_samples, dim), dtype="float32")
                for level in range(num_levels)
            ]
            inputs = numpy.zeros(n_samples, dtype="int64")
            if self.enc_dec.state['maintain_coverage']:
                new_coverages = numpy.zeros(
                    (c.shape[0], n_samples, coverage_dim), dtype='float32')
            else:
                new_coverages = None
            for i, (orig_idx, next_word, next_cost) in enumerate(
                    zip(trans_indices, word_indices, costs)):
                new_trans[i] = trans[orig_idx] + [next_word]
                # alignment shape: (source_len, beam_size)
                new_aligns[i] = aligns[orig_idx] + [alignment[:, orig_idx]]
                new_costs[i] = next_cost
                for level in range(num_levels):
                    new_states[level][i] = states[level][orig_idx]
                inputs[i] = next_word
                if self.enc_dec.state['maintain_coverage']:
                    new_coverages[:, i, :] = coverages[:, orig_idx, :]
            new_states = self.comp_next_states(c,
                                               k,
                                               inputs,
                                               *new_states,
                                               coverage_before=new_coverages,
                                               fertility=fertility)
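            # With coverage enabled, comp_next_states returns the updated
            # coverage tensor as its last output; split it off the states.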
            if self.enc_dec.state['maintain_coverage']:
                new_coverages = new_states[-1]
                new_states = new_states[:-1]

            # Filter the sequences that end with end-of-sequence character
            trans = []
            aligns = []
            costs = []
            indices = []
            for i in range(n_samples):
                if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                    trans.append(new_trans[i])
                    aligns.append(new_aligns[i])
                    costs.append(new_costs[i])
                    indices.append(i)
                else:
                    n_samples -= 1
                    fin_trans.append(new_trans[i])
                    fin_aligns.append(new_aligns[i])
                    fin_costs.append(new_costs[i])
                    if self.enc_dec.state['maintain_coverage']:
                        fin_coverages.append(new_coverages[:, i, 0])
            states = map(lambda x: x[indices], new_states)

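            # Re-index coverage so it keeps only the surviving hypotheses,
            # mirroring the re-indexing of the recurrent states above.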
            if self.enc_dec.state['maintain_coverage']:
                coverages = numpy.zeros((c.shape[0], n_samples, coverage_dim),
                                        dtype='float32')
                for i in xrange(n_samples):
                    coverages[:, i, :] = new_coverages[:, indices[i], :]

        # Dirty tricks to obtain any translation
        if not len(fin_trans):
            if ignore_unk:
                logger.warning("Did not manage without UNK")
                return self.search(seq, n_samples, False, minlen)
            elif n_samples < 100:
                logger.warning(
                    "Still no translations: try beam size {}".format(
                        n_samples * 2))
                return self.search(seq, n_samples * 2, False, minlen)
            else:
                fin_trans = trans
                fin_aligns = aligns
                fin_costs = costs
                if self.enc_dec.state['maintain_coverage']:
                    fin_coverages = coverages[:, :, 0].transpose().tolist()
                logger.error("Translation failed")

        fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
        fin_aligns = numpy.array(fin_aligns)[numpy.argsort(fin_costs)]
        if self.enc_dec.state['maintain_coverage']:
            fin_coverages = numpy.array(fin_coverages)[numpy.argsort(
                fin_costs)]
        fin_costs = numpy.array(sorted(fin_costs))

        if self.enc_dec.state['maintain_coverage']:
            if (self.enc_dec.state['use_linguistic_coverage'] and
                    self.enc_dec.state['use_fertility_model']):
                return fin_trans, fin_aligns, fin_costs, fin_coverages, fertility
            else:
                return fin_trans, fin_aligns, fin_costs, fin_coverages
        else:
            return fin_trans, fin_aligns, fin_costs
Example #12
0
    def search(self, sen, seq, n_samples, ignore_unk=False, minlen=1):
        src_seq = sen.split(' ')
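        # Candidate word-level translations of the source tokens, used below to
        # substitute a real word whenever the model emits UNK.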
        uni_trans_set = self.get_uni_trans(src_seq)

        c = self.comp_repr(seq)[0]
        states = map(lambda x: x[None, :], self.comp_init_states(c))
        dim = states[0].shape[1]

        num_levels = len(states)

        fin_trans = []
        fin_costs = []
        fin_str_trans = []

        fin_aligns = []
        fin_lm_costs = []
        fin_tm_costs = []
        fin_rnn_costs = []
        fin_unk_nums = []

        trans = [[]]
        costs = [0.0]
        str_trans = [[]]

        lm_costs = [[]]
        tm_costs = [[]]
        rnn_costs = [[]]
        unk_nums = [[]]
        aligns = [[]]

        for k in range(3 * len(seq)):
            if n_samples == 0:
                break

            # Compute probabilities of the next words for
            # all the elements of the beam.
            beam_size = len(trans)
            last_words = (numpy.array(map(lambda t: t[-1], trans))
                          if k > 0 else numpy.zeros(beam_size, dtype="int64"))

            next_probs, aln_score_mat = self.comp_next_probs(
                c, k, last_words, *states)
            log_probs = numpy.log(next_probs)

            # Adjust log probs according to search restrictions
            if ignore_unk:
                log_probs[:, self.unk_id] = -numpy.inf
            if k < minlen:
                log_probs[:, self.eos_id] = -numpy.inf

            # Find the best options by calling argpartition on the flattened array

            next_costs = (numpy.array(costs)[:, None]
                          - log_probs * self.weight_rnn)
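            # The RNN log-probability is one feature in a weighted linear
            # model; LM and TM feature scores are added per candidate below.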

            flat_next_costs = next_costs.flatten()
            cands_costs_indices = argpartition(
                flat_next_costs, n_samples * 100)[:n_samples * 100]
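            # Over-generate 100x the beam as candidates; they are rescored with
            # the SMT features and pruned back to n_samples below.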

            # Decipher the flattened indices
            voc_size = log_probs.shape[1]
            cands_trans_indices = cands_costs_indices // voc_size
            cands_word_indices = cands_costs_indices % voc_size
            cands_costs = flat_next_costs[cands_costs_indices]
            cands_lm_costs = numpy.zeros(len(cands_costs))
            cands_tm_costs = numpy.zeros(len(cands_costs))
            cands_unk_nums = numpy.zeros(len(cands_costs))
            cands_rnn_costs = (-1 * log_probs).flatten()[cands_costs_indices]

            unk_trans = {}
            # Add SMT feature scores to the candidate costs
            for i, (orig_idx, next_word, next_cost) in enumerate(
                    zip(cands_trans_indices, cands_word_indices, cands_costs)):

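                # Keep the three source positions with the highest attention
                # weight as the alignment context for the TM feature.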
                sorted_aln_idx = numpy.argsort(-aln_score_mat[:, orig_idx])[:3]
                aln_score_array = []
                for aln_idx in sorted_aln_idx:
                    aln_score_array.append(
                        [aln_idx, aln_score_mat[aln_idx, orig_idx]])

                lm_score = -self.get_lm_score(str_trans[orig_idx] +
                                              self.trg_i2w([next_word]))
                tm_score, unk_tm_num, _ = self.get_tm_score_new(
                    src_seq, aln_score_array,
                    self.trg_i2w([next_word])[0])
                tm_score = -tm_score

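                # If the best candidate is UNK, look for a lexical translation
                # whose weighted LM+TM cost beats the UNK hypothesis.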
                if next_word == self.unk_id:
                    unk_trans[orig_idx] = 'UNK'

                    for t in uni_trans_set:
                        _ls = -self.get_lm_score(str_trans[orig_idx] + [t])
                        _ts, _unk_tm_num, match_idx = self.get_tm_score_new(
                            src_seq, aln_score_array, t)
                        _ts = -_ts
                        if (match_idx == 0 and
                                _ls * self.weight_lm + _ts * self.weight_tm <
                                lm_score * self.weight_lm +
                                tm_score * self.weight_tm):
                            lm_score = _ls
                            tm_score = _ts
                            unk_tm_num = _unk_tm_num
                            unk_trans[orig_idx] = t

                cands_costs[i] += (lm_score * self.weight_lm +
                                   tm_score * self.weight_tm + self.weight_wp)
                cands_lm_costs[i] = lm_score
                cands_tm_costs[i] = tm_score
                cands_unk_nums[i] = unk_tm_num

            best_costs_indices = argpartition(cands_costs,
                                              n_samples)[:n_samples]
            trans_indices = cands_trans_indices[best_costs_indices]
            word_indices = cands_word_indices[best_costs_indices]
            costs = cands_costs[best_costs_indices]
            _lm_costs = cands_lm_costs[best_costs_indices]
            _tm_costs = cands_tm_costs[best_costs_indices]
            _unk_nums = cands_unk_nums[best_costs_indices]
            _rnn_costs = cands_rnn_costs[best_costs_indices]

            # Form a beam for the next iteration
            new_trans = [[]] * n_samples
            new_str_trans = [[]] * n_samples
            new_costs = numpy.zeros(n_samples)
            new_lm_costs = [[]] * n_samples
            new_tm_costs = [[]] * n_samples
            new_rnn_costs = [[]] * n_samples
            new_unk_nums = [[]] * n_samples
            new_aligns = [[]] * n_samples
            new_states = [
                numpy.zeros((n_samples, dim), dtype="float32")
                for level in range(num_levels)
            ]
            inputs = numpy.zeros(n_samples, dtype="int64")
            for i, (orig_idx, next_word, next_cost, _lm_score, _tm_score,
                    _unk_num, _rnn_cost) in enumerate(
                        zip(trans_indices, word_indices, costs, _lm_costs,
                            _tm_costs, _unk_nums, _rnn_costs)):
                new_trans[i] = trans[orig_idx] + [next_word]
                if next_word == self.unk_id:
                    new_str_trans[i] = str_trans[orig_idx] + [
                        unk_trans[orig_idx]
                    ]
                else:
                    new_str_trans[i] = str_trans[orig_idx] + self.trg_i2w(
                        [next_word])
                new_costs[i] = next_cost
                new_lm_costs[i] = lm_costs[orig_idx] + [_lm_score]
                new_tm_costs[i] = tm_costs[orig_idx] + [_tm_score]
                new_unk_nums[i] = unk_nums[orig_idx] + [_unk_num]
                new_rnn_costs[i] = rnn_costs[orig_idx] + [_rnn_cost]
                new_aligns[i] = aligns[orig_idx] + [aln_score_mat[:, orig_idx]]
                for level in range(num_levels):
                    new_states[level][i] = states[level][orig_idx]
                inputs[i] = next_word
            new_states = self.comp_next_states(c, k, inputs, *new_states)

            # Filter the sequences that end with end-of-sequence character
            trans = []
            costs = []
            str_trans = []

            lm_costs = []
            tm_costs = []
            unk_nums = []
            rnn_costs = []
            aligns = []
            indices = []
            for i in range(n_samples):
                if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                    trans.append(new_trans[i])
                    costs.append(new_costs[i])
                    str_trans.append(new_str_trans[i])
                    lm_costs.append(new_lm_costs[i])
                    tm_costs.append(new_tm_costs[i])
                    rnn_costs.append(new_rnn_costs[i])
                    unk_nums.append(new_unk_nums[i])
                    aligns.append(new_aligns[i])
                    indices.append(i)
                else:
                    n_samples -= 1
                    fin_trans.append(new_trans[i])
                    fin_costs.append(new_costs[i])
                    fin_str_trans.append(new_str_trans[i])
                    fin_lm_costs.append(new_lm_costs[i])
                    fin_tm_costs.append(new_tm_costs[i])
                    fin_rnn_costs.append(new_rnn_costs[i])
                    fin_unk_nums.append(new_unk_nums[i])
                    fin_aligns.append(new_aligns[i])

            states = map(lambda x: x[indices], new_states)

        if not len(fin_trans):
            if ignore_unk:
                logger.warning("Did not manage without UNK")
                return self.search(sen, seq, n_samples, False, minlen)
            elif n_samples < 50:
                logger.warning(
                    "Still no translations: try beam size {}".format(
                        n_samples * 2))
                return self.search(sen, seq, n_samples * 2, False, minlen)
            elif n_samples < 100:
                logger.warning(
                    "Still no translations: retry beam size {} ignoring UNK".
                    format(n_samples))
                return self.search(sen, seq, n_samples, True, minlen)
            else:
                logger.error(
                    "Cannot find translations; returning an unreliable result")
                fin_trans = trans
                fin_str_trans = str_trans
                fin_costs = costs
                fin_lm_costs = lm_costs
                fin_tm_costs = tm_costs
                fin_rnn_costs = rnn_costs
                fin_unk_nums = unk_nums
                fin_aligns = aligns

        fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
        fin_aligns = numpy.array(fin_aligns)[numpy.argsort(fin_costs)]
        fin_str_trans = numpy.array(fin_str_trans)[numpy.argsort(fin_costs)]
        fin_tm_costs = numpy.array(fin_tm_costs)[numpy.argsort(fin_costs)]
        fin_lm_costs = numpy.array(fin_lm_costs)[numpy.argsort(fin_costs)]
        fin_unk_nums = numpy.array(fin_unk_nums)[numpy.argsort(fin_costs)]
        fin_rnn_costs = numpy.array(fin_rnn_costs)[numpy.argsort(fin_costs)]
        fin_costs = numpy.array(sorted(fin_costs))

        return fin_trans, fin_costs, fin_aligns, fin_lm_costs, fin_tm_costs, fin_str_trans, fin_unk_nums, fin_rnn_costs
Example #13
0
    def search(self,
               seq,
               n_samples,
               ignore_unk=False,
               minlen=1,
               compute_alignment=False,
               have_source=False):

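        # The input concatenates one source sequence per system, delimited by
        # split_id; split it and terminate each segment with the source EOS.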
        x = []
        last_split = -1
        for i in xrange(len(seq)):
            if seq[i] == self.split_id:
                tmp = copy.deepcopy(seq[last_split + 1:i + 1])
                x.append(tmp)
                last_split = i
        assert self.num_systems == len(x)
        for i in xrange(self.num_systems):
            x[i][-1] = self.source_eos_id
        c = self.comp_repr(*x)
        states = map(lambda x: x[None, :], self.comp_init_states(*c))
        dim = states[0].shape[1]

        num_levels = len(states)

        fin_trans = []
        fin_costs = []

        trans = [[]]
        costs = [0.0]

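        # Override minlen with a length heuristic: roughly half the source
        # length, so the combiner cannot emit degenerate short outputs.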
        if have_source:
            minlen = (len(x[0]) - 1) // 2
        else:
            minlen = (len(seq) - self.num_systems) // self.num_systems // 2

        if compute_alignment:
            fin_aligns = []
            aligns = [[]]

        for k in range(6 * minlen):
            if n_samples == 0:
                break

            # Compute probabilities of the next words for
            # all the elements of the beam.
            beam_size = len(trans)
            last_words = (numpy.array(map(lambda t: t[-1], trans))
                          if k > 0 else numpy.zeros(beam_size, dtype="int64"))

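            # Optionally query the attention weights at this step to record an
            # alignment for every hypothesis in the beam.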
            if compute_alignment:
                align = self.comp_align(k, last_words, *(states + c))
            log_probs = numpy.log(
                self.comp_next_probs(k, last_words, *(states + c))[0])

            # Adjust log probs according to search restrictions
            if ignore_unk:
                log_probs[:, self.unk_id] = -numpy.inf
            # TODO: report me in the paper!!!
            if k < minlen:
                log_probs[:, self.eos_id] = -numpy.inf

            # Find the best options by calling argpartition on the flattened array
            next_costs = numpy.array(costs)[:, None] - log_probs
            flat_next_costs = next_costs.flatten()
            best_costs_indices = argpartition(flat_next_costs,
                                              n_samples)[:n_samples]

            # Decipher the flattened indices
            voc_size = log_probs.shape[1]
            trans_indices = best_costs_indices // voc_size
            word_indices = best_costs_indices % voc_size
            costs = flat_next_costs[best_costs_indices]

            # Form a beam for the next iteration
            new_trans = [[]] * n_samples
            new_costs = numpy.zeros(n_samples)
            new_states = [
                numpy.zeros((n_samples, dim), dtype="float32")
                for level in range(num_levels)
            ]
            if compute_alignment:
                new_aligns = [[]] * n_samples
            inputs = numpy.zeros(n_samples, dtype="int64")
            for i, (orig_idx, next_word, next_cost) in enumerate(
                    zip(trans_indices, word_indices, costs)):
                new_trans[i] = trans[orig_idx] + [next_word]
                new_costs[i] = next_cost
                if compute_alignment:
                    new_aligns[i] = aligns[orig_idx] + [align[:, orig_idx]]
                for level in range(num_levels):
                    new_states[level][i] = states[level][orig_idx]
                inputs[i] = next_word
            new_states = self.comp_next_states(k, inputs, *(new_states + c))

            # Filter the sequences that end with end-of-sequence character
            trans = []
            costs = []
            aligns = []
            indices = []
            for i in range(n_samples):
                if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                    trans.append(new_trans[i])
                    costs.append(new_costs[i])
                    if compute_alignment:
                        aligns.append(new_aligns[i])
                    indices.append(i)
                else:
                    n_samples -= 1
                    fin_trans.append(new_trans[i])
                    fin_costs.append(new_costs[i])
                    if compute_alignment:
                        fin_aligns.append(new_aligns[i])
            states = map(lambda x: x[indices], new_states)

        # Dirty tricks to obtain any translation
        if not len(fin_trans):
            if ignore_unk:
                logger.warning("Did not manage without UNK")
                return self.search(seq, n_samples, False, minlen,
                                   compute_alignment, have_source)
            elif n_samples < 100:
                logger.warning(
                    "Still no translations: try beam size {}".format(
                        n_samples * 2))
                return self.search(seq, n_samples * 2, False, minlen,
                                   compute_alignment, have_source)
            else:
                logger.error("Translation failed")

        fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
        if compute_alignment:
            fin_aligns = numpy.array(fin_aligns)[numpy.argsort(fin_costs)]
        fin_costs = numpy.array(sorted(fin_costs))
        if compute_alignment:
            return fin_trans, fin_aligns, fin_costs
        else:
            return fin_trans, fin_costs
Example #14
0
    def search(self, seqin, seq, n_samples, ignore_unk=False, minlen=1):
        src_seq = seqin.split(' ')
        c = self.comp_repr(seq)[0]
        states = map(lambda x: x[None, :], self.comp_init_states(c))
        dim = states[0].shape[1]

        num_levels = len(states)

        fin_trans = []
        fin_costs = []
        fin_aligns = []

        trans = [[]]
        costs = [0.0]
        aligns = [[]]

        for k in range(3 * len(seq)):
            if n_samples == 0:
                break

            # Compute probabilities of the next words for
            # all the elements of the beam.
            beam_size = len(trans)
            last_words = (numpy.array(map(lambda t: t[-1], trans))
                          if k > 0 else numpy.zeros(beam_size, dtype="int64"))

            next_probs, aln_score_mat = self.comp_next_probs(
                c, k, last_words, *states)
            log_probs = numpy.log(next_probs)

            # Adjust log probs according to search restrictions
            if ignore_unk:
                log_probs[:, self.unk_id] = -numpy.inf

            if k < minlen:
                log_probs[:, self.eos_id] = -numpy.inf

            # Find the best options by calling argpartition on the flattened array
            next_costs = (numpy.array(costs)[:, None]
                          - log_probs * self.rnn_weight)

            flat_next_costs = next_costs.flatten()
            cands_costs_indices = argpartition(
                flat_next_costs, n_samples * 100)[:n_samples * 100]
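            # Over-generate 100x the beam, rescore each candidate with the LM
            # and TM features below, then prune back to n_samples.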

            # Decipher the flattened indices
            voc_size = log_probs.shape[1]
            cands_trans_indices = cands_costs_indices // voc_size
            cands_word_indices = cands_costs_indices % voc_size
            cands_costs = flat_next_costs[cands_costs_indices]

            # Add SMT feature scores to the candidate costs
            for i, (orig_idx, next_word, next_cost) in enumerate(
                    zip(cands_trans_indices, cands_word_indices, cands_costs)):
                lm_score = self.get_lm_score(trans[orig_idx] + [next_word])
                tm_score = self.get_tm_score(src_seq,
                                             aln_score_mat[:, orig_idx],
                                             self.trg_i2w([next_word])[0])
                cands_costs[i] -= lm_score + tm_score

            best_costs_indices = argpartition(cands_costs,
                                              n_samples)[:n_samples]
            trans_indices = cands_trans_indices[best_costs_indices]
            word_indices = cands_word_indices[best_costs_indices]
            costs = cands_costs[best_costs_indices]

            # Form a beam for the next iteration
            new_trans = [[]] * n_samples
            new_costs = numpy.zeros(n_samples)
            new_aligns = [[]] * n_samples
            new_states = [
                numpy.zeros((n_samples, dim), dtype="float32")
                for level in range(num_levels)
            ]
            inputs = numpy.zeros(n_samples, dtype="int64")
            for i, (orig_idx, next_word, next_cost) in enumerate(
                    zip(trans_indices, word_indices, costs)):
                new_trans[i] = trans[orig_idx] + [next_word]
                new_costs[i] = next_cost
                new_aligns[i] = aligns[orig_idx] + [aln_score_mat[:, orig_idx]]
                for level in range(num_levels):
                    new_states[level][i] = states[level][orig_idx]
                inputs[i] = next_word
            new_states = self.comp_next_states(c, k, inputs, *new_states)

            # Filter the sequences that end with end-of-sequence character
            trans = []
            costs = []
            aligns = []
            indices = []
            for i in range(n_samples):
                if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                    trans.append(new_trans[i])
                    costs.append(new_costs[i])
                    aligns.append(new_aligns[i])
                    indices.append(i)
                else:
                    n_samples -= 1
                    fin_trans.append(new_trans[i])
                    fin_costs.append(new_costs[i])
                    fin_aligns.append(new_aligns[i])
            states = map(lambda x: x[indices], new_states)

        if not len(fin_trans):
            if ignore_unk:
                logger.warning("Did not manage without UNK")
                return self.search(seqin, seq, n_samples, False, minlen)
            elif n_samples < 100:
                logger.warning(
                    "Still no translations: try beam size {}".format(
                        n_samples * 2))
                return self.search(seqin, seq, n_samples * 2, False, minlen)
            else:
                logger.error("Translation failed")

        fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
        fin_aligns = numpy.array(fin_aligns)[numpy.argsort(fin_costs)]
        fin_costs = numpy.array(sorted(fin_costs))

        return fin_trans, fin_costs, fin_aligns