Code Example #1
File: search.py  Project: npow/hed-dlg
    def search(self, context, beam_size=1, ignore_unk=False, \
               min_length=1, max_length=100, normalize_by_length=True, verbose=False):
        if not self.compiled:
            self.compile()

        # Convert to column vector
        context = numpy.array(context, dtype='int32')[:, None]
        prev_hd = numpy.zeros((beam_size, self.qdim), dtype='float32')
        prev_hs = numpy.zeros((beam_size, self.sdim), dtype='float32')
         
        # Compute the context encoding and get
        # the last hierarchical state
        h, hs = self.compute_encoding(context)
        prev_hs[:] = hs[-1]
         
        fin_beam_gen = []
        fin_beam_costs = []
         
        beam_gen = [[] for i in range(beam_size)] 
        costs = [0.0 for i in range(beam_size)]

        for k in range(max_length):
            if len(fin_beam_gen) >= beam_size:
                break
             
            if verbose:
                logger.info("Beam search at step %d" % k)
             
            prev_words = (numpy.array(map(lambda bg : bg[-1], beam_gen))
                    if k > 0
                    else numpy.zeros(beam_size, dtype="int32") + self.eos_sym)

            outputs, hd = self.next_probs_predictor(prev_hs, prev_words, prev_hd)
            log_probs = numpy.log(outputs)
             
            # Adjust log probs according to search restrictions
            if ignore_unk:
                log_probs[:, self.unk_sym] = -numpy.inf 
            if k <= min_length:
                log_probs[:, self.eos_sym] = -numpy.inf

            next_costs = numpy.array(costs)[:, None] - log_probs
            
            # At the first step all beams are identical, so consider only the
            # first row; this avoids expanding duplicate <s> tokens.
            if k == 0:
                flat_next_costs = next_costs[:1, :].flatten()
            else:
                # Set the next cost to infinity for finished sentences
                # (they will be replaced by other sentences in the beam)
                indices = [i for i, bg in enumerate(beam_gen) if bg[-1] == self.eos_sym]
                next_costs[indices, :] = numpy.inf 
                flat_next_costs = next_costs.flatten()
             
            best_costs_indices = argpartition(
                    flat_next_costs.flatten(),
                    beam_size)[:beam_size]            
             

            # Decipher the flattened indices
            voc_size = log_probs.shape[1]
            trans_indices = best_costs_indices / voc_size
            word_indices = best_costs_indices % voc_size 
            costs = flat_next_costs[best_costs_indices]
             
            new_beam_gen = [[] for i in range(beam_size)] 
            new_costs = numpy.zeros(beam_size)
            new_prev_hd = numpy.zeros((beam_size, self.qdim), dtype="float32")
            
            for i, (orig_idx, next_word, next_cost) in enumerate(
                        zip(trans_indices, word_indices, costs)):
                new_beam_gen[i] = beam_gen[orig_idx] + [next_word]
                new_costs[i] = next_cost
                new_prev_hd[i] = hd[orig_idx]
            
            # Save the previous hidden states
            prev_hd = new_prev_hd
            beam_gen = new_beam_gen 
            costs = new_costs 

            for i in range(beam_size):
                # We finished sampling?
                if beam_gen[i][-1] == self.eos_sym:
                    if verbose:
                        logger.debug("Adding sentence {} from beam {}".format(new_beam_gen[i], i))
                     
                    # Append the finished hypothesis (still ending with </s>)
                    fin_beam_gen.append(beam_gen[i]) 
                    if normalize_by_length:
                        costs[i] /= len(beam_gen[i])
                    fin_beam_costs.append(costs[i])
        
        # If nothing has finished, fall back to the current
        # (unfinished) beam so we always return something
        if len(fin_beam_gen) == 0:
            fin_beam_gen = beam_gen
            if normalize_by_length:
                costs = [costs[i]/len(beam_gen[i]) for i in range(len(beam_gen))]
            fin_beam_costs = costs 
            

        # Here we could have more than beam_size samples, because we keep
        # sampling beam_size terms even after a sentence in the beam has
        # terminated with </s>
        fin_beam_gen = numpy.array(fin_beam_gen)[numpy.argsort(fin_beam_costs)]
        fin_beam_costs = numpy.array(sorted(fin_beam_costs))
        return fin_beam_gen[:beam_size], fin_beam_costs[:beam_size]
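
The core of each iteration is the expansion step: subtract the word log-probabilities from the accumulated beam costs, flatten, and keep the beam_size cheapest (beam, word) pairs with argpartition. Below is a minimal, self-contained sketch of that step with toy numbers (not the model's real probabilities); it uses // for the index division because the snippet above is Python 2, where / on integers already floors.

    import numpy

    beam_size, voc_size = 3, 5
    rng = numpy.random.RandomState(0)

    costs = numpy.array([0.7, 1.2, 1.5])  # accumulated negative log-likelihood per beam
    probs = rng.dirichlet(numpy.ones(voc_size), size=beam_size)  # stand-in for next_probs_predictor output
    log_probs = numpy.log(probs)

    # Each candidate's cost is the parent cost minus the log-probability
    # of the proposed word, exactly as in the loop body above.
    next_costs = costs[:, None] - log_probs
    flat_next_costs = next_costs.flatten()

    # argpartition puts the beam_size cheapest candidates first (unordered).
    best = numpy.argpartition(flat_next_costs, beam_size)[:beam_size]

    trans_indices = best // voc_size   # which beam each winner extends
    word_indices = best % voc_size     # which word extends it
    print(trans_indices, word_indices, flat_next_costs[best])
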
Code Example #2
    def search(self, context, beam_size=1, ignore_unk=False, \
               min_length=1, max_length=100, normalize_by_length=True, verbose=False):
        if not self.compiled:
            self.compile()

        # Convert to column vector
        context = numpy.array(context, dtype='int32')[:, None]
        prev_hd = numpy.zeros((beam_size, self.qdim), dtype='float32')
        prev_hs = numpy.zeros((beam_size, self.sdim), dtype='float32')

        # Compute the context encoding and get
        # the last hierarchical state
        h, hs = self.compute_encoding(context)
        prev_hs[:] = hs[-1]

        fin_beam_gen = []
        fin_beam_costs = []
        fin_beam_ranks = []

        beam_gen = [[] for i in range(beam_size)]
        costs = [0.0 for i in range(beam_size)]

        for k in range(max_length):
            if len(fin_beam_gen) >= beam_size:
                break

            if verbose:
                logger.info("Beam search at step %d" % k)

            prev_words = (numpy.array(map(lambda bg: bg[-1], beam_gen))
                          if k > 0 else numpy.zeros(beam_size, dtype="int32") +
                          self.eoq_sym)

            outputs, hd = self.next_probs_predictor(prev_hs, prev_words,
                                                    prev_hd)
            log_probs = numpy.log(outputs)

            # Adjust log probs according to search restrictions
            if ignore_unk:
                log_probs[:, self.unk_sym] = -numpy.inf
            if k <= min_length:
                log_probs[:, self.eoq_sym] = -numpy.inf

            next_costs = numpy.array(costs)[:, None] - log_probs

            # At the first step all beams are identical, so consider only the
            # first row; this avoids expanding duplicate <s> tokens.
            if k == 0:
                flat_next_costs = next_costs[:1, :].flatten()
            else:
                # Set the next cost to infinity for finished sentences
                # (they will be replaced by other sentences in the beam)
                indices = [
                    i for i, bg in enumerate(beam_gen)
                    if bg[-1] == self.eoq_sym
                ]
                next_costs[indices, :] = numpy.inf
                flat_next_costs = next_costs.flatten()

            best_costs_indices = argpartition(flat_next_costs.flatten(),
                                              beam_size)[:beam_size]

            # Decipher the flattened indices
            voc_size = log_probs.shape[1]
            trans_indices = best_costs_indices / voc_size
            word_indices = best_costs_indices % voc_size
            costs = flat_next_costs[best_costs_indices]

            new_beam_gen = [[] for i in range(beam_size)]
            new_costs = numpy.zeros(beam_size)
            new_prev_hd = numpy.zeros((beam_size, self.qdim), dtype="float32")

            for i, (orig_idx, next_word, next_cost) in enumerate(
                    zip(trans_indices, word_indices, costs)):
                new_beam_gen[i] = beam_gen[orig_idx] + [next_word]
                new_costs[i] = next_cost
                new_prev_hd[i] = hd[orig_idx]

            # Save the previous hidden states
            prev_hd = new_prev_hd
            beam_gen = new_beam_gen
            costs = new_costs

            for i in range(beam_size):
                # We finished sampling?
                if beam_gen[i][-1] == self.eoq_sym:
                    if verbose:
                        logger.debug("Adding sentence {} from beam {}".format(
                            new_beam_gen[i], i))

                    new_session = numpy.vstack([
                        context,
                        numpy.array(new_beam_gen[i], dtype='int32')[:, None]
                    ])
                    ranks = self.rank_prediction(new_session, len(new_session))

                    fin_beam_ranks.append(numpy.ravel(ranks)[-1])
                    fin_beam_gen.append(beam_gen[i])
                    if normalize_by_length:
                        fin_beam_costs.append(costs[i] / len(beam_gen[i]))

        # If nothing has finished, fall back to the current
        # (unfinished) beam so we always return something
        if len(fin_beam_gen) == 0:
            fin_beam_gen = beam_gen
            fin_beam_ranks = [0]
            fin_beam_costs = [
                costs[i] / len(beam_gen[i]) for i in range(len(costs))
            ]

        # Here we could have more than beam_size samples, because we keep
        # sampling beam_size terms even after a sentence in the beam has
        # terminated with </s>
        fin_beam_ranks = numpy.array(fin_beam_ranks)[numpy.argsort(
            fin_beam_costs)]
        fin_beam_gen = numpy.array(fin_beam_gen)[numpy.argsort(fin_beam_costs)]
        fin_beam_costs = numpy.array(sorted(fin_beam_costs))
        return (fin_beam_gen[:beam_size], fin_beam_costs[:beam_size],
                fin_beam_ranks[:beam_size])
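
The main structural difference from example #1 is the rank_prediction call on each finished candidate. The session it scores is simply the original context column with the generated tokens appended; a toy sketch of that vstack (hypothetical token ids, no model involved):

    import numpy

    context = numpy.array([3, 17, 42, 1], dtype='int32')[:, None]   # column vector, as in search()
    candidate = [8, 25, 1]                                          # hypothetical beam_gen[i]

    new_session = numpy.vstack([
        context,
        numpy.array(candidate, dtype='int32')[:, None],
    ])
    print(new_session.ravel())   # [ 3 17 42  1  8 25  1] -- what rank_prediction would score
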
Code Example #3
    def search(self,
               seq,
               n_samples=1,
               ignore_unk=False,
               minlen=1,
               normalize_by_length=True,
               session=False):
        # Make seq a column vector
        def _is_finished(beam_gen):
            if session and beam_gen[-1] == self.eos_sym:
                return True
            if not session and beam_gen[-1] == self.eoq_sym:
                return True
            return False

        seq = numpy.array(seq)

        if seq.ndim == 1:
            seq = numpy.array([seq], dtype='int32').T
        else:
            seq = seq.T

        assert seq.ndim == 2
        h, hr, hs = self.compute_encoding(seq)

        # Initializing starting points with the last encoding of the sequence
        prev_words = numpy.zeros(
            (seq.shape[1], ), dtype='int32') + self.eoq_sym
        prev_hd = numpy.zeros((seq.shape[1], self.qdim), dtype='float32')
        prev_hs = numpy.zeros((seq.shape[1], self.sdim), dtype='float32')

        prev_hs[:] = hs[-1]

        fin_beam_gen = []
        fin_beam_costs = []
        fin_beam_ranks = []

        beam_gen = [[]]
        costs = [0.0]

        max_step = 30
        for k in range(max_step):
            logger.info("Beam search at step %d" % k)
            if n_samples == 0:
                break

            # prev_hd = prev_hd[:beam_size]
            # prev_hs = prev_hs[:beam_size]
            beam_size = len(beam_gen)
            prev_words = (numpy.array(map(lambda bg: bg[-1], beam_gen))
                          if k > 0 else numpy.zeros(1, dtype="int32") +
                          self.eoq_sym)

            assert prev_hs.shape[0] == prev_hd.shape[0]
            assert prev_words.shape[0] == prev_hs.shape[0]

            repeat = numpy.repeat(seq, beam_size, axis=1)
            whole_context = numpy.vstack(
                [repeat, numpy.array(beam_gen, dtype='int32').T])
            h, hr, hs = self.compute_encoding(whole_context)

            outputs, hd = self.next_probs_predictor(hs[-1], prev_words,
                                                    prev_hd)
            log_probs = numpy.log(outputs)

            # Adjust log probs according to search restrictions
            if ignore_unk:
                log_probs[:, self.unk_sym] = -numpy.inf

            if k <= minlen:
                log_probs[:, self.eos_sym] = -numpy.inf
                log_probs[:, self.eoq_sym] = -numpy.inf

            # Artificially prevent repeating already generated words
            for i in range(n_samples):
                if k > 0:
                    log_probs[i, beam_gen[i][1:]] = -numpy.inf

            # Find the best options by calling argpartition of flatten array
            next_costs = numpy.array(costs)[:, None] - log_probs
            flat_next_costs = next_costs.flatten()
            best_costs_indices = argpartition(flat_next_costs.flatten(),
                                              n_samples)[:n_samples]

            # Decipher the flattened indices
            voc_size = log_probs.shape[1]
            trans_indices = best_costs_indices / voc_size
            word_indices = best_costs_indices % voc_size
            costs = flat_next_costs[best_costs_indices]

            # Form a beam for the next iteration
            new_beam_gen = [[]] * n_samples
            new_costs = numpy.zeros(n_samples)

            new_prev_hs = numpy.zeros((n_samples, self.sdim), dtype="float32")
            new_prev_hs[:] = hs[-1]
            new_prev_hd = numpy.zeros((n_samples, self.qdim), dtype="float32")

            for i, (orig_idx, next_word, next_cost) in enumerate(
                    zip(trans_indices, word_indices, costs)):

                new_beam_gen[i] = beam_gen[orig_idx] + [next_word]
                new_costs[i] = next_cost
                new_prev_hd[i] = hd[orig_idx]

            beam_gen = []
            costs = []
            indices = []

            for i in range(n_samples):
                # We finished sampling?
                if not _is_finished(new_beam_gen[i]):
                    beam_gen.append(new_beam_gen[i])
                    costs.append(new_costs[i])
                    indices.append(i)
                else:
                    n_samples -= 1

                    # Concatenate sequence and predict rank
                    concat_seq = numpy.vstack(
                        [seq,
                         numpy.array([new_beam_gen[i]], dtype='int32').T])
                    ranks = self.rank_prediction(concat_seq)

                    fin_beam_ranks.append(numpy.ravel(ranks)[-1])
                    fin_beam_gen.append(new_beam_gen[i])
                    if normalize_by_length:
                        fin_beam_costs.append(new_costs[i] /
                                              len(new_beam_gen[i]))

            # Filter out the finished states
            prev_hd = new_prev_hd[indices]
            prev_hs = new_prev_hs[indices]

        fin_beam_gen = numpy.array(fin_beam_gen)[numpy.argsort(fin_beam_costs)]
        fin_beam_ranks = numpy.array(fin_beam_ranks)[numpy.argsort(
            fin_beam_costs)]
        fin_beam_costs = numpy.array(sorted(fin_beam_costs))

        return fin_beam_gen, fin_beam_costs, fin_beam_ranks
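
When normalize_by_length is set, finished hypotheses are compared by cost per token rather than total cost, and the final ordering comes from argsort over those normalized costs. A minimal sketch with made-up hypotheses and costs (plain list indexing is used for the sort here, since modern NumPy refuses to build an array from ragged lists):

    import numpy

    # Toy finished hypotheses (hypothetical token ids) and their raw costs.
    fin_beam_gen = [[4, 9, 2], [7, 2], [11, 3, 5, 2]]
    raw_costs = [4.2, 3.9, 5.1]

    # Dividing by length keeps longer hypotheses from being penalised
    # purely for containing more terms.
    fin_beam_costs = [c / len(g) for c, g in zip(raw_costs, fin_beam_gen)]

    order = numpy.argsort(fin_beam_costs)
    fin_beam_gen = [fin_beam_gen[i] for i in order]
    fin_beam_costs = numpy.array(sorted(fin_beam_costs))
    print(fin_beam_gen, fin_beam_costs)
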
Code Example #4
File: search.py  Project: cttsai/Agtarbidir
    def _search(self,
                seq_origin,
                n_samples,
                ignore_unk=False,
                minlen=1,
                debug=False,
                training=False):
        ## batch size 1
        '''
        seq = seq_origin
        seqs = numpy.array([seq]*n_samples)
        fin_trans = numpy.array([ [5, 6],[14, 15],])
        #fin_trans = numpy.array([[5, 6]])
        x,x_mask,y,y_mask,_,_ = prepare_reorderdata_minibatch(seqs,fin_trans)
        print x, y
        cost = self.enc_dec.fn_sent_cost(x,x_mask,y,y_mask)
        print 'prob',numpy.log(self.enc_dec.fn_prob(x,x_mask,y,y_mask)+1e-8)
        print 'static cost', cost
        print 'sent static cost', cost.sum(axis=0)
        print 'h_ ', self.enc_dec.fn_proj(x,x_mask,y,y_mask)[:,:,:5]
        print 'x_ ', self.enc_dec.fn_proj_x(x,x_mask,y,y_mask)
        '''
        seq = numpy.array(seq_origin)[:, None]
        mask = numpy.ones(seq.shape, dtype=config.floatX)

        ## compute the encoder's h and c; each has shape (d,), where d = 4*layers*dim
        h_, c_ = self.comp_repr_enc(seq, mask)  ## h_.shape=(1,d)
        if debug:
            print "self.enc_dec.layers", self.enc_dec.layers
        ## wrap h and c into shape (1, 1, d)
        new_h_ = numpy.tile(h_[0], (1, 1, 1))
        new_c_ = numpy.tile(c_[0], (1, 1, 1))

        fin_trans = []
        fin_costs = []
        trans = [[]]  #*n_samples## it is the beam, 2d list
        costs = [0.0]
        for k in range(3 * len(seq)):
            if n_samples == 0:
                break
            # Compute probabilities of the next words for
            # all the elements of the beam.
            beam_size = len(trans)
            last_words = (numpy.array(map(lambda t: t[-1], trans))[None, :]
                          if k > 0 else numpy.zeros(
                              (1, beam_size), dtype="int64"))
            if debug:
                print "last_words, k", last_words, k
            ## given h,c and the last words of trans in beam, to calculate the log_probs with shape(n,v)
            mask = numpy.ones((1, len(last_words[0])), dtype=config.floatX)
            if debug:
                print 'mask', mask
                print "new_h_.shape", new_h_.shape
                print "new_c_.shape", new_h_.shape
                print "last_words, k", last_words, k
                print self.enc_dec.decoder.dbg(last_words, mask, new_h_[0],
                                               new_c_[0], 0.0)
            log_probs, h_, c_, proj_x = self.comp_next_probs_hc(last_words,mask,new_h_[0],new_c_[0], 1.0) \
                if k>0 else self.comp_next_probs_hc(last_words,mask,new_h_[0],new_c_[0], 0.0)
            log_probs = numpy.log(log_probs[0])

            if debug:
                print 'new_h_[0,:,:5]', new_h_[0, :, :5]
                print 'h_[0,:,:5]', h_[0, :, :5]
                print 'proj_x', proj_x
                print 'log_probs', log_probs

            if k > 0 and 0:
                last_words[0][-1] = 5
                print last_words
                log_probs_new, _, _, _ = self.comp_next_probs_hc(
                    last_words, mask, new_h_, new_c_, 1.0)
                log_probs_new = numpy.log(log_probs_new[0])
                print 'log_probs', log_probs_new

            if debug:
                print 'shape log_probs', log_probs.shape  ## its shape is (n,v), v is the vocab size
            # Adjust log probs according to search restrictions
            if ignore_unk:
                log_probs[:, self.unk_id] = -numpy.inf
            # TODO: report me in the paper!!!
            if k < minlen:
                log_probs[:, self.eos_id] = -numpy.inf

            # Find the best options by calling argpartition of flatten array
            next_costs = numpy.array(costs)[:, None] - log_probs

            if debug:
                print "next_costs.shape", next_costs.shape
            #print next_costs
            flat_next_costs = next_costs.flatten()
            best_costs_indices = argpartition(flat_next_costs.flatten(),
                                              n_samples)[:n_samples]

            # Decipher the flattened indices
            voc_size = log_probs.shape[1]

            ## trans_indices indicates which previous beam entry each candidate extends
            trans_indices = best_costs_indices / voc_size
            word_indices = best_costs_indices % voc_size
            if debug:
                print 'best_costs_indices', best_costs_indices
                print 'trans_indices', trans_indices
                print 'word_indices', word_indices
            costs = flat_next_costs[best_costs_indices]
            #print costs
            # Form a beam for the next iteration
            new_trans = [[]] * n_samples
            new_costs = numpy.zeros(n_samples)
            inputs = numpy.zeros(n_samples, dtype="int64")
            for i, (orig_idx, next_word, next_cost) in enumerate(
                    zip(trans_indices, word_indices, costs)):
                new_trans[i] = trans[orig_idx] + [next_word]
                new_costs[i] = next_cost
                inputs[i] = next_word

            #print 'new_costs', new_costs
            # Filter the sequences that end with end-of-sequence character
            trans = []
            costs = []
            indices = []
            for i in range(n_samples):
                if new_trans[i][-1] != self.eos_id:
                    trans.append(new_trans[i])
                    costs.append(new_costs[i])
                    indices.append(i)
                else:
                    n_samples -= 1
                    fin_trans.append(new_trans[i])
                    fin_costs.append(new_costs[i])
            if debug:
                print 'new_trans', new_trans
                print 'trans', trans

                print 'h_.shape', h_.shape
                print 'indices', indices
                print 'trans_indices', trans_indices

            pre_t_indices = trans_indices[indices]
            wrapper_fn = lambda x: (x[pre_t_indices])[None, :]
            #print 'pre_t_indices',pre_t_indices
            #print 'h_[0,:,:5]',h_[0,:,:5]
            new_h_ = wrapper_fn(h_[0])
            new_c_ = wrapper_fn(c_[0])

            if debug:
                print 'new_h_.shape', new_h_.shape
            if k == 1:
                pass  #break

        if debug:
            print 'fin_trans', fin_trans
        # Dirty tricks to obtain any translation
        if not len(fin_trans):
            if ignore_unk:
                logger.warning("Did not manage without UNK")
                return self.search(seq, n_samples, False, minlen)
            elif n_samples < 4 and not training:
                logger.warning(
                    "Still no translations: try beam size {}".format(
                        n_samples * 2))
                print 'seq is empty?', seq_origin
                return self._search(seq_origin, n_samples * 2, False, minlen)
            else:
                logger.warning("Translation failed: cannot end with EOS")
                if training:
                    return [[]], [0.0], "NO TRANS"
                else:  ## at test time, output the partial translations and heuristically append eos
                    logger.warning(
                        "Translation failed: cannot end with EOS, but output the trans in the beam"
                    )
                    for x in trans:
                        x.append(self.eos_id)
                    fin_trans = numpy.array(trans)[numpy.argsort(
                        costs)][:self.beamsize]
                    fin_costs = numpy.array(sorted(costs)[:self.beamsize])
                    best_trans = fin_trans[0][:-1]
                    if self.enc_dec.reverse_trg: best_trans = best_trans[::-1]
                    best_trans = self.to_words(best_trans, self.t_index2word)
                    return list(fin_trans), fin_costs, best_trans
        fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
        fin_costs = numpy.array(sorted(fin_costs))
        #'''
        #        print 'fin_trans',fin_trans
        #print 'fin_costs',fin_costs
        seq = seq_origin
        seqs = numpy.array([seq] * len(fin_trans))
        #print seqs.shape,seqs
        x, x_mask, y, y_mask, _, _ = prepare_reorderdata_minibatch(
            seqs, fin_trans)
        #print "x", x, x_mask
        #print "y", y, y_mask
        cost = self.enc_dec.fn_sent_cost(x, x_mask, y, y_mask)
        #print 'static cost', cost
        #print 'sent static cost', cost.sum(axis=0)
        #'''
        #print 'trans id', fin_trans[0][:-1]
        #print "dict", self.t_index2word
        best_trans = fin_trans[0][:-1]
        if self.enc_dec.reverse_trg: best_trans = best_trans[::-1]
        best_trans = self.to_words(best_trans, self.t_index2word)
        return list(fin_trans), fin_costs, best_trans
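
This snippet, like the others, is Python 2 (bare print statements, map() returning a list, integer /). If porting, two idioms need attention; a small sketch of the Python 3 equivalents with toy values, no model involved:

    import numpy

    trans = [[5, 9], [7, 2]]

    # Py2: numpy.array(map(lambda t: t[-1], trans)) -- map() returned a list.
    # Py3: map() is lazy, so materialise it (a list comprehension works too).
    last_words = numpy.array([t[-1] for t in trans])

    # Py2: best_costs_indices / voc_size floored automatically (integer division).
    # Py3: use //, otherwise the result becomes float and breaks indexing.
    best_costs_indices = numpy.array([7, 12])
    voc_size = 5
    trans_indices = best_costs_indices // voc_size
    word_indices = best_costs_indices % voc_size
    print(last_words, trans_indices, word_indices)
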
Code Example #5
File: sample.py  Project: sordonia/rnn-lm
    def search(self, seq, n_samples=1, ignore_unk=False, minlen=1, normalize_by_length=True):
        # Make seq a column vector
        seq = numpy.array(seq)
        
        if seq.ndim == 1:
            seq = numpy.array([seq], dtype='int32').T
        else:
            seq = seq.T

        assert seq.ndim == 2
        h, hr, hs = self.compute_encoding(seq)
         
        # Initializing starting points with the last encoding of the sequence 
        prev_words = numpy.zeros((seq.shape[1],), dtype='int32') + self.eoq_sym
        prev_hd = numpy.zeros((seq.shape[1], self.qdim), dtype='float32')
        prev_hs = numpy.zeros((seq.shape[1], self.sdim), dtype='float32')
         
        prev_hs[:] = hs[-1]
         
        fin_beam_gen = []
        fin_beam_costs = []
        fin_beam_ranks = []

        beam_gen = [[]] 
        costs = [0.0]

        max_step = 50
        for k in range(max_step):
            logger.info("Beam search at step %d" % k)
            if n_samples == 0:
                break

            beam_size = len(beam_gen)
            prev_words = (numpy.array(map(lambda bg : bg[-1], beam_gen))
                    if k > 0
                    else numpy.zeros(1, dtype="int32") + self.eoq_sym)
             
            assert prev_hs.shape[0] == prev_hd.shape[0]
            assert prev_words.shape[0] == prev_hs.shape[0]
            
            repeat = numpy.repeat(seq, beam_size, axis=1)
            whole_context = numpy.vstack([repeat, numpy.array(beam_gen,dtype='int32').T])
            h, hr, hs = self.compute_encoding(whole_context)

            outputs, hd = self.next_probs_predictor(hs[-1], prev_words, prev_hd)
            log_probs = numpy.log(outputs)

            # Adjust log probs according to search restrictions
            if ignore_unk:
                log_probs[:, self.unk_sym] = -numpy.inf

            if k <= minlen:
                log_probs[:, self.eos_sym] = -numpy.inf
                log_probs[:, self.eoq_sym] = -numpy.inf 
             
            # Find the best options by calling argpartition of flatten array
            next_costs = numpy.array(costs)[:, None] - log_probs
            flat_next_costs = next_costs.flatten()
            best_costs_indices = argpartition(
                    flat_next_costs.flatten(),
                    n_samples)[:n_samples]

            # Decipher the flattened indices
            voc_size = log_probs.shape[1]
            trans_indices = best_costs_indices / voc_size
            word_indices = best_costs_indices % voc_size
            costs = flat_next_costs[best_costs_indices]

            # Form a beam for the next iteration
            new_beam_gen = [[]] * n_samples 
            new_costs = numpy.zeros(n_samples)
            
            new_prev_hs = numpy.zeros((n_samples, self.sdim), dtype="float32")
            new_prev_hs[:] = hs[-1]
            new_prev_hd = numpy.zeros((n_samples, self.qdim), dtype="float32")
            
            for i, (orig_idx, next_word, next_cost) in enumerate(
                    zip(trans_indices, word_indices, costs)):
                
                new_beam_gen[i] = beam_gen[orig_idx] + [next_word]
                new_costs[i] = next_cost
                new_prev_hd[i] = hd[orig_idx]
             
            beam_gen = []
            costs = []
            indices = []
            
            for i in range(n_samples):
                # We finished sampling?
                if new_beam_gen[i][-1] != self.eos_sym: 
                    beam_gen.append(new_beam_gen[i])
                    costs.append(new_costs[i])
                    indices.append(i)
                else:
                    n_samples -= 1
                     
                    # Concatenate sequence and predict rank 
                    concat_seq = numpy.vstack([seq, numpy.array([new_beam_gen[i]], dtype='int32').T])
                    ranks = self.rank_prediction(concat_seq)
                    
                    fin_beam_ranks.append(numpy.ravel(ranks)[-1])
                    fin_beam_gen.append(new_beam_gen[i])
                    if normalize_by_length:
                        fin_beam_costs.append(new_costs[i]/len(new_beam_gen[i]))

            # Filter out the finished states 
            prev_hd = new_prev_hd[indices]
            prev_hs = new_prev_hs[indices]
         
        fin_beam_gen = numpy.array(fin_beam_gen)[numpy.argsort(fin_beam_costs)]
        fin_beam_ranks = numpy.array(fin_beam_ranks)[numpy.argsort(fin_beam_costs)]
        fin_beam_costs = numpy.array(sorted(fin_beam_costs))
         
        return fin_beam_gen, fin_beam_costs, fin_beam_ranks
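
Like example #3, this variant re-encodes the whole context plus each partial hypothesis at every step, building whole_context with numpy.repeat and vstack. A toy sketch of that construction (hypothetical token ids; it relies on all live hypotheses having the same length, which holds because every beam grows by one word per step):

    import numpy

    seq = numpy.array([[3], [17], [1]], dtype='int32')   # context as a column (time x 1)
    beam_gen = [[8, 25], [6, 2]]                          # two partial hypotheses
    beam_size = len(beam_gen)

    # One copy of the context column per live beam ...
    repeat = numpy.repeat(seq, beam_size, axis=1)         # shape (3, 2)
    # ... with each hypothesis stacked under its own column.
    whole_context = numpy.vstack([repeat, numpy.array(beam_gen, dtype='int32').T])
    print(whole_context)
    # [[ 3  3]
    #  [17 17]
    #  [ 1  1]
    #  [ 8  6]
    #  [25  2]]
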
Code Example #6
File: sample.py  Project: tangyaohua/ProperNouns
    def search(self, seq, n_samples, ignore_unk=False, minlen=1):
        c = self.comp_repr(seq)[0]
        states = map(lambda x : x[None, :], self.comp_init_states(c))
        dim = states[0].shape[1]

        num_levels = len(states)

        fin_trans = []
        fin_costs = []

        trans = [[]]
        costs = [0.0]

        for k in range(3 * len(seq)):
            if n_samples == 0:
                break

            # Compute probabilities of the next words for
            # all the elements of the beam.
            beam_size = len(trans)
            last_words = (numpy.array(map(lambda t : t[-1], trans))
                    if k > 0
                    else numpy.zeros(beam_size, dtype="int64"))
            log_probs = numpy.log(self.comp_next_probs(c, k, last_words, *states)[0])

            # Adjust log probs according to search restrictions
            if ignore_unk:
                log_probs[:,self.unk_id] = -numpy.inf
            # TODO: report me in the paper!!!
            if k < minlen:
                log_probs[:,self.eos_id] = -numpy.inf

            # Find the best options by calling argpartition of flatten array
            next_costs = numpy.array(costs)[:, None] - log_probs
            flat_next_costs = next_costs.flatten()
            best_costs_indices = argpartition(
                    flat_next_costs.flatten(),
                    n_samples)[:n_samples]

            # Decipher the flattened indices
            voc_size = log_probs.shape[1]
            trans_indices = best_costs_indices / voc_size
            word_indices = best_costs_indices % voc_size
            costs = flat_next_costs[best_costs_indices]

            # Form a beam for the next iteration
            new_trans = [[]] * n_samples
            new_costs = numpy.zeros(n_samples)
            new_states = [numpy.zeros((n_samples, dim), dtype="float32") for level
                    in range(num_levels)]
            inputs = numpy.zeros(n_samples, dtype="int64")
            for i, (orig_idx, next_word, next_cost) in enumerate(
                    zip(trans_indices, word_indices, costs)):
                new_trans[i] = trans[orig_idx] + [next_word]
                new_costs[i] = next_cost
                for level in range(num_levels):
                    new_states[level][i] = states[level][orig_idx]
                inputs[i] = next_word
            new_states = self.comp_next_states(c, k, inputs, *new_states)

            # Filter the sequences that end with end-of-sequence character
            trans = []
            costs = []
            indices = []
            for i in range(n_samples):
                if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                    trans.append(new_trans[i])
                    costs.append(new_costs[i])
                    indices.append(i)
                else:
                    n_samples -= 1
                    fin_trans.append(new_trans[i])
                    fin_costs.append(new_costs[i])
            states = map(lambda x : x[indices], new_states)

        # Dirty tricks to obtain any translation
        if not len(fin_trans):
            if ignore_unk:
                logger.warning("Did not manage without UNK")
                return self.search(seq, n_samples, False, minlen)
            elif n_samples < 500:
                logger.warning("Still no translations: try beam size {}".format(n_samples * 2))
                return self.search(seq, n_samples * 2, False, minlen)
            else:
                logger.error("Translation failed")

        fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
        fin_costs = numpy.array(sorted(fin_costs))
        return fin_trans, fin_costs
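
The search restrictions in all of these snippets (ignore_unk and the minimum-length check) work the same way: forbidden words are made infinitely expensive before the costs are flattened, so argpartition can never select them. A minimal sketch with uniform toy probabilities and hypothetical symbol ids:

    import numpy

    voc_size = 6
    unk_id, eos_id = 1, 0          # hypothetical symbol ids
    log_probs = numpy.log(numpy.full((2, voc_size), 1.0 / voc_size))

    # ignore_unk: make <unk> unreachable by giving it infinite cost.
    log_probs[:, unk_id] = -numpy.inf
    # Before minlen steps, forbid ending the sequence early.
    k, minlen = 0, 1
    if k < minlen:
        log_probs[:, eos_id] = -numpy.inf

    costs = numpy.array([0.0, 0.0])
    next_costs = costs[:, None] - log_probs   # masked entries become +inf and are never picked
    print(next_costs)
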