Example #1
    def nucleus_sample(self,
                       text,
                       max_len=32,
                       end_word=None,
                       repetition_penalty=1.0,
                       temperature=1.0,
                       top_k=0,
                       top_p=1.0):
        with paddle.no_grad():
            # Stop condition
            if end_word is not None:
                stop_id = self.tokenizer.encode(end_word)
                length = len(stop_id)

            # Initial prediction
            ids = self.tokenizer.encode(text)
            input_id = paddle.to_tensor(
                np.array(ids).reshape(1, -1).astype('int64'))
            output, cached_kvs = self.model(input_id, use_cache=True)
            next_token_logits = output[0, -1, :]
            for id in set(ids):
                next_token_logits[id] /= repetition_penalty
            next_token_logits = next_token_logits / temperature
            next_token_logits[self.tokenizer.encoder['<unk>']] = -float('Inf')
            filtered_logits = self.top_k_top_p_filtering(next_token_logits,
                                                         top_k=top_k,
                                                         top_p=top_p)
            next_token = paddle.multinomial(paddle.nn.functional.softmax(
                filtered_logits, axis=-1),
                                            num_samples=1).numpy()
            ids += [int(next_token)]

            # Continue predicting using the cached key/values
            for i in range(max_len - 1):
                input_id = paddle.to_tensor(
                    np.array([next_token]).reshape(1, -1).astype('int64'))
                output, cached_kvs = self.model(input_id,
                                                use_cache=True,
                                                cache=cached_kvs)
                next_token_logits = output[0, -1, :]
                for id in set(ids):
                    next_token_logits[id] /= repetition_penalty
                next_token_logits = next_token_logits / temperature
                next_token_logits[
                    self.tokenizer.encoder['<unk>']] = -float('Inf')
                filtered_logits = self.top_k_top_p_filtering(next_token_logits,
                                                             top_k=top_k,
                                                             top_p=top_p)
                next_token = paddle.multinomial(paddle.nn.functional.softmax(
                    filtered_logits, axis=-1),
                                                num_samples=1).numpy()
                ids += [int(next_token)]

                # Stop once the end_word sequence has been generated
                if (end_word is not None) and (ids[-length:] == stop_id):
                    break

            return self.tokenizer.decode(ids)
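Note: `self.top_k_top_p_filtering` is referenced above but not shown on this page. Below is a minimal sketch of such a filter for a 1-D logits tensor, assuming the standard top-k/nucleus masking; the original helper's exact behavior may differ.

import paddle
import paddle.nn.functional as F

def top_k_top_p_filtering(logits, top_k=0, top_p=1.0, filter_value=-float('Inf')):
    # logits: 1-D tensor of next-token scores.
    if top_k > 0:
        # The k-th largest value is the cutoff; mask everything below it.
        kth_value = paddle.topk(logits, k=top_k)[0][-1]
        logits = paddle.where(logits < kth_value,
                              paddle.full_like(logits, filter_value), logits)
    if top_p < 1.0:
        sorted_logits = paddle.sort(logits, descending=True)
        cumulative_probs = paddle.cumsum(
            F.softmax(sorted_logits, axis=-1), axis=-1)
        # Keep the smallest prefix whose cumulative probability reaches top_p.
        keep = int((cumulative_probs < top_p).astype('int64').sum()) + 1
        cutoff = sorted_logits[min(keep, logits.shape[0]) - 1]
        logits = paddle.where(logits < cutoff,
                              paddle.full_like(logits, filter_value), logits)
    return logits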
Example #2
    def test_fixed_random_number(self):
        # Test the fixed GPU random numbers generated by 'curandStatePhilox4_32_10_t'
        if not paddle.is_compiled_with_cuda():
            return

        # Different GPUs generate different random values. Only test on V100 here.
        if "V100" not in paddle.device.cuda.get_device_name():
            return

        print("Test Fixed Random number on V100 GPU------>")
        paddle.disable_static()
        paddle.set_device('gpu')
        paddle.seed(100)

        x = paddle.randint(0, 100, [1024, 10000]).astype('float32')
        y = paddle.multinomial(x, 1, replacement=False).numpy()
        self.assertEqual(np.sum(y), 5187793)
        self.assertEqual(np.mean(y), 5066.2041015625)
        expect = [9982, 1655, 4741, 1323, 9319, 3298, 6473, 7477, 2507, 2628]
        self.assertTrue(np.array_equal(y[100:110, :].flatten(), expect))

        y = paddle.multinomial(x, 5000, replacement=False).numpy()
        self.assertEqual(np.sum(y), 25603962316)
        self.assertEqual(np.mean(y), 5000.77388984375)
        expect = [7300, 6055, 8714, 5401, 7360, 161, 5035, 7002, 6788, 2916]
        self.assertTrue(np.array_equal(y[100, 1000:1010], expect))

        y = paddle.multinomial(x, 5000, replacement=False).numpy()
        self.assertEqual(np.sum(y), 25592855710)
        self.assertEqual(np.mean(y), 4998.604630859375)
        expect = [5700, 6567, 4399, 5688, 7472, 545, 6894, 526, 2124, 385]
        self.assertTrue(np.array_equal(y[300, 3000:3010], expect))

        y = paddle.multinomial(x, 20000, replacement=True).numpy()
        self.assertEqual(np.sum(y), 102371362581)
        self.assertEqual(np.mean(y), 4998.60168852539)
        self.assertEqual(np.std(y), 2886.316308500771)
        expect = [7630, 8235, 8445, 3275, 5580, 4591, 1331, 342, 1662, 7156]
        self.assertTrue(np.array_equal(y[100, 0:10], expect))

        y = paddle.multinomial(x, 20000, replacement=True).numpy()
        self.assertEqual(np.sum(y), 102400672117)
        self.assertEqual(np.mean(y), 5000.032818212891)
        self.assertEqual(np.std(y), 2886.913426124017)
        expect = [4159, 7849, 9305, 5759, 4422, 122, 345, 2897, 5200, 5911]
        self.assertTrue(np.array_equal(y[100, 0:10], expect))

        paddle.enable_static()
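The hard-coded sums and means above are tied to the V100's Philox generator. A small illustrative sketch of the underlying property, that re-seeding reproduces draws on a single device:

import paddle

probs = paddle.to_tensor([0.1, 0.2, 0.3, 0.4])
paddle.seed(100)  # fix the global generator
first = paddle.multinomial(probs, num_samples=5, replacement=True)
paddle.seed(100)  # re-seeding replays the same stream on the same device
second = paddle.multinomial(probs, num_samples=5, replacement=True)
assert (first.numpy() == second.numpy()).all()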
Example #3
 def test_alias(self):
     paddle.set_device('npu:0')
     x = paddle.rand([4])
     out1 = paddle.multinomial(x, num_samples=10, replacement=True)
     out2 = paddle.tensor.multinomial(x, num_samples=10, replacement=True)
     out3 = paddle.tensor.random.multinomial(x,
                                             num_samples=10,
                                             replacement=True)
Example #4
 def sample(self, labels):
     """Random sample neg_samples
     """
     n_sample = self.n_sample
     n_tries = 1 * n_sample
     neg_samples = paddle.multinomial(self.new_prob,
                                      num_samples=n_sample,
                                      replacement=self.unique is False)
     true_log_probs = paddle.gather(self.log_q, labels)
     samp_log_probs = paddle.gather(self.log_q, neg_samples)
     return true_log_probs, samp_log_probs, neg_samples
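`new_prob` and `log_q` are attributes built elsewhere in the class. Below is a hypothetical construction, assuming a log-uniform (Zipf-like) proposal as is common for sampled-softmax negative sampling; the distribution used by the original class may differ.

import numpy as np
import paddle

vocab_size = 10000
ranks = np.arange(vocab_size, dtype='float64')
# P(k) = (log(k+2) - log(k+1)) / log(vocab_size+1); telescopes to sum 1.
probs = (np.log(ranks + 2) - np.log(ranks + 1)) / np.log(vocab_size + 1)
new_prob = paddle.to_tensor(probs.astype('float32'))
log_q = paddle.log(new_prob)  # log proposal probabilities, gathered per id

neg_samples = paddle.multinomial(new_prob, num_samples=64, replacement=True)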
Example #5
    def decode(self, inputs, caches):
        tgt_ids = inputs['tgt_ids']
        tgt_pos = inputs['tgt_pos']
        tgt_generation_mask = inputs['tgt_generation_mask']
        predictions = tgt_ids

        # TODO
        step = 0
        while step < self.max_dec_len:
            # [-1, 1]
            append_mask = paddle.cast(
                tgt_ids != self.eos_id, dtype=tgt_generation_mask.dtype)
            tgt_generation_mask = paddle.concat(
                [tgt_generation_mask, paddle.unsqueeze(append_mask, 1)],
                axis=-1)
            tgt_sent = paddle.ones(
                [tgt_generation_mask.shape[0], 1], dtype=tgt_ids.dtype)

            # [-1, 1, hidden_size]
            out, caches = self.plato2_encoder(caches, tgt_ids, tgt_sent,
                                              tgt_pos, tgt_generation_mask)
            out = paddle.squeeze(out, axis=1)

            # [-1, hidden_size]
            trans = self.logits_fc_layer(out)
            trans = self.gelu_layer(trans)
            trans = self.logits_layer_norm(trans)

            # [-1, vocab_size]
            logits = paddle.matmul(
                trans,
                self.plato2_encoder.word_embedding_layer.weight,
                transpose_y=True) + self.logits_bias
            logits[:, self.unk_id] = -1e9
            logits[:, self.bos_id] = -1e9
            logits[:, self.mask_id] = -1e9
            if step < self.min_dec_len:
                logits[:, self.eos_id] = -1e9
            logits = logits * append_mask + (1 - append_mask) * self.after_eos
            probs = self.softmax(logits)

            # [-1, topk]
            topk_probs, _ = paddle.topk(probs, k=self.topk)
            mask = paddle.cast(probs >= topk_probs[:, -1:], 'float32')
            sums = paddle.sum(topk_probs, axis=-1, keepdim=True)
            new_probs = probs * mask / sums
            # [-1, 1]
            sampling_ids = paddle.multinomial(new_probs)

            step = step + 1
            tgt_ids = sampling_ids
            tgt_pos = tgt_pos + 1
            predictions = paddle.concat([predictions, tgt_ids], axis=1)
        return predictions
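The top-k renormalized sampling inside the loop above, extracted into a standalone sketch (shapes illustrative): zero out everything below the k-th largest probability, renormalize over the kept mass, then draw one id per row.

import paddle
import paddle.nn.functional as F

def topk_sample(logits, k):
    probs = F.softmax(logits, axis=-1)
    topk_probs, _ = paddle.topk(probs, k=k)
    # Keep only entries at or above the k-th largest probability per row.
    mask = paddle.cast(probs >= topk_probs[:, -1:], probs.dtype)
    new_probs = probs * mask / paddle.sum(topk_probs, axis=-1, keepdim=True)
    return paddle.multinomial(new_probs)  # [batch_size, 1]

sampling_ids = topk_sample(paddle.randn([2, 100]), k=10)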
Example #6
    def test_dygraph3(self):
        # With replacement=False, the number of samples must not exceed the
        # number of categories.
        paddle.disable_static()
        x_numpy = np.random.rand(1000)
        x = paddle.to_tensor(x_numpy)
        out = paddle.multinomial(x, num_samples=100, replacement=False)

        unique_out = np.unique(out.numpy())
        self.assertEqual(
            len(unique_out), 100,
            "replacement is False. categories can't be sampled repeatedly")
        paddle.enable_static()
Example #7
    def test_input_probs_dim(self):
        def test_dim_larger_than_2():
            x = paddle.rand([2, 3, 3])
            paddle.multinomial(x)

        self.assertRaises(ValueError, test_dim_larger_than_2)

        def test_dim_less_than_1():
            x_np = np.random.random([])
            x = paddle.to_tensor(x_np)
            paddle.multinomial(x)

        self.assertRaises(ValueError, test_dim_less_than_1)

        with self.assertRaises(ValueError):
            y = paddle.multinomial(paddle.to_tensor([1., 2., -3.]))

        with self.assertRaises(ValueError):
            prob = paddle.rand([20, 1000])
            prob[1, :] = 0  # zero out a row so its probabilities sum to 0
            y = paddle.multinomial(prob)
Example #8
    def test_dygraph(self):
        # The input probability is a vector and replacement is True.
        paddle.disable_static()
        x_numpy = np.random.rand(4)
        x = paddle.to_tensor(x_numpy)
        out = paddle.multinomial(x, num_samples=100000, replacement=True)
        paddle.enable_static()

        sample_prob = sample_output_one_dimension(out.numpy(), 4)
        prob = x_numpy / x_numpy.sum(axis=-1, keepdims=True)
        self.assertTrue(
            np.allclose(sample_prob, prob, rtol=0, atol=0.01),
            "sample_prob: " + str(sample_prob) + "\nprob: " + str(prob))
Example #9
    def sample(self, labels):
        n_sample = self.n_sample
        n_tries = 2 * n_sample
        batch_size = labels.shape[0]

        with paddle.no_grad():
            neg_samples = paddle.unique(
                paddle.multinomial(self.dist, n_tries, replacement=True))
            true_log_probs = paddle.gather(self.log_q, labels.flatten())
            true_log_probs = paddle.reshape(true_log_probs,
                                            shape=[batch_size, -1])
            samp_log_probs = paddle.gather(self.log_q, neg_samples)
            return true_log_probs, samp_log_probs, neg_samples
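The oversample-then-dedup pattern above in miniature (names illustrative): draw 2*n_sample ids with replacement, then drop duplicates. Note that after `paddle.unique` the number of negatives actually returned varies from call to call.

import paddle

dist = paddle.rand([1000])
n_sample = 64
neg_samples = paddle.unique(
    paddle.multinomial(dist, num_samples=2 * n_sample, replacement=True))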
Example #10
    def test_static(self):
        paddle.set_device('npu:0')
        startup_program = fluid.Program()
        train_program = fluid.Program()
        with fluid.program_guard(train_program, startup_program):
            x = fluid.data('x', shape=[4], dtype='float32')
            out = paddle.multinomial(x, num_samples=100000, replacement=True)

            place = fluid.NPUPlace(0)
            exe = fluid.Executor(place)

        exe.run(startup_program)
        x_np = np.random.rand(4).astype('float32')
        out = exe.run(train_program, feed={'x': x_np}, fetch_list=[out])

        sample_prob = sample_output_one_dimension(out, 4)
        prob = x_np / x_np.sum(axis=-1, keepdims=True)
        self.assertTrue(
            np.allclose(sample_prob, prob, rtol=0, atol=0.01),
            "sample_prob: " + str(sample_prob) + "\nprob: " + str(prob))
Example #11
    def forward(self, trg):
        # Sample a latent vector from the prior
        latent_z = paddle.normal(shape=(trg.shape[0], self.latent_size))
        dec_first_hidden_cell = self.fc(latent_z)
        dec_first_hidden, dec_first_cell = paddle.split(
            dec_first_hidden_cell, 2, axis=-1)
        if self.num_layers > 1:
            dec_first_hidden = paddle.split(dec_first_hidden, self.num_layers)
            dec_first_cell = paddle.split(dec_first_cell, self.num_layers)
        else:
            dec_first_hidden = [dec_first_hidden]
            dec_first_cell = [dec_first_cell]
        dec_initial_states = [[h, c]
                              for h, c in zip(dec_first_hidden, dec_first_cell)]

        output_fc = lambda x: F.one_hot(
            paddle.multinomial(
                F.softmax(paddle.squeeze(self.decoder.output_fc(x), [1]))),
            num_classes=self.vocab_size)

        latent_z = nn.BeamSearchDecoder.tile_beam_merge_with_batch(
            latent_z, self.beam_size)

        decoder = nn.BeamSearchDecoder(
            cell=self.decoder.lstm.cell,
            start_token=self.start_token,
            end_token=self.end_token,
            beam_size=self.beam_size,
            embedding_fn=self.decoder.trg_embedder,
            output_fn=output_fc)

        outputs, _ = nn.dynamic_decode(
            decoder,
            inits=dec_initial_states,
            max_step_num=self.max_out_len,
            latent_z=latent_z)
        return outputs
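The `output_fc` lambda in isolation, as a rough sketch with illustrative shapes: draw one token id per beam from the softmax, then re-encode it as a one-hot vector so the beam search decoder can score it as the step output.

import paddle
import paddle.nn.functional as F

vocab_size = 32
logits = paddle.randn([4, vocab_size])
sampled = paddle.multinomial(F.softmax(logits, axis=-1))  # [4, 1]
one_hot = F.one_hot(sampled.squeeze([1]), num_classes=vocab_size)  # [4, vocab_size]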
Example #12
    def decode(self, x_tree_vecs, prob_decode):
        """
        Decode tree structre from tree latent space.
        Args:
            x_tree_mess(tensor): tree latent represenation.
            prob_decode(bool): using bernoulli distribution in tree decode if prob_decode=true.
        Returns:
            root node and all nodes.
        """
        assert x_tree_vecs.shape[0] == 1
        stack = []
        init_hiddens = paddle.zeros([1, self.hidden_size])
        zero_pad = paddle.zeros([1, 1, self.hidden_size])
        contexts = paddle.zeros([1]).astype('int64')

        root_score = self.aggregate(init_hiddens, contexts, x_tree_vecs, 'word')
        root_wid = paddle.argmax(root_score, axis=1)
        root_wid = int(root_wid.numpy())

        root = MolTreeNode(self.vocab.get_smiles(root_wid))
        root.wid = root_wid
        root.idx = 0
        stack.append((root, self.vocab.get_slots(root.wid)))

        all_nodes = [root]
        h = {}
        for step in range(MAX_DECODE_LEN):
            node_x, fa_slot = stack[-1]
            cur_h_nei = [h[(node_y.idx, node_x.idx)] for node_y in node_x.neighbors]
            if len(cur_h_nei) > 0:
                cur_h_nei = paddle.reshape(paddle.stack(cur_h_nei, axis=0), shape=[1, -1, self.hidden_size])
            else:
                cur_h_nei = zero_pad

            cur_x = paddle.to_tensor([node_x.wid])
            cur_x = self.embedding(cur_x)
            cur_h = paddle.sum(cur_h_nei, axis=1)
            stop_hiddens = paddle.concat([cur_x, cur_h], axis=1)
            stop_hiddens = F.relu(self.U_i(stop_hiddens))
            stop_score = self.aggregate(stop_hiddens, contexts, x_tree_vecs, 'stop')

            if prob_decode:
                backtrack = (paddle.bernoulli(F.sigmoid(stop_score)).item() == 0)
            else:
                backtrack = (float(stop_score.numpy()) < 0)

            if not backtrack:
                new_h = GRU(cur_x, cur_h_nei, self.W_z, self.W_r, self.U_r, self.W_h)
                pred_score = self.aggregate(new_h, contexts, x_tree_vecs, 'word')

                if prob_decode:
                    sort_wid = paddle.multinomial(F.softmax(pred_score, axis=1).squeeze(), 5)
                else:
                    sort_wid = paddle.argsort(
                        pred_score, axis=1, descending=True)
                    sort_wid = sort_wid.squeeze()

                next_wid = None
                for wid in sort_wid[:5]:
                    slots = self.vocab.get_slots(wid)
                    node_y = MolTreeNode(self.vocab.get_smiles(wid))
                    if have_slots(fa_slot, slots) and can_assemble(node_x, node_y):
                        next_wid = wid
                        next_slots = slots
                        break

                if next_wid is None:
                    backtrack = True
                else:
                    node_y = MolTreeNode(self.vocab.get_smiles(next_wid))
                    node_y.wid = int(next_wid.numpy())
                    node_y.idx = len(all_nodes)
                    node_y.neighbors.append(node_x)
                    h[(node_x.idx, node_y.idx)] = new_h[0]
                    stack.append((node_y, next_slots))
                    all_nodes.append(node_y)

            if backtrack:
                if len(stack) == 1:
                    break

                node_fa, _ = stack[-2]
                cur_h_nei = [h[(node_y.idx, node_x.idx)] for node_y in node_x.neighbors if node_y.idx != node_fa.idx]
                if len(cur_h_nei) > 0:
                    cur_h_nei = paddle.reshape(paddle.stack(cur_h_nei, axis=0), shape=[1, -1, self.hidden_size])
                else:
                    cur_h_nei = zero_pad

                new_h = GRU(cur_x, cur_h_nei, self.W_z, self.W_r, self.U_r, self.W_h)
                h[(node_x.idx, node_fa.idx)] = new_h[0]
                node_fa.neighbors.append(node_x)
                stack.pop()

        return root, all_nodes
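A detail worth noting in the `prob_decode` branch above: `paddle.multinomial` defaults to `replacement=False`, so the five candidate ids drawn are distinct. In miniature:

import paddle
import paddle.nn.functional as F

pred_score = paddle.randn([1, 100])
# Five distinct candidate word ids, ordered by draw (no replacement).
sort_wid = paddle.multinomial(F.softmax(pred_score, axis=1).squeeze(), 5)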
Example #13
    def sample(self, n_batch, max_len=100, z=None, temp=1.0):
        """Generate n_batch samples in eval mode (`z` may not be on the
        same device).
        :param n_batch: number of sentences to generate
        :param max_len: max length of the samples
        :param z: (n_batch, d_z) of floats, latent vector z or None
        :param temp: softmax temperature
        :return: list of strings, the sampled sequences x
        """

        if z is None:
            z = self.sample_z_prior(n_batch)
        z_0 = z.unsqueeze(1)

        # Initial values
        h = self.decoder_lat(z)
        h = paddle.expand(h.unsqueeze(0), \
            shape=[self.decoder_rnn.num_layers, h.unsqueeze(0).shape[1], h.unsqueeze(0).shape[2]])

        w = paddle.expand(paddle.to_tensor(self.bos), shape=[n_batch])
        x = paddle.expand(paddle.to_tensor([self.pad]),
                          shape=[n_batch, max_len])

        x[:, 0] = self.bos

        end_pads = paddle.expand(paddle.to_tensor([max_len]), shape=[n_batch])
        eos_mask = paddle.zeros([n_batch], dtype='bool')

        # Generating cycle
        for i in range(1, max_len):
            x_emb = self.x_emb(w).unsqueeze(1)
            x_input = paddle.concat([x_emb, z_0], axis=-1)

            o, h = self.decoder_rnn(x_input, h)
            y = self.decoder_fc(o.squeeze(1))
            y = F.softmax(y / temp, axis=-1)

            w = paddle.multinomial(y, 1)[:, 0]
            #w = paddle.argmax(y, 1)

            # convert to numpy in order to slice the mask
            x = x.numpy()
            eos_mask = eos_mask.numpy()
            w = w.numpy()
            end_pads = end_pads.numpy()

            x[~eos_mask, i] = w[~eos_mask]
            i_eos_mask = ~eos_mask & (w == self.eos)
            end_pads[i_eos_mask] = i + 1
            eos_mask = eos_mask | i_eos_mask

            # convert back to tensor
            x = paddle.to_tensor(x)
            w = paddle.to_tensor(w)
            eos_mask = paddle.to_tensor(eos_mask)
            end_pads = paddle.to_tensor(end_pads)

        # Converting `x` to list of tensors
        new_x = []
        for i in range(x.shape[0]):
            new_x.append(x[i, :int(end_pads[i])])

        return [self.tensor2string(i_x) for i_x in new_x]
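The numpy round-trip inside the loop works around boolean-mask assignment on paddle tensors; a minimal illustration of the masking step:

import numpy as np

x = np.zeros([3, 5], dtype='int64')
eos_mask = np.array([False, True, False])
w = np.array([7, 8, 9])
# Only rows that have not yet emitted <eos> receive the newly sampled token.
x[~eos_mask, 2] = w[~eos_mask]  # rows 0 and 2 updated, row 1 left padded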
Example #14
    def dfs_assemble(self, y_tree_mess, x_mol_vecs, all_nodes, cur_mol, global_amap, fa_amap, cur_node, fa_node,
                     prob_decode, check_aroma):
        """DFS in subgraph assembly"""
        fa_nid = fa_node.nid if fa_node is not None else -1
        prev_nodes = [fa_node] if fa_node is not None else []

        children = [nei for nei in cur_node.neighbors if nei.nid != fa_nid]
        neighbors = [nei for nei in children if nei.mol.GetNumAtoms() > 1]
        neighbors = sorted(neighbors, key=lambda x: x.mol.GetNumAtoms(), reverse=True)
        singletons = [nei for nei in children if nei.mol.GetNumAtoms() == 1]
        neighbors = singletons + neighbors

        cur_amap = [(fa_nid, a2, a1) for nid, a1, a2 in fa_amap if nid == cur_node.nid]
        cands, aroma_score = enum_assemble(cur_node, neighbors, prev_nodes, cur_amap)
        if len(cands) == 0 or (sum(aroma_score) < 0 and check_aroma):
            return None, cur_mol

        cand_smiles, cand_amap = zip(*cands)

        aroma_score = paddle.to_tensor(aroma_score)
        cands = [(smiles, all_nodes, cur_node) for smiles in cand_smiles]

        if len(cands) > 1:
            jtmpn_holder = JTMPN.tensorize(cands, y_tree_mess[1])
            fatoms = jtmpn_holder['fatoms']
            fbonds = jtmpn_holder['fbonds']
            agraph = jtmpn_holder['agraph']
            bgraph = jtmpn_holder['bgraph']
            scope = jtmpn_holder['scope']
            cand_vecs = self.jtmpn(fatoms, fbonds, agraph, bgraph, scope, y_tree_mess[0])
            scores = paddle.mv(cand_vecs, x_mol_vecs) + aroma_score
        else:
            scores = paddle.to_tensor([1.0])

        if prob_decode:
            probs = paddle.squeeze(F.softmax(paddle.reshape(scores, shape=[1, -1]), axis=1)) + 1e-7
            cand_idx = paddle.multinomial(probs, probs.numel())
        else:
            cand_idx = paddle.argsort(scores, descending=True)

        backup_mol = Chem.RWMol(cur_mol)
        pre_mol = cur_mol
        for i in range(cand_idx.numel()):
            cur_mol = Chem.RWMol(backup_mol)
            pred_amap = cand_amap[int(cand_idx[i].numpy())]
            new_global_amap = copy.deepcopy(global_amap)

            for nei_id, ctr_atom, nei_atom in pred_amap:
                if nei_id == fa_nid:
                    continue
                new_global_amap[nei_id][nei_atom] = new_global_amap[cur_node.nid][ctr_atom]

            cur_mol = attach_mols(cur_mol, children, [], new_global_amap)
            new_mol = cur_mol.GetMol()
            new_mol = Chem.MolFromSmiles(Chem.MolToSmiles(new_mol))

            if new_mol is None:
                continue

            has_error = False
            for nei_node in children:
                if nei_node.is_leaf:
                    continue
                tmp_mol, tmp_mol2 = self.dfs_assemble(y_tree_mess, x_mol_vecs, all_nodes, cur_mol, new_global_amap,
                                                      pred_amap, nei_node, cur_node, prob_decode, check_aroma)
                if tmp_mol is None:
                    has_error = True
                    if i == 0: pre_mol = tmp_mol2
                    break
                cur_mol = tmp_mol
            if not has_error: return cur_mol, cur_mol
        return None, pre_mol
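The `prob_decode` branch above in isolation: the `1e-7` epsilon keeps every candidate's probability strictly positive, and sampling `numel()` ids without replacement yields a score-biased random ordering of all candidates.

import paddle
import paddle.nn.functional as F

scores = paddle.to_tensor([2.0, -1.0, 0.5])
probs = paddle.squeeze(
    F.softmax(paddle.reshape(scores, shape=[1, -1]), axis=1)) + 1e-7
cand_idx = paddle.multinomial(probs, probs.numel())  # permutation of all candidates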
Example #15
    def sample(self,
               input_ids,
               logits_processors,
               max_length,
               pad_token_id,
               eos_token_id,
               top_k=None,
               top_p=None,
               temperature=None,
               min_tokens_to_keep=1,
               **model_kwargs):
        def TopKProcess(probs, top_k, min_tokens_to_keep):
            top_k = min(max(top_k, min_tokens_to_keep), probs.shape[-1])
            # Remove all tokens with a probability less than the last token of the top-k
            topk_probs, _ = paddle.topk(probs, k=top_k)
            probs = paddle.where(probs >= topk_probs[:, -1:], probs,
                                 paddle.full_like(probs, 0.0))
            return probs

        def TopPProcess(probs, top_p, min_tokens_to_keep):
            sorted_probs = paddle.sort(probs, descending=True)
            sorted_indices = paddle.argsort(probs, descending=True)
            cumulative_probs = paddle.cumsum(sorted_probs, axis=-1)

            # Remove tokens with cumulative probs above top_p, but keep at
            # least min_tokens_to_keep tokens
            sorted_indices_to_remove = cumulative_probs > top_p
            if min_tokens_to_keep > 1:
                # Set 'min_tokens_to_keep - 1' because the first token is kept
                sorted_indices_to_remove[:, :min_tokens_to_keep - 1] = 0
            # Keep the first token
            sorted_indices_to_remove = paddle.cast(sorted_indices_to_remove,
                                                   dtype='int64')
            sorted_indices_to_remove[:, 1:] = (
                sorted_indices_to_remove[:, :-1].clone())
            sorted_indices_to_remove[:, 0] = 0

            # Scatter sorted tensors to original indexing
            sorted_indices = sorted_indices + paddle.arange(
                probs.shape[0]).unsqueeze(-1) * probs.shape[-1]
            condition = paddle.scatter(sorted_indices_to_remove.flatten(),
                                       sorted_indices.flatten(),
                                       sorted_indices_to_remove.flatten())
            condition = paddle.cast(condition, 'bool').reshape(probs.shape)
            probs = paddle.where(condition, paddle.full_like(probs, 0.0),
                                 probs)
            return probs

        batch_size, cur_len = input_ids.shape
        origin_len = cur_len
        unfinished_flag = paddle.full([batch_size, 1], True, dtype='bool')
        scores = paddle.full([batch_size, 1],
                             0.0,
                             dtype=paddle.get_default_dtype())

        while cur_len < max_length:
            # prepare model inputs & get model output
            model_inputs = self.prepare_inputs_for_generation(
                input_ids, **model_kwargs)
            outputs = self(**model_inputs)
            logits = outputs[0] if isinstance(outputs, tuple) else outputs
            # [batch_size, vocab_size]
            logits = logits[:, -1, :]

            # pre-process distribution
            logits = self.adjust_logits_during_generation(logits)
            logits = logits_processors(input_ids, logits)

            # sample
            origin_probs = F.softmax(logits)
            origin_probs = paddle.log(origin_probs)
            if temperature is not None and temperature != 1.0:
                logits = logits / temperature
            probs = F.softmax(logits)
            if top_k is not None and top_k != 0:
                probs = TopKProcess(probs, top_k, min_tokens_to_keep)
            if top_p is not None and top_p < 1.0:
                probs = TopPProcess(probs, top_p, min_tokens_to_keep)
            next_tokens = paddle.multinomial(probs)
            next_scores = paddle.index_sample(origin_probs, next_tokens)

            if eos_token_id is not None:
                next_tokens = paddle.where(
                    unfinished_flag, next_tokens,
                    paddle.full_like(next_tokens, pad_token_id))

            scores = self.update_scores_for_generation(scores, next_scores,
                                                       cur_len - origin_len,
                                                       unfinished_flag)

            cur_len += 1
            input_ids = paddle.concat([input_ids, next_tokens], axis=1)

            if eos_token_id is not None:
                unfinished_flag = paddle.logical_and(
                    unfinished_flag, next_tokens != eos_token_id)

            # Stop when there is a </s> in all sentences
            if not paddle.any(unfinished_flag):
                break
            model_kwargs = self.update_model_kwargs_for_generation(
                outputs, model_kwargs)
        return input_ids[:, origin_len:], scores
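The split between sampling and scoring above, in miniature: ids are drawn from the temperature/top-k/top-p adjusted distribution, but scored under the unmodified log-probabilities (temperature value illustrative):

import paddle
import paddle.nn.functional as F

logits = paddle.randn([2, 50])
origin_log_probs = paddle.log(F.softmax(logits, axis=-1))
probs = F.softmax(logits / 0.7, axis=-1)   # temperature-sharpened copy
next_tokens = paddle.multinomial(probs)    # [2, 1], drawn from adjusted dist
next_scores = paddle.index_sample(origin_log_probs, next_tokens)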
Example #16
 def test_dim_less_than_1():
     x_np = np.random.random([])
     x = paddle.to_tensor(x_np)
     out = paddle.multinomial(x)
Example #17
 def test_dim_larger_than_2():
     x = paddle.rand([2, 3, 3])
     out = paddle.multinomial(x)
Example #18
 def test_num_sample_less_than_0():
     x = paddle.rand([4])
     out = paddle.multinomial(x, num_samples=-2)
Example #19
 def test_alias(self):
     paddle.disable_static()
     x = paddle.rand([4])
     paddle.multinomial(x, num_samples=10, replacement=True)
     paddle.tensor.multinomial(x, num_samples=10, replacement=True)
     paddle.tensor.random.multinomial(x, num_samples=10, replacement=True)
Example #20
 def test_samples_larger_than_categories():
     x = paddle.rand([4])
     paddle.multinomial(x, num_samples=5, replacement=False)
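Expected failure mode exercised by this helper: without replacement, `num_samples` cannot exceed the number of categories. A hedged check (the exact exception type may vary across Paddle versions):

import paddle

x = paddle.rand([4])
try:
    paddle.multinomial(x, num_samples=5, replacement=False)
except Exception as err:  # exception type varies across versions
    print("raised as expected:", type(err).__name__)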