Example 1
 def forward(self, input, mask=None):
     """
     Args:
         input (obj: `paddle.Tensor`) of shape (batch, seq_len, input_size): Tensor containing the features of the input sequence.
         mask (obj: `paddle.Tensor`, optional, defaults to `None`) of shape (batch, seq_len):
             A bool tensor in which each element indicates whether the corresponding input token is a padding token.
     """
     forward_input, backward_input = paddle.chunk(input, chunks=2, axis=2)
     # elementwise sum of forward_input and backward_input
     # Shape: (batch_size, max_seq_len, hidden_size)
     h = paddle.add_n([forward_input, backward_input])
     # Shape: (batch_size, hidden_size, 1)
     att_weight = self.att_weight.tile(
         repeat_times=(paddle.shape(h)[0], 1, 1))
     # Shape: (batch_size, max_seq_len, 1)
     att_score = paddle.bmm(paddle.tanh(h), att_weight)
     if mask is not None:
         # mask, remove the effect of 'PAD'
         mask = paddle.cast(mask, dtype='float32')
         mask = mask.unsqueeze(axis=-1)
         inf_tensor = paddle.full(
             shape=mask.shape, dtype='float32', fill_value=-INF)
         att_score = paddle.multiply(att_score, mask) + paddle.multiply(
             inf_tensor, (1 - mask))
     # Shape: (batch_size, max_seq_len, 1)
     att_weight = F.softmax(att_score, axis=1)
     # Shape: (batch_size, lstm_hidden_size)
     reps = paddle.bmm(h.transpose(perm=(0, 2, 1)),
                       att_weight).squeeze(axis=-1)
     reps = paddle.tanh(reps)
     return reps, att_weight
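As a side note, the masking trick in this forward can be exercised on its own. Below is a minimal sketch, not part of the original module; the token ids, the pad id of 0, the -1e9 stand-in for -INF, and the tensor shapes are made up for illustration:

import paddle
import paddle.nn.functional as F

# Hypothetical batch: 2 sequences of length 4, padded with token id 0.
token_ids = paddle.to_tensor([[5, 7, 9, 0], [3, 2, 0, 0]])
mask = token_ids != 0                       # bool mask, True on real tokens
att_score = paddle.randn([2, 4, 1])         # stand-in for the raw attention scores

mask_f = paddle.cast(mask, 'float32').unsqueeze(axis=-1)
inf_tensor = paddle.full(shape=mask_f.shape, dtype='float32', fill_value=-1e9)
att_score = paddle.multiply(att_score, mask_f) + paddle.multiply(inf_tensor, (1 - mask_f))
att_weight = F.softmax(att_score, axis=1)   # padded positions receive ~0 weight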
Example 2
    def test_simple_pylayer_return_none_with_no_grad(self):
        class tanh(PyLayer):
            @staticmethod
            def forward(ctx, x1, x2, func1, func2=paddle.square):
                ctx.func = func2
                y1 = func1(x1)
                y2 = func1(x2)
                ctx.save_for_backward(y1, y2)
                return 1, None, y1, y2, ''

            @staticmethod
            def backward(ctx, dy1, dy2):
                y1, y2 = ctx.saved_tensor()
                re1 = dy1 * (1 - ctx.func(y1))
                re2 = dy2 * (1 - paddle.square(y2))
                return re1, None

        input1 = paddle.randn([2, 3]).astype("float64")
        input2 = input1.detach().clone()
        input3 = input1.detach().clone()
        input4 = input1.detach().clone()
        input1.stop_gradient = False
        input2.stop_gradient = False
        input3.stop_gradient = True
        input4.stop_gradient = True
        z = tanh.apply(input1, input3, paddle.tanh, paddle.square)
        z = z[2] + z[3]
        z.mean().backward()

        z2 = paddle.tanh(input2) + paddle.tanh(input4)
        z2.mean().backward()

        self.assertTrue(
            np.max(np.abs((input1.grad.numpy() -
                           input2.grad.numpy()))) < 1e-10)
Example 3
def build_program():
    main_program = paddle.static.Program()
    startup_program = paddle.static.Program()

    with paddle.static.program_guard(main_program, startup_program):
        with paddle.static.device_guard('cpu'):
            data = paddle.ones([4, 64], dtype='float32', name='data')

        # data -> [memcpy_h2d] -> data' -> [matmul] -> out ->[add] -> add_out
        with paddle.static.device_guard('gpu'):
            weight = paddle.randn([64, 64], name='weight')  # gpu
            matmul_out = paddle.matmul(data, weight, name='matmul_out')  # gpu
            bias = paddle.ones([4, 64], dtype='float32', name='bias')
            add_out = paddle.add(matmul_out, bias, name='add_out')

        # add_out -> [memcpy_d2h] -> add_out' -> [sub] -> sub_out -> [tanh] -> tanh_out
        with paddle.static.device_guard('cpu'):
            sub_out = paddle.subtract(add_out, data, name='sub_out')
            tanh_out = paddle.tanh(sub_out, name='tanh_out')

        with paddle.static.device_guard('gpu'):
            bias_1 = paddle.add(bias, sub_out, name='bias_1')
            out_before = paddle.tanh(bias_1, name='out_before')
            out_last = paddle.subtract(tanh_out, data, name='out_last')

            out = paddle.add(out_before, out_last, name='out')
            mean = paddle.mean(out, name='mean_out')

    return main_program, startup_program, [mean]
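As a rough usage sketch (not from the original snippet), the returned programs could be executed under static graph mode as follows; the CUDAPlace branch assumes a GPU build of Paddle, since the program places several ops under the 'gpu' device guard:

import paddle

paddle.enable_static()
main_program, startup_program, fetch_targets = build_program()

place = paddle.CUDAPlace(0) if paddle.device.is_compiled_with_cuda() else paddle.CPUPlace()
exe = paddle.static.Executor(place)
exe.run(startup_program)
mean_out, = exe.run(main_program, fetch_list=fetch_targets)
print(mean_out)  # scalar mean of `out`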
Example 4
    def forward(self, inputs):
        # Deal with features of different lengths:
        # 1. pad them to the same length and build a tensor
        # 2. build a mask tensor of the same shape, marking valid positions with 1
        # 3. compute the output with the mask tensor, so padding does not affect the output
        assert (len(inputs) == self.feature_num
                ), "Input tensor does not contain {} features".format(
                    self.feature_num)
        att_outs = []
        for i in range(len(inputs)):
            ###1. fc
            m = getattr(self, "fc_feature{}".format(i))
            output_fc = m(inputs[i][0])
            output_fc = paddle.tanh(output_fc)

            ###2. bi_lstm
            m = getattr(self, "bi_lstm{}".format(i))
            lstm_out, _ = m(inputs=output_fc, sequence_length=inputs[i][1])

            lstm_dropout = self.dropout(lstm_out)

            ###3. att_fc
            m = getattr(self, "att_fc{}".format(i))
            lstm_weight = m(lstm_dropout)

            ###4. softmax replacement start (it involves a sum over time steps)
            lstm_exp = paddle.exp(lstm_weight)
            lstm_mask = paddle.mean(inputs[i][2], axis=2)
            lstm_exp_with_mask = paddle.multiply(x=lstm_exp,
                                                 y=lstm_mask,
                                                 axis=0)
            lstm_sum_with_mask = paddle.sum(lstm_exp_with_mask, axis=1)
            exponent = -1
            lstm_denominator = paddle.pow(lstm_sum_with_mask, exponent)
            lstm_softmax = paddle.multiply(x=lstm_exp,
                                           y=lstm_denominator,
                                           axis=0)
            lstm_weight = lstm_softmax
            ###softmax replacement end

            lstm_scale = paddle.multiply(x=lstm_dropout, y=lstm_weight, axis=0)

            ###5. sequence_pool replacement start (it involves a sum over time steps)
            lstm_scale_with_mask = paddle.multiply(x=lstm_scale,
                                                   y=lstm_mask,
                                                   axis=0)
            fea_lens = inputs[i][1]
            fea_len = int(fea_lens[0])
            lstm_pool = paddle.sum(lstm_scale_with_mask, axis=1)
            ###sequence_pool replacement end
            att_outs.append(lstm_pool)
        att_out = paddle.concat(att_outs, axis=1)
        fc_out1 = self.fc_out1(att_out)
        fc_out1_act = self.relu(fc_out1)
        fc_out2 = self.fc_out2(fc_out1_act)
        fc_out2_act = paddle.tanh(fc_out2)
        fc_logit = self.fc_logit(fc_out2_act)
        output = self.sigmoid(fc_logit)
        return fc_logit, output
Example 5
 def forward(self, inputs):
     outputs1 = self.fc1(inputs)
     outputs1 = paddle.tanh(outputs1)
     outputs2 = self.fc2(outputs1)
     outputs2 = paddle.tanh(outputs2)
     outputs_final = self.fc3(outputs2)
     outputs_final = F.sigmoid(outputs_final)
     return outputs_final
Example 6
 def forward(self, text):
     # Shape: (batch_size, num_tokens, embedding_dim)
     embedded_text = self.embedder(text)
     # Shape: (batch_size, len(ngram_filter_sizes) * num_filter)
     encoder_out = paddle.tanh(self.encoder(embedded_text))
     # Shape: (batch_size, fc_hidden_size)
     fc_out = paddle.tanh(self.fc(encoder_out))
     # Shape: (batch_size, num_classes)
     logits = self.output_layer(fc_out)
     return logits
Example 7
    def forward(self, input_embedding, init_hidden=None, init_cell=None):
        cell_array = []
        hidden_array = []

        for i in range(self._num_layers):
            hidden_array.append(init_hidden[i])
            cell_array.append(init_cell[i])

        res = []
        for index in range(self._num_steps):
            step_input = input_embedding[:, index, :]
            for k in range(self._num_layers):
                pre_hidden = hidden_array[k]
                pre_cell = cell_array[k]
                weight_1 = self.weight_1_arr[k]
                bias = self.bias_arr[k]

                nn = paddle.concat(x=[step_input, pre_hidden], axis=1)
                gate_input = paddle.matmul(x=nn, y=weight_1)

                gate_input = paddle.add(x=gate_input, y=bias)
                i, j, f, o = paddle.split(x=gate_input,
                                          num_or_sections=4,
                                          axis=-1)
                c = pre_cell * paddle.nn.functional.sigmoid(
                    f) + paddle.nn.functional.sigmoid(i) * paddle.tanh(j)
                m = paddle.tanh(c) * paddle.nn.functional.sigmoid(o)
                hidden_array[k] = m
                cell_array[k] = c
                step_input = m

                if self._dropout is not None and self._dropout > 0.0:
                    step_input = paddle.nn.functional.dropout(
                        step_input,
                        dropout_prob=self._dropout,
                        dropout_implementation='upscale_in_train')
            res.append(step_input)
        real_res = paddle.concat(x=res, axis=1)
        real_res = paddle.reshape(real_res,
                                  [-1, self._num_steps, self._hidden_size])
        last_hidden = paddle.concat(x=hidden_array, axis=1)
        last_hidden = paddle.reshape(
            last_hidden, shape=[-1, self._num_layers, self._hidden_size])
        last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2])
        last_cell = paddle.concat(x=cell_array, axis=1)
        last_cell = paddle.reshape(
            last_cell, shape=[-1, self._num_layers, self._hidden_size])
        last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2])
        return real_res, last_hidden, last_cell
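The per-step gate arithmetic in the inner loop can be isolated into a small standalone sketch; the shapes and the zero initial cell state below are made up for illustration:

import paddle
import paddle.nn.functional as F

batch_size, hidden_size = 4, 8
gate_input = paddle.randn([batch_size, 4 * hidden_size])      # concat of the four gate pre-activations
i, j, f, o = paddle.split(x=gate_input, num_or_sections=4, axis=-1)

pre_cell = paddle.zeros([batch_size, hidden_size])
c = pre_cell * F.sigmoid(f) + F.sigmoid(i) * paddle.tanh(j)   # new cell state
m = paddle.tanh(c) * F.sigmoid(o)                             # new hidden state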
Example 8
    def forward(self, text, seq_len=None):
        # Shape: (batch_size, num_tokens, embedding_dim)
        embedded_text = self.embedder(text)

        # Shape: (batch_size, embedding_dim)
        summed = self.bow_encoder(embedded_text)
        encoded_text = paddle.tanh(summed)

        # Shape: (batch_size, hidden_size)
        fc1_out = paddle.tanh(self.fc1(encoded_text))
        # Shape: (batch_size, fc_hidden_size)
        fc2_out = paddle.tanh(self.fc2(fc1_out))
        # Shape: (batch_size, num_classes)
        logits = self.output_layer(fc2_out)
        return logits
Example 9
    def test_simple_pylayer_single_output(self):
        class tanh(PyLayer):
            @staticmethod
            def forward(ctx, x1, func1, func2=paddle.square):
                ctx.func = func2
                y1 = func1(x1)
                ctx.save_for_backward(y1)
                return y1

            @staticmethod
            def backward(ctx, dy1):
                y1, = ctx.saved_tensor()
                re1 = dy1 * (1 - ctx.func(y1))
                return re1

        input1 = paddle.randn([2, 3]).astype("float64")
        input2 = input1.detach().clone()
        input1.stop_gradient = False
        input2.stop_gradient = False
        z = tanh.apply(x1=input1, func1=paddle.tanh)
        z.mean().backward()
        z2 = paddle.tanh(input2)
        z2.mean().backward()

        self.assertTrue(
            np.max(np.abs((input1.grad.numpy() -
                           input2.grad.numpy()))) < 1e-10)
Example 10
    def forward(self, x, condition):
        """Compute output for a whole folded sequence.
        
        Parameters
        ----------
        x : Tensor [shape=(batch_size, channel, height, width)]
            The input.
            
        condition : Tensor [shape=(batch_size, condition_channel, height, width)]
            The local condition.

        Returns
        -------
        res : Tensor [shape=(batch_size, channel, height, width)]
            The residual output.
            
        skip : Tensor [shape=(batch_size, channel, height, width)]
            The skip output.
        """
        x_in = x
        x = self.conv(x)
        x += self.condition_proj(condition)

        content, gate = paddle.chunk(x, 2, axis=1)
        x = paddle.tanh(content) * F.sigmoid(gate)

        x = self.out_proj(x)
        res, skip = paddle.chunk(x, 2, axis=1)
        res = x_in + res
        return res, skip
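The gated tanh unit in the middle of this forward can be tried in isolation; a minimal sketch with made-up shapes:

import paddle
import paddle.nn.functional as F

x = paddle.randn([4, 8, 16, 32])             # (batch, 2 * channel, height, width)
content, gate = paddle.chunk(x, 2, axis=1)   # split along the channel axis
y = paddle.tanh(content) * F.sigmoid(gate)   # gated tanh unit, shape (4, 4, 16, 32)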
Example 11
 def forward(self, x):
     # NOTE: manually trigger `__iter__` logic.
     params = list(self.params.__iter__())
     out = paddle.matmul(x, params[0])
     out = paddle.add(out, params[1])
     out = paddle.tanh(out)
     return out
Example 12
    def forward_interpet(self, text, seq_len):
        embedded_text = self.embedder(
            text)  # Shape: (batch_size, num_tokens, embedding_dim)

        # text_repr = self.lstm_encoder(embedded_text, sequence_length=seq_len) # Shape: (batch_size, num_tokens, num_directions * hidden)

        # encoded_text: tensor[batch, seq_len, num_directions * hidden]
        # last_hidden: tensor[2, batch, hiddens]
        encoded_text, (last_hidden,
                       last_cell) = self.lstm_layer(embedded_text,
                                                    sequence_length=seq_len)
        if self.direction == 'bidirect':
            text_repr = paddle.concat(
                (last_hidden[-2, :, :], last_hidden[-1, :, :]),
                axis=1)  # text_repr: tensor[batch, 2 * hidden], bidirectional
        else:
            text_repr = last_hidden[
                -1, :, :]  # text_repr: tensor[batch, hidden_size], unidirectional

        fc_out = paddle.tanh(
            self.fc(text_repr))  # Shape: (batch_size, fc_hidden_size)
        logits = self.output_layer(fc_out)  # Shape: (batch_size, num_classes)
        probs = self.softmax(logits)

        return probs, text_repr, embedded_text
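The bidirectional branch above concatenates the final forward and backward hidden states; a minimal standalone sketch with made-up shapes:

import paddle

# Hypothetical: 1 layer x 2 directions, batch of 4, hidden size 8.
last_hidden = paddle.randn([2, 4, 8])
text_repr = paddle.concat((last_hidden[-2, :, :], last_hidden[-1, :, :]), axis=1)
print(text_repr.shape)  # [4, 16]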
Example 13
    def forward(self, inputs):
        text = inputs[0]
        pos_tag = inputs[1]
        neg_tag = inputs[2]

        text_emb = self.text_embedding(text)
        text_emb = paddle.reshape(
            text_emb, shape=[-1, self.text_len, self.emb_dim])
        pos_tag_emb = self.tag_embedding(pos_tag)
        pos_tag_emb = paddle.reshape(pos_tag_emb, shape=[-1, self.emb_dim])
        neg_tag_emb = self.tag_embedding(neg_tag)
        neg_tag_emb = paddle.reshape(
            neg_tag_emb, shape=[-1, self.neg_size, self.emb_dim])

        conv_1d = self.conv(text_emb)
        act = paddle.tanh(conv_1d)
        maxpool = paddle.max(act, axis=1)
        maxpool = paddle.reshape(maxpool, shape=[-1, self.hid_dim])
        text_hid = self.hid_fc(maxpool)
        cos_pos = F.cosine_similarity(
            pos_tag_emb, text_hid, axis=1).reshape([-1, 1])
        # fluid.layers.Print(cos_pos)
        neg_tag_emb = paddle.max(neg_tag_emb, axis=1)
        neg_tag_emb = paddle.reshape(neg_tag_emb, shape=[-1, self.emb_dim])
        cos_neg = F.cosine_similarity(
            neg_tag_emb, text_hid, axis=1).reshape([-1, 1])
        # fluid.layers.Print(cos_neg)
        return cos_pos, cos_neg
Example 14
 def model(self, x, w, bias, opt):
     paddle.seed(0)
     place = paddle.CPUPlace()
     if paddle.device.is_compiled_with_cuda():
         place = paddle.CUDAPlace(0)
     exe = paddle.static.Executor(place)
     main = paddle.static.Program()
     startup = paddle.static.Program()
     with paddle.static.program_guard(main, startup):
         input_x = paddle.static.data('x', x.shape, dtype=x.dtype)
         input_x.stop_gradient = False
         params_w = paddle.static.create_parameter(shape=w.shape,
                                                   dtype=w.dtype,
                                                   is_bias=False)
         params_bias = paddle.static.create_parameter(shape=bias.shape,
                                                      dtype=bias.dtype,
                                                      is_bias=True)
         y = paddle.tanh(paddle.matmul(input_x, params_w) + params_bias)
         loss = paddle.norm(y, p=2)
         opt = opt
         _, grads = opt.minimize(loss)
         if prim_enabled():
             prim2orig(main.block(0))
     exe.run(startup)
     grads = exe.run(main,
                     feed={
                         'x': x,
                         'w': w,
                         'bias': bias
                     },
                     fetch_list=grads)
     return grads
Example 15
def gelu_new(x):
    """
    Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). Also see
    the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
    """
    return 0.5 * x * (1.0 + paddle.tanh(
        math.sqrt(2.0 / math.pi) * (x + 0.044715 * paddle.pow(x, 3.0))))
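For reference, this tanh approximation should match Paddle's built-in GELU when the approximate form is requested. A quick check (a sketch, not part of the original snippet; it assumes gelu_new and its imports of math and paddle are in scope):

import paddle
import paddle.nn.functional as F

x = paddle.linspace(-3.0, 3.0, 7)
print(paddle.allclose(gelu_new(x), F.gelu(x, approximate=True)))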
Example 16
 def backward(ctx, dy):
     with paddle.set_grad_enabled(True):
         temp = ctx.inputs
         temp.stop_gradient = False
         z = paddle.tanh(temp)
         z.backward()
         self.assertTrue(temp.grad is not None)
         return paddle.to_tensor(temp.grad)
Example 17
 def forward(self, query, title, query_seq_len=None, title_seq_len=None):
     # Shape: (batch_size, num_tokens, embedding_dim)
     embedded_query = self.embedder(query)
     embedded_title = self.embedder(title)
     # Shape: (batch_size, embedding_dim)
     summed_query = self.bow_encoder(embedded_query)
     summed_title = self.bow_encoder(embedded_title)
     encoded_query = paddle.tanh(summed_query)
     encoded_title = paddle.tanh(summed_title)
     # Shape: (batch_size, embedding_dim*2)
     contacted = paddle.concat([encoded_query, encoded_title], axis=-1)
     # Shape: (batch_size, fc_hidden_size)
     fc_out = paddle.tanh(self.fc(contacted))
     # Shape: (batch_size, num_classes)
     logits = self.output_layer(fc_out)
     # probs = F.softmax(logits, axis=-1)
     return logits
Example 18
    def forward_interpreter(self,
                            query,
                            title,
                            query_seq_len,
                            title_seq_len,
                            noise=None,
                            i=None,
                            n_samples=None):
        assert query_seq_len is not None and title_seq_len is not None
        # Shape: (batch_size, num_tokens, embedding_dim)

        query_baseline = paddle.to_tensor([self.pad_token_id] *
                                          query.shape[1]).unsqueeze(0)
        title_baseline = paddle.to_tensor([self.pad_token_id] *
                                          title.shape[1]).unsqueeze(0)

        embedded_query = self.embedder(query)
        embedded_title = self.embedder(title)
        embedded_query_baseline = self.embedder(query_baseline)
        embedded_title_baseline = self.embedder(title_baseline)

        if noise is not None and noise.upper() == 'INTEGRATED':
            embedded_query = embedded_query_baseline + i / (n_samples - 1) * (
                embedded_query - embedded_query_baseline)
            embedded_title = embedded_title_baseline + i / (n_samples - 1) * (
                embedded_title - embedded_title_baseline)

        # Shape: (batch_size, lstm_hidden_size)
        query_repr = self.lstm_encoder(embedded_query,
                                       sequence_length=query_seq_len)
        title_repr = self.lstm_encoder(embedded_title,
                                       sequence_length=title_seq_len)
        # Shape: (batch_size, 2*lstm_hidden_size)
        contacted = paddle.concat([query_repr, title_repr], axis=-1)
        # Shape: (batch_size, fc_hidden_size)
        fc_out = paddle.tanh(self.fc(contacted))
        # Shape: (batch_size, num_classes)
        logits = self.output_layer(fc_out)
        probs = F.softmax(logits, axis=-1)

        q_att = paddle.matmul(fc_out,
                              embedded_query, transpose_y=True).squeeze(
                                  axis=[1])  # (bsz, query_len)
        q_att = F.softmax(q_att, axis=-1)
        t_att = paddle.matmul(fc_out,
                              embedded_title, transpose_y=True).squeeze(
                                  axis=[1])  # (bsz, title_len)
        t_att = F.softmax(t_att, axis=-1)

        addiational_info = {
            'embedded': [embedded_query, embedded_title],
            'attention': [q_att, t_att],
        }
        # return logits, addiational_info
        return probs, addiational_info
Example 19
    def forward(self, inputs):
        x_0 = inputs.unsqueeze(2)  # (bs, in_features, 1)
        x_l = x_0
        for i in range(self.layer_num):
            output_of_experts = []
            gating_score_of_experts = []
            for expert_id in range(self.num_experts):
                # (1) G(x_l)
                # compute the gating score by x_l
                gating_score_of_experts.append(self.gating[expert_id](
                    x_l.squeeze(2)))

                # (2) E(x_l)
                # project the input x_l to $\mathbb{R}^{r}$
                v_x = paddle.matmul(self.V_list[i][expert_id].t(),
                                    x_l)  # (bs, low_rank, 1)

                # nonlinear activation in low rank space
                v_x = paddle.tanh(v_x)
                v_x = paddle.matmul(self.C_list[i][expert_id], v_x)
                v_x = paddle.tanh(v_x)

                # project back to $\mathbb{R}^{d}$
                uv_x = paddle.matmul(self.U_list[i][expert_id],
                                     v_x)  # (bs, in_features, 1)

                dot_ = uv_x + self.bias[i]
                dot_ = x_0 * dot_  # Hadamard-product

                output_of_experts.append(dot_.squeeze(2))

            # (3) mixture of low-rank experts
            output_of_experts = paddle.stack(
                output_of_experts, axis=2)  # (bs, in_features, num_experts)
            gating_score_of_experts = paddle.stack(
                gating_score_of_experts, axis=1)  # (bs, num_experts, 1)
            moe_out = paddle.matmul(output_of_experts,
                                    F.softmax(gating_score_of_experts, axis=1))
            x_l = moe_out + x_l  # (bs, in_features, 1)

        x_l = x_l.squeeze()  # (bs, in_features)
        return x_l
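A single low-rank expert step (item (2) in the comments above) can be written out on its own; a minimal sketch with made-up dimensions, using plain tensors in place of the U/V/C parameter lists:

import paddle

bs, in_features, low_rank = 4, 16, 4
x_0 = paddle.randn([bs, in_features, 1])
U = paddle.randn([in_features, low_rank])
V = paddle.randn([in_features, low_rank])
C = paddle.randn([low_rank, low_rank])
bias = paddle.zeros([in_features, 1])

v_x = paddle.tanh(paddle.matmul(V.t(), x_0))   # project to low-rank space, (bs, low_rank, 1)
v_x = paddle.tanh(paddle.matmul(C, v_x))       # nonlinear map inside the low-rank space
uv_x = paddle.matmul(U, v_x)                   # project back, (bs, in_features, 1)
dot_ = x_0 * (uv_x + bias)                     # Hadamard product with the input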
Example 20
    def forward(self,
                query,
                processed_key,
                value,
                attention_weights_cat,
                mask=None):
        """Compute context vector and attention weights.
        
        Parameters
        -----------
        query : Tensor [shape=(batch_size, d_query)] 
            The queries.
            
        processed_key : Tensor [shape=(batch_size, time_steps_k, d_attention)] 
            The keys after linear layer.
            
        value : Tensor [shape=(batch_size, time_steps_k, d_key)] 
            The values.

        attention_weights_cat : Tensor [shape=(batch_size, time_step_k, 2)]
            Attention weights concat.
            
        mask : Tensor, optional
            The mask. Shape should be (batch_size, times_steps_q, time_steps_k) or broadcastable shape.
            Defaults to None.

        Returns
        ----------
        attention_context : Tensor [shape=(batch_size, time_steps_q, d_attention)] 
            The context vector.
            
        attention_weights : Tensor [shape=(batch_size, times_steps_q, time_steps_k)]
            The attention weights.
        """

        processed_query = self.query_layer(paddle.unsqueeze(query, axis=[1]))
        processed_attention_weights = self.location_layer(
            self.location_conv(attention_weights_cat))
        alignment = self.value(
            paddle.tanh(processed_attention_weights + processed_key +
                        processed_query))

        if mask is not None:
            alignment = alignment + (1.0 - mask) * -1e9

        attention_weights = F.softmax(alignment, axis=1)
        attention_context = paddle.matmul(attention_weights,
                                          value,
                                          transpose_x=True)

        attention_weights = paddle.squeeze(attention_weights, axis=[-1])
        attention_context = paddle.squeeze(attention_context, axis=[1])

        return attention_context, attention_weights
Example 21
    def get_action(self, state):
        epsilon = paddle.to_tensor(1e-7, dtype='float32')

        mean, log_std = self.forward(state)
        std = log_std.exp()
        normal = Normal(mean, std)
        z = normal.sample([1])
        action = paddle.tanh(z)
        log_prob = normal.log_prob(z) - paddle.log(1 - action.pow(2) + epsilon)
        log_prob = log_prob.sum(-1, keepdim=True)

        return action, log_prob, z, mean, log_std
Example 22
 def forward(self, prev_hidden, batch_H, char_onehots):
     batch_H_proj = self.i2h(batch_H)
     prev_hidden_proj = paddle.unsqueeze(self.h2h(prev_hidden), axis=1)
     res = paddle.add(batch_H_proj, prev_hidden_proj)
     res = paddle.tanh(res)
     e = self.score(res)
     alpha = F.softmax(e, axis=1)
     alpha = paddle.transpose(alpha, [0, 2, 1])
     context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1)
     concat_context = paddle.concat([context, char_onehots], 1)
     cur_hidden = self.rnn(concat_context, prev_hidden)
     return cur_hidden, alpha
Example 23
    def _lstm(self, inputs, hidden, cell, token_idx):
        cells = lstm_cell(self.lstm_num_layers, self.hidden_size)
        output, new_states = cells.call(inputs, states=([[hidden, cell]]))
        logits = paddle.static.nn.fc(new_states[0],
                                     self.range_tables[token_idx])

        if self.temperature is not None:
            logits = logits / self.temperature
        if self.tanh_constant is not None:
            logits = self.tanh_constant * paddle.tanh(logits)

        return logits, output, new_states
Example 24
    def sample(self, obs):
        act_mean, act_log_std = self.model.policy(obs)
        normal = Normal(act_mean, act_log_std.exp())
        # for reparameterization trick  (mean + std*N(0,1))
        x_t = normal.sample([1])
        action = paddle.tanh(x_t)

        log_prob = normal.log_prob(x_t)
        # Enforcing Action Bound
        log_prob -= paddle.log((1 - action.pow(2)) + 1e-6)
        log_prob = paddle.sum(log_prob, axis=-1, keepdim=True)
        return action[0], log_prob[0]
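The "Enforcing Action Bound" correction above follows the change-of-variables formula log pi(a) = log N(u) - sum(log(1 - tanh(u)^2 + eps)) for a tanh-squashed Gaussian. A minimal standalone sketch with made-up mean and std:

import paddle
from paddle.distribution import Normal

mean, std = paddle.zeros([3]), paddle.ones([3])
normal = Normal(mean, std)

u = normal.sample([1])                     # unbounded pre-activation sample
action = paddle.tanh(u)                    # squashed into (-1, 1)
log_prob = normal.log_prob(u) - paddle.log(1 - action.pow(2) + 1e-6)
log_prob = paddle.sum(log_prob, axis=-1, keepdim=True)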
Example 25
 def forward(self, text, seq_len):
     # Shape: (batch_size, num_tokens, embedding_dim)
     embedded_text = self.embedder(text)
     # Shape: (batch_size, num_tokens, num_directions*rnn_hidden_size)
     # num_directions = 2 if direction is 'bidirect'
     # if not, num_directions = 1
     text_repr = self.rnn_encoder(embedded_text, sequence_length=seq_len)
     # Shape: (batch_size, fc_hidden_size)
     fc_out = paddle.tanh(self.fc(text_repr))
     # Shape: (batch_size, num_classes)
     logits = self.output_layer(fc_out)
     return logits
Example 26
    def forward(self, x_1, seq_len_1, x_2=None, seq_len_2=None):
        x_embed_1 = self.embedder(x_1)
        lstm_out_1, (hidden_1, _) = self.lstm(x_embed_1,
                                              sequence_length=seq_len_1)
        out_1 = paddle.concat((hidden_1[-2, :, :], hidden_1[-1, :, :]), axis=1)
        if x_2 is not None:
            x_embed_2 = self.embedder(x_2)
            lstm_out_2, (hidden_2, _) = self.lstm(x_embed_2,
                                                  sequence_length=seq_len_2)
            out_2 = paddle.concat((hidden_2[-2, :, :], hidden_2[-1, :, :]),
                                  axis=1)
            out = paddle.concat(
                x=[out_1, out_2, out_1 + out_2,
                   paddle.abs(out_1 - out_2)],
                axis=1)
            out = paddle.tanh(self.fc_1(out))
        else:
            out = paddle.tanh(self.fc(out_1))
        logits = self.output_layer(out)

        return logits
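The pair-feature construction used in the two-input branch, [u, v, u + v, |u - v|], can be sketched on its own with made-up encodings:

import paddle

out_1 = paddle.randn([4, 128])   # encoding of the first sentence
out_2 = paddle.randn([4, 128])   # encoding of the second sentence
pair = paddle.concat(
    x=[out_1, out_2, out_1 + out_2, paddle.abs(out_1 - out_2)], axis=1)
print(pair.shape)  # [4, 512]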
Example 27
    def _test(self, run_mlu=True):
        main_prog = paddle.static.Program()
        startup_prog = paddle.static.Program()
        main_prog.random_seed = SEED
        startup_prog.random_seed = SEED
        np.random.seed(SEED)

        a_np = np.random.random(size=(32, 32)).astype('float32')
        b_np = np.random.random(size=(32, 32)).astype('float32')
        label_np = np.random.randint(2, size=(32, 1)).astype('int64')

        with paddle.static.program_guard(main_prog, startup_prog):
            a = paddle.static.data(name="a", shape=[32, 32], dtype='float32')
            b = paddle.static.data(name="b", shape=[32, 32], dtype='float32')
            label = paddle.static.data(name="label",
                                       shape=[32, 1],
                                       dtype='int64')

            c = paddle.multiply(a, b)
            d = paddle.tanh(c)

            fc_1 = fluid.layers.fc(input=d, size=128)
            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')

            cost = fluid.layers.cross_entropy(input=prediction, label=label)
            loss = fluid.layers.reduce_mean(cost)
            sgd = fluid.optimizer.SGD(learning_rate=0.01)
            sgd.minimize(loss)

        if run_mlu:
            place = paddle.MLUPlace(0)
        else:
            place = paddle.CPUPlace()

        exe = paddle.static.Executor(place)
        exe.run(startup_prog)

        print("Start run on {}".format(place))
        for epoch in range(100):

            pred_res, loss_res = exe.run(main_prog,
                                         feed={
                                             "a": a_np,
                                             "b": b_np,
                                             "label": label_np
                                         },
                                         fetch_list=[prediction, loss])
            if epoch % 10 == 0:
                print("Epoch {} | Prediction[0]: {}, Loss: {}".format(
                    epoch, pred_res[0], loss_res))

        return pred_res, loss_res
Example 28
    def get_coord_features(self, points, batchsize, rows, cols):
        if self.cpu_mode:
            coords = []
            for i in range(batchsize):
                norm_delimeter = (1.0 if self.use_disks else
                                  self.spatial_scale * self.norm_radius)
                coords.append(
                    self._get_dist_maps(points[i].numpy().astype("float32"),
                                        rows, cols, norm_delimeter))
            coords = paddle.to_tensor(np.stack(coords,
                                               axis=0)).astype("float32")
        else:
            num_points = points.shape[1] // 2
            points = points.reshape([-1, points.shape[2]])
            points, points_order = paddle.split(points, [2, 1], axis=1)
            invalid_points = paddle.max(points, axis=1, keepdim=False) < 0
            row_array = paddle.arange(start=0,
                                      end=rows,
                                      step=1,
                                      dtype="float32")
            col_array = paddle.arange(start=0,
                                      end=cols,
                                      step=1,
                                      dtype="float32")

            coord_rows, coord_cols = paddle.meshgrid(row_array, col_array)
            coords = paddle.unsqueeze(paddle.stack([coord_rows, coord_cols],
                                                   axis=0),
                                      axis=0).tile([points.shape[0], 1, 1, 1])

            add_xy = (points * self.spatial_scale).reshape(
                [points.shape[0], points.shape[1], 1, 1])
            coords = coords - add_xy
            if not self.use_disks:
                coords = coords / (self.norm_radius * self.spatial_scale)

            coords = coords * coords
            coords[:, 0] += coords[:, 1]
            coords = coords[:, :1]
            invalid_points = invalid_points.numpy()

            coords[invalid_points, :, :, :] = 1e6
            coords = coords.reshape([-1, num_points, 1, rows, cols])
            coords = paddle.min(coords, axis=1)
            coords = coords.reshape([-1, 2, rows, cols])

        if self.use_disks:
            coords = (coords <= (self.norm_radius * self.spatial_scale)**
                      2).astype("float32")
        else:
            coords = paddle.tanh(paddle.sqrt(coords) * 2)
        return coords
Example 29
 def forward(self, inputs):
     pad_input = F.pad2d(inputs, [3, 3, 3, 3], mode="reflect")
     y = self.conv0(pad_input)
     y = self.conv1(y)
     y = self.conv2(y)
     for resnet_block in self.resnet_blocks:
         y = resnet_block(y)
     y = self.deconv0(y)
     y = self.deconv1(y)
     y = F.pad2d(y, [3, 3, 3, 3], mode="reflect")
     y = self.conv3(y)
     y = paddle.tanh(y)
     return y
Example 30
 def forward(self, text, seq_len):
     mask = text != self.padding_idx
     embedded_text = self.embedder(text)
     # Encode text, shape: (batch, max_seq_len, num_directions * hidden_size)
     encoded_text, (last_hidden, last_cell) = self.bilstm(
         embedded_text, sequence_length=seq_len)
     # Shape: (batch_size, lstm_hidden_size)
     hidden, att_weights = self.attention(encoded_text, mask)
     # Shape: (batch_size, fc_hidden_size)
     fc_out = paddle.tanh(self.fc(hidden))
     # Shape: (batch_size, num_classes)
     logits = self.output_layer(fc_out)
     return logits