def forward(self, input, mask=None):
    """
    Args:
        input (obj: `paddle.Tensor`) of shape (batch, seq_len, input_size):
            Tensor containing the features of the input sequence.
        mask (obj: `paddle.Tensor`, optional, defaults to `None`) of shape (batch, seq_len):
            A bool tensor in which each element indicates whether the corresponding
            input word id is the pad token or not.
    """
    forward_input, backward_input = paddle.chunk(input, chunks=2, axis=2)
    # elementwise-sum forward_x and backward_x
    # Shape: (batch_size, max_seq_len, hidden_size)
    h = paddle.add_n([forward_input, backward_input])
    # Shape: (batch_size, hidden_size, 1)
    att_weight = self.att_weight.tile(repeat_times=(paddle.shape(h)[0], 1, 1))
    # Shape: (batch_size, max_seq_len, 1)
    att_score = paddle.bmm(paddle.tanh(h), att_weight)
    if mask is not None:
        # mask, remove the effect of 'PAD'
        mask = paddle.cast(mask, dtype='float32')
        mask = mask.unsqueeze(axis=-1)
        inf_tensor = paddle.full(
            shape=mask.shape, dtype='float32', fill_value=-INF)
        att_score = paddle.multiply(att_score, mask) + paddle.multiply(
            inf_tensor, (1 - mask))
    # Shape: (batch_size, max_seq_len, 1)
    att_weight = F.softmax(att_score, axis=1)
    # Shape: (batch_size, lstm_hidden_size)
    reps = paddle.bmm(h.transpose(perm=(0, 2, 1)), att_weight).squeeze(axis=-1)
    reps = paddle.tanh(reps)
    return reps, att_weight
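# Illustrative sketch (not part of the original model): the masked-softmax trick used
# above can be reproduced with plain tensors. Assumptions: INF is a large positive
# constant (e.g. 1e6) and the scores have shape (batch_size, max_seq_len, 1).
import paddle
import paddle.nn.functional as F

INF = 1e6
att_score = paddle.randn([2, 5, 1])  # fake attention scores
mask = paddle.to_tensor([[1, 1, 1, 0, 0],
                         [1, 1, 0, 0, 0]], dtype='float32').unsqueeze(-1)
# padded positions receive -INF, so softmax assigns them (near-)zero weight
masked_score = att_score * mask + (-INF) * (1 - mask)
att_weight = F.softmax(masked_score, axis=1)  # sums to 1 over the real tokens only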
def test_simple_pylayer_return_none_with_no_grad(self):
    class tanh(PyLayer):
        @staticmethod
        def forward(ctx, x1, x2, func1, func2=paddle.square):
            ctx.func = func2
            y1 = func1(x1)
            y2 = func1(x2)
            ctx.save_for_backward(y1, y2)
            return 1, None, y1, y2, ''

        @staticmethod
        def backward(ctx, dy1, dy2):
            y1, y2 = ctx.saved_tensor()
            re1 = dy1 * (1 - ctx.func(y1))
            re2 = dy2 * (1 - paddle.square(y2))
            return re1, None

    input1 = paddle.randn([2, 3]).astype("float64")
    input2 = input1.detach().clone()
    input3 = input1.detach().clone()
    input4 = input1.detach().clone()
    input1.stop_gradient = False
    input2.stop_gradient = False
    input3.stop_gradient = True
    input4.stop_gradient = True
    z = tanh.apply(input1, input3, paddle.tanh, paddle.square)
    z = z[2] + z[3]
    z.mean().backward()

    z2 = paddle.tanh(input2) + paddle.tanh(input4)
    z2.mean().backward()

    self.assertTrue(
        np.max(np.abs(input1.grad.numpy() - input2.grad.numpy())) < 1e-10)
def build_program():
    main_program = paddle.static.Program()
    startup_program = paddle.static.Program()
    with paddle.static.program_guard(main_program, startup_program):
        with paddle.static.device_guard('cpu'):
            data = paddle.ones([4, 64], dtype='float32', name='data')

        # data -> [memcpy_h2d] -> data' -> [matmul] -> out -> [add] -> add_out
        with paddle.static.device_guard('gpu'):
            weight = paddle.randn([64, 64], name='weight')  # gpu
            matmul_out = paddle.matmul(data, weight, name='matmul_out')  # gpu
            bias = paddle.ones([4, 64], dtype='float32', name='bias')
            add_out = paddle.add(matmul_out, bias, name='add_out')

        # add_out -> [memcpy_d2h] -> add_out' -> [sub] -> sub_out -> [tanh] -> tanh_out
        with paddle.static.device_guard('cpu'):
            sub_out = paddle.subtract(add_out, data, name='sub_out')
            tanh_out = paddle.tanh(sub_out, name='tanh_out')

        with paddle.static.device_guard('gpu'):
            bias_1 = paddle.add(bias, sub_out, name='bias_1')
            out_before = paddle.tanh(bias_1, name='out_before')
            out_last = paddle.subtract(tanh_out, data, name='out_last')
            out = paddle.add(out_before, out_last, name='out')
            mean = paddle.mean(out, name='mean_out')

    return main_program, startup_program, [mean]
def forward(self, inputs):
    # Deal with features of different lengths:
    # 1. pad to the same length and make a tensor
    # 2. make a mask tensor of the same shape, filled with 1
    # 3. compute the output using the mask tensor, so the output is unaffected by padding
    assert (len(inputs) == self.feature_num
            ), "Input tensor does not contain {} features".format(self.feature_num)

    att_outs = []
    for i in range(len(inputs)):
        # 1. fc
        m = getattr(self, "fc_feature{}".format(i))
        output_fc = m(inputs[i][0])
        output_fc = paddle.tanh(output_fc)

        # 2. bi_lstm
        m = getattr(self, "bi_lstm{}".format(i))
        lstm_out, _ = m(inputs=output_fc, sequence_length=inputs[i][1])
        lstm_dropout = self.dropout(lstm_out)

        # 3. att_fc
        m = getattr(self, "att_fc{}".format(i))
        lstm_weight = m(lstm_dropout)

        # 4. softmax replacement starts here, since it involves a sum over time steps
        lstm_exp = paddle.exp(lstm_weight)
        lstm_mask = paddle.mean(inputs[i][2], axis=2)
        lstm_exp_with_mask = paddle.multiply(x=lstm_exp, y=lstm_mask, axis=0)
        lstm_sum_with_mask = paddle.sum(lstm_exp_with_mask, axis=1)
        exponent = -1
        lstm_denominator = paddle.pow(lstm_sum_with_mask, exponent)
        lstm_softmax = paddle.multiply(x=lstm_exp, y=lstm_denominator, axis=0)
        lstm_weight = lstm_softmax
        # softmax replacement ends here

        lstm_scale = paddle.multiply(x=lstm_dropout, y=lstm_weight, axis=0)

        # 5. sequence_pool replacement starts here, since it involves a sum over time steps
        lstm_scale_with_mask = paddle.multiply(x=lstm_scale, y=lstm_mask, axis=0)
        fea_lens = inputs[i][1]
        fea_len = int(fea_lens[0])
        lstm_pool = paddle.sum(lstm_scale_with_mask, axis=1)
        # sequence_pool replacement ends here

        att_outs.append(lstm_pool)

    att_out = paddle.concat(att_outs, axis=1)
    fc_out1 = self.fc_out1(att_out)
    fc_out1_act = self.relu(fc_out1)
    fc_out2 = self.fc_out2(fc_out1_act)
    fc_out2_act = paddle.tanh(fc_out2)
    fc_logit = self.fc_logit(fc_out2_act)
    output = self.sigmoid(fc_logit)
    return fc_logit, output
def forward(self, inputs):
    outputs1 = self.fc1(inputs)
    outputs1 = paddle.tanh(outputs1)
    outputs2 = self.fc2(outputs1)
    outputs2 = paddle.tanh(outputs2)
    outputs_final = self.fc3(outputs2)
    outputs_final = F.sigmoid(outputs_final)
    return outputs_final
def forward(self, text):
    # Shape: (batch_size, num_tokens, embedding_dim)
    embedded_text = self.embedder(text)
    # Shape: (batch_size, len(ngram_filter_sizes) * num_filter)
    encoder_out = paddle.tanh(self.encoder(embedded_text))
    # Shape: (batch_size, fc_hidden_size)
    fc_out = paddle.tanh(self.fc(encoder_out))
    # Shape: (batch_size, num_classes)
    logits = self.output_layer(fc_out)
    return logits
def forward(self, input_embedding, init_hidden=None, init_cell=None):
    cell_array = []
    hidden_array = []
    for i in range(self._num_layers):
        hidden_array.append(init_hidden[i])
        cell_array.append(init_cell[i])

    res = []
    for index in range(self._num_steps):
        step_input = input_embedding[:, index, :]
        for k in range(self._num_layers):
            pre_hidden = hidden_array[k]
            pre_cell = cell_array[k]
            weight_1 = self.weight_1_arr[k]
            bias = self.bias_arr[k]

            nn = paddle.concat(x=[step_input, pre_hidden], axis=1)
            gate_input = paddle.matmul(x=nn, y=weight_1)
            gate_input = paddle.add(x=gate_input, y=bias)
            i, j, f, o = paddle.split(x=gate_input, num_or_sections=4, axis=-1)
            c = pre_cell * paddle.nn.functional.sigmoid(
                f) + paddle.nn.functional.sigmoid(i) * paddle.tanh(j)
            m = paddle.tanh(c) * paddle.nn.functional.sigmoid(o)
            hidden_array[k] = m
            cell_array[k] = c
            step_input = m

            if self._dropout is not None and self._dropout > 0.0:
                step_input = paddle.nn.functional.dropout(
                    step_input,
                    dropout_prob=self._dropout,
                    dropout_implementation='upscale_in_train')
        res.append(step_input)

    real_res = paddle.concat(x=res, axis=1)
    real_res = paddle.reshape(real_res,
                              [-1, self._num_steps, self._hidden_size])
    last_hidden = paddle.concat(x=hidden_array, axis=1)
    last_hidden = paddle.reshape(
        last_hidden, shape=[-1, self._num_layers, self._hidden_size])
    last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2])
    last_cell = paddle.concat(x=cell_array, axis=1)
    last_cell = paddle.reshape(
        last_cell, shape=[-1, self._num_layers, self._hidden_size])
    last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2])
    return real_res, last_hidden, last_cell
def forward(self, text, seq_len=None):
    # Shape: (batch_size, num_tokens, embedding_dim)
    embedded_text = self.embedder(text)
    # Shape: (batch_size, embedding_dim)
    summed = self.bow_encoder(embedded_text)
    encoded_text = paddle.tanh(summed)
    # Shape: (batch_size, hidden_size)
    fc1_out = paddle.tanh(self.fc1(encoded_text))
    # Shape: (batch_size, fc_hidden_size)
    fc2_out = paddle.tanh(self.fc2(fc1_out))
    # Shape: (batch_size, num_classes)
    logits = self.output_layer(fc2_out)
    return logits
def test_simple_pylayer_single_output(self):
    class tanh(PyLayer):
        @staticmethod
        def forward(ctx, x1, func1, func2=paddle.square):
            ctx.func = func2
            y1 = func1(x1)
            ctx.save_for_backward(y1)
            return y1

        @staticmethod
        def backward(ctx, dy1):
            y1, = ctx.saved_tensor()
            re1 = dy1 * (1 - ctx.func(y1))
            return re1

    input1 = paddle.randn([2, 3]).astype("float64")
    input2 = input1.detach().clone()
    input1.stop_gradient = False
    input2.stop_gradient = False
    z = tanh.apply(x1=input1, func1=paddle.tanh)
    z.mean().backward()

    z2 = paddle.tanh(input2)
    z2.mean().backward()

    self.assertTrue(
        np.max(np.abs(input1.grad.numpy() - input2.grad.numpy())) < 1e-10)
def forward(self, x, condition):
    """Compute output for a whole folded sequence.

    Parameters
    ----------
    x : Tensor [shape=(batch_size, channel, height, width)]
        The input.
    condition : Tensor [shape=(batch_size, condition_channel, height, width)]
        The local condition.

    Returns
    -------
    res : Tensor [shape=(batch_size, channel, height, width)]
        The residual output.
    skip : Tensor [shape=(batch_size, channel, height, width)]
        The skip output.
    """
    x_in = x
    x = self.conv(x)
    x += self.condition_proj(condition)

    content, gate = paddle.chunk(x, 2, axis=1)
    x = paddle.tanh(content) * F.sigmoid(gate)

    x = self.out_proj(x)
    res, skip = paddle.chunk(x, 2, axis=1)
    res = x_in + res
    return res, skip
def forward(self, x):
    # NOTE: manually trigger `__iter__` logic.
    params = list(self.params.__iter__())

    out = paddle.matmul(x, params[0])
    out = paddle.add(out, params[1])
    out = paddle.tanh(out)
    return out
def forward_interpet(self, text, seq_len):
    embedded_text = self.embedder(text)  # Shape: (batch_size, num_tokens, embedding_dim)

    # text_repr = self.lstm_encoder(embedded_text, sequence_length=seq_len)
    # Shape: (batch_size, num_tokens, num_directions * hidden)

    # encoded_text: tensor[batch, seq_len, num_directions * hidden]
    # last_hidden: tensor[2, batch, hidden]
    encoded_text, (last_hidden, last_cell) = self.lstm_layer(
        embedded_text, sequence_length=seq_len)

    if self.direction == 'bidirect':
        # text_repr: tensor[batch, 2 * hidden], bidirectional
        text_repr = paddle.concat(
            (last_hidden[-2, :, :], last_hidden[-1, :, :]), axis=1)
    else:
        # text_repr: tensor[1, hidden_size], unidirectional
        text_repr = last_hidden[-1, :, :]

    fc_out = paddle.tanh(self.fc(text_repr))  # Shape: (batch_size, fc_hidden_size)
    logits = self.output_layer(fc_out)  # Shape: (batch_size, num_classes)
    probs = self.softmax(logits)
    return probs, text_repr, embedded_text
def forward(self, inputs):
    text = inputs[0]
    pos_tag = inputs[1]
    neg_tag = inputs[2]

    text_emb = self.text_embedding(text)
    text_emb = paddle.reshape(text_emb, shape=[-1, self.text_len, self.emb_dim])
    pos_tag_emb = self.tag_embedding(pos_tag)
    pos_tag_emb = paddle.reshape(pos_tag_emb, shape=[-1, self.emb_dim])
    neg_tag_emb = self.tag_embedding(neg_tag)
    neg_tag_emb = paddle.reshape(
        neg_tag_emb, shape=[-1, self.neg_size, self.emb_dim])

    conv_1d = self.conv(text_emb)
    act = paddle.tanh(conv_1d)
    maxpool = paddle.max(act, axis=1)
    maxpool = paddle.reshape(maxpool, shape=[-1, self.hid_dim])
    text_hid = self.hid_fc(maxpool)

    cos_pos = F.cosine_similarity(
        pos_tag_emb, text_hid, axis=1).reshape([-1, 1])
    # fluid.layers.Print(cos_pos)

    neg_tag_emb = paddle.max(neg_tag_emb, axis=1)
    neg_tag_emb = paddle.reshape(neg_tag_emb, shape=[-1, self.emb_dim])
    cos_neg = F.cosine_similarity(
        neg_tag_emb, text_hid, axis=1).reshape([-1, 1])
    # fluid.layers.Print(cos_neg)

    return cos_pos, cos_neg
def model(self, x, w, bias, opt):
    paddle.seed(0)
    place = paddle.CPUPlace()
    if paddle.device.is_compiled_with_cuda():
        place = paddle.CUDAPlace(0)
    exe = paddle.static.Executor(place)
    main = paddle.static.Program()
    startup = paddle.static.Program()
    with paddle.static.program_guard(main, startup):
        input_x = paddle.static.data('x', x.shape, dtype=x.dtype)
        input_x.stop_gradient = False
        params_w = paddle.static.create_parameter(
            shape=w.shape, dtype=w.dtype, is_bias=False)
        params_bias = paddle.static.create_parameter(
            shape=bias.shape, dtype=bias.dtype, is_bias=True)
        y = paddle.tanh(paddle.matmul(input_x, params_w) + params_bias)
        loss = paddle.norm(y, p=2)
        opt = opt
        _, grads = opt.minimize(loss)
        if prim_enabled():
            prim2orig(main.block(0))

    exe.run(startup)
    grads = exe.run(main,
                    feed={'x': x, 'w': w, 'bias': bias},
                    fetch_list=grads)
    return grads
def gelu_new(x):
    """
    Implementation of the GELU activation function currently in the Google BERT repo
    (identical to OpenAI GPT). Also see the Gaussian Error Linear Units paper:
    https://arxiv.org/abs/1606.08415
    """
    return 0.5 * x * (1.0 + paddle.tanh(
        math.sqrt(2.0 / math.pi) * (x + 0.044715 * paddle.pow(x, 3.0))))
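# Usage sketch (illustrative, not from the original source): the tanh approximation
# above should closely match Paddle's built-in approximated GELU; the comparison
# below is a quick sanity check, not part of the original code.
import math
import paddle
import paddle.nn.functional as F

x = paddle.linspace(-3.0, 3.0, num=7)
y_custom = gelu_new(x)
y_builtin = F.gelu(x, approximate=True)  # tanh-approximated GELU
print(float(paddle.abs(y_custom - y_builtin).max()))  # expected to be ~0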
def backward(ctx, dy):
    with paddle.set_grad_enabled(True):
        temp = ctx.inputs
        temp.stop_gradient = False
        z = paddle.tanh(temp)
        z.backward()
        self.assertTrue(temp.grad is not None)
        return paddle.to_tensor(temp.grad)
def forward(self, query, title, query_seq_len=None, title_seq_len=None):
    # Shape: (batch_size, num_tokens, embedding_dim)
    embedded_query = self.embedder(query)
    embedded_title = self.embedder(title)
    # Shape: (batch_size, embedding_dim)
    summed_query = self.bow_encoder(embedded_query)
    summed_title = self.bow_encoder(embedded_title)
    encoded_query = paddle.tanh(summed_query)
    encoded_title = paddle.tanh(summed_title)
    # Shape: (batch_size, embedding_dim*2)
    contacted = paddle.concat([encoded_query, encoded_title], axis=-1)
    # Shape: (batch_size, fc_hidden_size)
    fc_out = paddle.tanh(self.fc(contacted))
    # Shape: (batch_size, num_classes)
    logits = self.output_layer(fc_out)
    # probs = F.softmax(logits, axis=-1)
    return logits
def forward_interpreter(self,
                        query,
                        title,
                        query_seq_len,
                        title_seq_len,
                        noise=None,
                        i=None,
                        n_samples=None):
    assert query_seq_len is not None and title_seq_len is not None
    # Shape: (batch_size, num_tokens, embedding_dim)
    query_baseline = paddle.to_tensor(
        [self.pad_token_id] * query.shape[1]).unsqueeze(0)
    title_baseline = paddle.to_tensor(
        [self.pad_token_id] * title.shape[1]).unsqueeze(0)

    embedded_query = self.embedder(query)
    embedded_title = self.embedder(title)
    embedded_query_baseline = self.embedder(query_baseline)
    embedded_title_baseline = self.embedder(title_baseline)

    if noise is not None and noise.upper() == 'INTEGRATED':
        embedded_query = embedded_query_baseline + i / (n_samples - 1) * (
            embedded_query - embedded_query_baseline)
        embedded_title = embedded_title_baseline + i / (n_samples - 1) * (
            embedded_title - embedded_title_baseline)

    # Shape: (batch_size, lstm_hidden_size)
    query_repr = self.lstm_encoder(embedded_query, sequence_length=query_seq_len)
    title_repr = self.lstm_encoder(embedded_title, sequence_length=title_seq_len)
    # Shape: (batch_size, 2*lstm_hidden_size)
    contacted = paddle.concat([query_repr, title_repr], axis=-1)
    # Shape: (batch_size, fc_hidden_size)
    fc_out = paddle.tanh(self.fc(contacted))
    # Shape: (batch_size, num_classes)
    logits = self.output_layer(fc_out)
    probs = F.softmax(logits, axis=-1)

    q_att = paddle.matmul(
        fc_out, embedded_query, transpose_y=True).squeeze(axis=[1])  # (bsz, query_len)
    q_att = F.softmax(q_att, axis=-1)
    t_att = paddle.matmul(
        fc_out, embedded_title, transpose_y=True).squeeze(axis=[1])  # (bsz, title_len)
    t_att = F.softmax(t_att, axis=-1)

    additional_info = {
        'embedded': [embedded_query, embedded_title],
        'attention': [q_att, t_att],
    }
    # return logits, additional_info
    return probs, additional_info
def forward(self, inputs):
    x_0 = inputs.unsqueeze(2)  # (bs, in_features, 1)
    x_l = x_0
    for i in range(self.layer_num):
        output_of_experts = []
        gating_score_of_experts = []
        for expert_id in range(self.num_experts):
            # (1) G(x_l)
            # compute the gating score by x_l
            gating_score_of_experts.append(
                self.gating[expert_id](x_l.squeeze(2)))

            # (2) E(x_l)
            # project the input x_l to $\mathbb{R}^{r}$
            v_x = paddle.matmul(self.V_list[i][expert_id].t(), x_l)  # (bs, low_rank, 1)

            # nonlinear activation in low rank space
            v_x = paddle.tanh(v_x)
            v_x = paddle.matmul(self.C_list[i][expert_id], v_x)
            v_x = paddle.tanh(v_x)

            # project back to $\mathbb{R}^{d}$
            uv_x = paddle.matmul(self.U_list[i][expert_id], v_x)  # (bs, in_features, 1)
            dot_ = uv_x + self.bias[i]
            dot_ = x_0 * dot_  # Hadamard-product

            output_of_experts.append(dot_.squeeze(2))

        # (3) mixture of low-rank experts
        output_of_experts = paddle.stack(
            output_of_experts, axis=2)  # (bs, in_features, num_experts)
        gating_score_of_experts = paddle.stack(
            gating_score_of_experts, axis=1)  # (bs, num_experts, 1)
        moe_out = paddle.matmul(output_of_experts,
                                F.softmax(gating_score_of_experts, axis=1))
        x_l = moe_out + x_l  # (bs, in_features, 1)

    x_l = x_l.squeeze()  # (bs, in_features)
    return x_l
def forward(self, query, processed_key, value, attention_weights_cat, mask=None):
    """Compute context vector and attention weights.

    Parameters
    ----------
    query : Tensor [shape=(batch_size, d_query)]
        The queries.
    processed_key : Tensor [shape=(batch_size, time_steps_k, d_attention)]
        The keys after linear layer.
    value : Tensor [shape=(batch_size, time_steps_k, d_key)]
        The values.
    attention_weights_cat : Tensor [shape=(batch_size, time_steps_k, 2)]
        Attention weights concat.
    mask : Tensor, optional
        The mask. Shape should be (batch_size, time_steps_q, time_steps_k)
        or broadcastable shape. Defaults to None.

    Returns
    -------
    attention_context : Tensor [shape=(batch_size, time_steps_q, d_attention)]
        The context vector.
    attention_weights : Tensor [shape=(batch_size, time_steps_q, time_steps_k)]
        The attention weights.
    """
    processed_query = self.query_layer(paddle.unsqueeze(query, axis=[1]))
    processed_attention_weights = self.location_layer(
        self.location_conv(attention_weights_cat))
    alignment = self.value(
        paddle.tanh(processed_attention_weights + processed_key +
                    processed_query))

    if mask is not None:
        alignment = alignment + (1.0 - mask) * -1e9

    attention_weights = F.softmax(alignment, axis=1)
    attention_context = paddle.matmul(
        attention_weights, value, transpose_x=True)

    attention_weights = paddle.squeeze(attention_weights, axis=[-1])
    attention_context = paddle.squeeze(attention_context, axis=[1])

    return attention_context, attention_weights
def get_action(self, state):
    epsilon = paddle.to_tensor(1e-7, dtype='float32')
    mean, log_std = self.forward(state)
    std = log_std.exp()
    normal = Normal(mean, std)
    z = normal.sample([1])
    action = paddle.tanh(z)

    log_prob = normal.log_prob(z) - paddle.log(1 - action.pow(2) + epsilon)
    log_prob = log_prob.sum(-1, keepdim=True)
    return action, log_prob, z, mean, log_std
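# Background sketch (illustrative, not from the original source): the correction term
# above follows from the change of variables a = tanh(z), which gives
#     log pi(a|s) = log N(z; mean, std) - sum_i log(1 - tanh(z_i)^2 + epsilon),
# where epsilon only guards against log(0). A minimal standalone check, assuming
# paddle.distribution.Normal for the Gaussian:
import paddle
from paddle.distribution import Normal

mean = paddle.zeros([3])
std = paddle.ones([3])
normal = Normal(mean, std)
z = normal.sample([1])                   # one pre-squash sample, shape (1, 3)
action = paddle.tanh(z)                  # squashed action in (-1, 1)
log_prob = normal.log_prob(z) - paddle.log(1 - action.pow(2) + 1e-7)
print(log_prob.sum(-1, keepdim=True).shape)  # (1, 1)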
def forward(self, prev_hidden, batch_H, char_onehots):
    batch_H_proj = self.i2h(batch_H)
    prev_hidden_proj = paddle.unsqueeze(self.h2h(prev_hidden), axis=1)
    res = paddle.add(batch_H_proj, prev_hidden_proj)
    res = paddle.tanh(res)
    e = self.score(res)

    alpha = F.softmax(e, axis=1)
    alpha = paddle.transpose(alpha, [0, 2, 1])
    context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1)
    concat_context = paddle.concat([context, char_onehots], 1)
    cur_hidden = self.rnn(concat_context, prev_hidden)
    return cur_hidden, alpha
def _lstm(self, inputs, hidden, cell, token_idx):
    cells = lstm_cell(self.lstm_num_layers, self.hidden_size)
    output, new_states = cells.call(inputs, states=([[hidden, cell]]))
    logits = paddle.static.nn.fc(new_states[0], self.range_tables[token_idx])

    if self.temperature is not None:
        logits = logits / self.temperature
    if self.tanh_constant is not None:
        logits = self.tanh_constant * paddle.tanh(logits)

    return logits, output, new_states
def sample(self, obs):
    act_mean, act_log_std = self.model.policy(obs)
    normal = Normal(act_mean, act_log_std.exp())
    # for reparameterization trick (mean + std*N(0,1))
    x_t = normal.sample([1])
    action = paddle.tanh(x_t)

    log_prob = normal.log_prob(x_t)
    # Enforcing Action Bound
    log_prob -= paddle.log((1 - action.pow(2)) + 1e-6)
    log_prob = paddle.sum(log_prob, axis=-1, keepdim=True)
    return action[0], log_prob[0]
def forward(self, text, seq_len):
    # Shape: (batch_size, num_tokens, embedding_dim)
    embedded_text = self.embedder(text)
    # Shape: (batch_size, num_tokens, num_directions * rnn_hidden_size)
    # num_directions = 2 if direction is 'bidirect'; otherwise num_directions = 1
    text_repr = self.rnn_encoder(embedded_text, sequence_length=seq_len)
    # Shape: (batch_size, fc_hidden_size)
    fc_out = paddle.tanh(self.fc(text_repr))
    # Shape: (batch_size, num_classes)
    logits = self.output_layer(fc_out)
    return logits
def forward(self, x_1, seq_len_1, x_2=None, seq_len_2=None):
    x_embed_1 = self.embedder(x_1)
    lstm_out_1, (hidden_1, _) = self.lstm(x_embed_1, sequence_length=seq_len_1)
    out_1 = paddle.concat((hidden_1[-2, :, :], hidden_1[-1, :, :]), axis=1)

    if x_2 is not None:
        x_embed_2 = self.embedder(x_2)
        lstm_out_2, (hidden_2, _) = self.lstm(x_embed_2, sequence_length=seq_len_2)
        out_2 = paddle.concat((hidden_2[-2, :, :], hidden_2[-1, :, :]), axis=1)
        out = paddle.concat(
            x=[out_1, out_2, out_1 + out_2, paddle.abs(out_1 - out_2)],
            axis=1)
        out = paddle.tanh(self.fc_1(out))
    else:
        out = paddle.tanh(self.fc(out_1))

    logits = self.output_layer(out)
    return logits
def _test(self, run_mlu=True):
    main_prog = paddle.static.Program()
    startup_prog = paddle.static.Program()
    main_prog.random_seed = SEED
    startup_prog.random_seed = SEED
    np.random.seed(SEED)

    a_np = np.random.random(size=(32, 32)).astype('float32')
    b_np = np.random.random(size=(32, 32)).astype('float32')
    label_np = np.random.randint(2, size=(32, 1)).astype('int64')

    with paddle.static.program_guard(main_prog, startup_prog):
        a = paddle.static.data(name="a", shape=[32, 32], dtype='float32')
        b = paddle.static.data(name="b", shape=[32, 32], dtype='float32')
        label = paddle.static.data(name="label", shape=[32, 1], dtype='int64')

        c = paddle.multiply(a, b)
        d = paddle.tanh(c)

        fc_1 = fluid.layers.fc(input=d, size=128)
        prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')

        cost = fluid.layers.cross_entropy(input=prediction, label=label)
        loss = fluid.layers.reduce_mean(cost)
        sgd = fluid.optimizer.SGD(learning_rate=0.01)
        sgd.minimize(loss)

    if run_mlu:
        place = paddle.MLUPlace(0)
    else:
        place = paddle.CPUPlace()

    exe = paddle.static.Executor(place)
    exe.run(startup_prog)
    print("Start run on {}".format(place))
    for epoch in range(100):
        pred_res, loss_res = exe.run(main_prog,
                                     feed={
                                         "a": a_np,
                                         "b": b_np,
                                         "label": label_np
                                     },
                                     fetch_list=[prediction, loss])
        if epoch % 10 == 0:
            print("Epoch {} | Prediction[0]: {}, Loss: {}".format(
                epoch, pred_res[0], loss_res))

    return pred_res, loss_res
def get_coord_features(self, points, batchsize, rows, cols):
    if self.cpu_mode:
        coords = []
        for i in range(batchsize):
            norm_delimeter = (1.0 if self.use_disks else
                              self.spatial_scale * self.norm_radius)
            coords.append(
                self._get_dist_maps(points[i].numpy().astype("float32"),
                                    rows, cols, norm_delimeter))
        coords = paddle.to_tensor(np.stack(coords, axis=0)).astype("float32")
    else:
        num_points = points.shape[1] // 2
        points = points.reshape([-1, points.shape[2]])
        points, points_order = paddle.split(points, [2, 1], axis=1)
        invalid_points = paddle.max(points, axis=1, keepdim=False) < 0
        row_array = paddle.arange(start=0, end=rows, step=1, dtype="float32")
        col_array = paddle.arange(start=0, end=cols, step=1, dtype="float32")

        coord_rows, coord_cols = paddle.meshgrid(row_array, col_array)
        coords = paddle.unsqueeze(
            paddle.stack([coord_rows, coord_cols], axis=0),
            axis=0).tile([points.shape[0], 1, 1, 1])

        add_xy = (points * self.spatial_scale).reshape(
            [points.shape[0], points.shape[1], 1, 1])
        coords = coords - add_xy
        if not self.use_disks:
            coords = coords / (self.norm_radius * self.spatial_scale)
        coords = coords * coords
        coords[:, 0] += coords[:, 1]
        coords = coords[:, :1]

        invalid_points = invalid_points.numpy()
        coords[invalid_points, :, :, :] = 1e6

        coords = coords.reshape([-1, num_points, 1, rows, cols])
        coords = paddle.min(coords, axis=1)
        coords = coords.reshape([-1, 2, rows, cols])

    if self.use_disks:
        coords = (coords <=
                  (self.norm_radius * self.spatial_scale)**2).astype("float32")
    else:
        coords = paddle.tanh(paddle.sqrt(coords) * 2)
    return coords
def forward(self, inputs):
    pad_input = F.pad2d(inputs, [3, 3, 3, 3], mode="reflect")
    y = self.conv0(pad_input)
    y = self.conv1(y)
    y = self.conv2(y)
    for resnet_block in self.resnet_blocks:
        y = resnet_block(y)
    y = self.deconv0(y)
    y = self.deconv1(y)
    y = F.pad2d(y, [3, 3, 3, 3], mode="reflect")
    y = self.conv3(y)
    y = paddle.tanh(y)
    return y
def forward(self, text, seq_len):
    mask = text != self.padding_idx
    embedded_text = self.embedder(text)
    # Encode text, shape: (batch, max_seq_len, num_directions * hidden_size)
    encoded_text, (last_hidden, last_cell) = self.bilstm(
        embedded_text, sequence_length=seq_len)
    # Shape: (batch_size, lstm_hidden_size)
    hidden, att_weights = self.attention(encoded_text, mask)
    # Shape: (batch_size, fc_hidden_size)
    fc_out = paddle.tanh(self.fc(hidden))
    # Shape: (batch_size, num_classes)
    logits = self.output_layer(fc_out)
    return logits