def forward(self, input):
    output1 = self.conv1_fpn(input[0])
    output2 = self.conv2_fpn(input[1])
    up2 = F.upsample(output2, size=paddle.shape(output1)[-2:], mode='nearest')
    output1 = paddle.add(output1, up2)
    output1 = self.conv3_fpn(output1)
    return output1, output2
def forward(self, inputs, start_level, end_level):
    for i in range(start_level + 1, end_level):
        upsample = getattr(self, 'up_' + str(i - start_level))
        project = getattr(self, 'proj_' + str(i - start_level))
        inputs[i] = project(inputs[i])
        inputs[i] = upsample(inputs[i])
        node = getattr(self, 'node_' + str(i - start_level))
        inputs[i] = node(paddle.add(inputs[i], inputs[i - 1]))
def forward(self, inputs):
    x = self.expand_conv(inputs)
    x = self.bottleneck_conv(x)
    if self.if_se:
        x = self.mid_se(x)
    x = self.linear_conv(x)
    if self.if_shortcut:
        x = paddle.add(inputs, x)
    return x
def forward(self, input, pre_hidden):
    tmp_i2h = paddle.fluid.layers.nn.mul(input, self._i2h_w)
    tmp_h2h = paddle.fluid.layers.nn.mul(pre_hidden, self._h2h_w)
    hidden = paddle.add(tmp_h2h, tmp_i2h)
    hidden = self._helper.append_activation(hidden, act='tanh')
    out = paddle.fluid.layers.nn.mul(hidden, self._h2o_w)
    softmax_out = paddle.nn.functional.softmax(out)
    reduce_out = paddle.fluid.layers.nn.reduce_sum(softmax_out)
    return reduce_out, hidden
def forward(self, inputs):
    conv0 = inputs
    short = self._short(inputs)
    if self.relu_first:
        conv0 = F.relu(conv0)
    conv1 = self._conv1(conv0)
    conv2 = F.relu(conv1)
    conv2 = self._conv2(conv2)
    pool = self._pool(conv2)
    return paddle.add(x=short, y=pool)
def forward(self, inputs):
    y = self.conv0(inputs)
    conv1 = self.conv1(y)
    if self.shortcut:
        short = inputs
    else:
        short = self.short(inputs)
    y = paddle.add(x=short, y=conv1)
    y = F.relu(y)
    return y
def forward(self, x):
    identity = x
    x = self.expand_conv(x)
    x = self.bottleneck_conv(x)
    if self.if_se:
        x = self.mid_se(x)
    x = self.linear_conv(x)
    if self.if_shortcut:
        x = paddle.add(identity, x)
    return x
def test_dygraph(self):
    with fluid.dygraph.guard():
        np_x = np.array([2, 3, 4]).astype('float64')
        np_y = np.array([1, 5, 2]).astype('float64')
        x = fluid.dygraph.to_variable(np_x)
        y = fluid.dygraph.to_variable(np_y)
        z = paddle.add(x, y)
        np_z = z.numpy()
        z_expected = np.array([3., 8., 6.])
        self.assertEqual((np_z == z_expected).all(), True)
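# A minimal sketch (not part of the original test) of the same check written
# against the Paddle 2.x imperative API: paddle.to_tensor replaces
# fluid.dygraph.to_variable and no dygraph guard is needed.
import numpy as np
import paddle

np_x = np.array([2, 3, 4]).astype('float64')
np_y = np.array([1, 5, 2]).astype('float64')
z = paddle.add(paddle.to_tensor(np_x), paddle.to_tensor(np_y))
assert (z.numpy() == np.array([3., 8., 6.])).all()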
def forward(self, x):
    identity = x
    if self.expand:
        x = self.expand_conv(x)
    x = self.bottleneck_conv(x)
    if self.use_se:
        x = self.mid_se(x)
    x = self.linear_conv(x)
    if self.use_res_connect:
        x = paddle.add(identity, x)
    return x
def forward(self, x):
    identity = x
    x = self.conv0(x)
    x = self.conv1(x)
    if self.shortcut:
        short = identity
    else:
        short = self.short(identity)
    x = paddle.add(x=x, y=short)
    x = self.relu(x)
    return x
def forward(self, x):
    out = None
    if not self.equalInOut:
        x = self.relu1(self.bn1(x))
    else:
        out = self.relu1(self.bn1(x))
    out = self.relu2(self.bn2(self.conv1(out if self.equalInOut else x)))
    if self.droprate > 0:
        out = F.dropout(out, p=self.droprate, training=self.training)
    out = self.conv2(out)
    return paddle.add(x if self.equalInOut else self.convShortcut(x), out)
def forward(self, x):
    residual = x
    x = self.conv1(x)
    x = self.conv2(x)
    if self.has_se:
        x = self.se(x)
    x = paddle.add(x=residual, y=x)
    x = self.relu(x)
    return x
def create_loss(batch_size, margin, cos_pos, cos_neg):
    loss_part1 = paddle.subtract(
        paddle.full(shape=[batch_size, 1], fill_value=margin, dtype='float32'),
        cos_pos)
    loss_part2 = paddle.add(loss_part1, cos_neg)
    loss_part3 = paddle.maximum(
        paddle.full(shape=[batch_size, 1], fill_value=0.0, dtype='float32'),
        loss_part2)
    avg_cost = paddle.mean(loss_part3)
    return avg_cost
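# A hypothetical usage sketch for create_loss above (names and shapes assumed):
# cos_pos/cos_neg stand in for positive- and negative-pair cosine similarities
# of shape [batch_size, 1]; the result is a single averaged hinge loss.
import paddle

batch_size, margin = 8, 0.1
cos_pos = paddle.rand([batch_size, 1], dtype='float32')
cos_neg = paddle.rand([batch_size, 1], dtype='float32')
hinge_loss = create_loss(batch_size, margin, cos_pos, cos_neg)
print(float(hinge_loss))  # single averaged scalar loss value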
def forward(self, inputs, residual=None):
    if residual is None:
        residual = inputs
    out = self.conv1(inputs)
    out = F.relu(out)
    out = self.conv2(out)
    out = paddle.add(x=out, y=residual)
    out = F.relu(out)
    return out
def create_loss(prediction):
    pos = paddle.slice(prediction, axes=[0, 1], starts=[0, 0], ends=[64, 1])
    neg = paddle.slice(prediction, axes=[0, 1], starts=[64, 0], ends=[128, 1])
    loss_part1 = paddle.subtract(
        paddle.full(shape=[64, 1], fill_value=1.0, dtype='float32'), pos)
    loss_part2 = paddle.add(loss_part1, neg)
    loss_part3 = paddle.maximum(
        paddle.full(shape=[64, 1], fill_value=0.0, dtype='float32'),
        loss_part2)
    avg_cost = paddle.mean(loss_part3)
    return avg_cost
def forward(self, indices, segments, positions):
    # word embeddings
    word_embeddings_weights = paddle.transpose(self.word_embeddings_weights,
                                               [1, 0])
    input_embeddings = paddle.gather(word_embeddings_weights, indices, axis=0)
    # position embeddings
    position_embeddings = self.position_embeddings(positions)
    # token type embeddings
    token_type_embeddings = paddle.fluid.input.one_hot(segments, depth=2)
    token_type_embeddings = paddle.matmul(token_type_embeddings,
                                          self.token_embeddings_weights)
    embeddings = paddle.add(input_embeddings, position_embeddings)
    embeddings = paddle.add(embeddings, token_type_embeddings)
    embeddings = self.layer_norm(embeddings)
    embeddings = self.dropout(embeddings)
    return embeddings, self.word_embeddings_weights
def forward(self, inputs):
    y = self._batch_norm(inputs)
    y = self.conv0(y)
    y = self.prelu(y)
    conv1 = self.conv1(y)
    if self.shortcut:
        short = self.short(inputs)
    else:
        short = inputs
    y = paddle.add(x=short, y=conv1)
    return y
def forward(self, pixel_embed, patch_embed):
    # inner
    pixel_embed = paddle.add(
        pixel_embed,
        self.drop_path(self.attn_in(self.norm_in(pixel_embed))))
    pixel_embed = paddle.add(
        pixel_embed,
        self.drop_path(self.mlp_in(self.norm_mlp_in(pixel_embed))))
    # outer
    B, N, C = patch_embed.shape
    norm1_proj = self.norm1_proj(pixel_embed)
    norm1_proj = norm1_proj.reshape(
        (B, N - 1, norm1_proj.shape[1] * norm1_proj.shape[2]))
    patch_embed[:, 1:] = paddle.add(patch_embed[:, 1:],
                                    self.proj(norm1_proj))
    patch_embed = paddle.add(
        patch_embed,
        self.drop_path(self.attn_out(self.norm_out(patch_embed))))
    patch_embed = paddle.add(
        patch_embed,
        self.drop_path(self.mlp(self.norm_mlp(patch_embed))))
    return pixel_embed, patch_embed
def forward(self, prev_hidden, batch_H, char_onehots):
    batch_H_proj = self.i2h(batch_H)
    prev_hidden_proj = paddle.unsqueeze(self.h2h(prev_hidden), axis=1)
    res = paddle.add(batch_H_proj, prev_hidden_proj)
    res = paddle.tanh(res)
    e = self.score(res)
    alpha = F.softmax(e, axis=1)
    alpha = paddle.transpose(alpha, [0, 2, 1])
    context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1)
    concat_context = paddle.concat([context, char_onehots], 1)
    cur_hidden = self.rnn(concat_context, prev_hidden)
    return cur_hidden, alpha
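# A minimal standalone sketch (shapes assumed for illustration) of the broadcast
# performed by paddle.add above: batch_H_proj of shape [B, T, H] plus the
# unsqueezed prev_hidden_proj of shape [B, 1, H] broadcasts over the time axis.
import paddle

batch_H_proj = paddle.rand([2, 5, 8])      # hypothetical encoder projections
prev_hidden_proj = paddle.rand([2, 1, 8])  # hypothetical hidden-state projection
res = paddle.add(batch_H_proj, prev_hidden_proj)
print(res.shape)  # [2, 5, 8]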
def test_elementwise_add(self):
    with _test_eager_guard():
        paddle.set_device("cpu")
        np_x = np.ones([4, 16, 16, 32]).astype('float32')
        np_y = np.ones([4, 16, 16, 32]).astype('float32')
        x = paddle.to_tensor(np_x)
        y = paddle.to_tensor(np_y)
        out = paddle.add(x, y)
        out_arr = out.numpy()
        out_arr_expected = np.add(np_x, np_y)
        self.assertTrue(np.array_equal(out_arr, out_arr_expected))
def forward(self, input, label):
    x_emb = self.embedding(input)
    fc = paddle.matmul(x_emb, self.softmax_weight)
    fc = paddle.add(fc, self.softmax_bias)
    projection = paddle.reshape(fc, shape=[-1, self.vocab_size])
    loss = paddle.nn.functional.softmax_with_cross_entropy(
        logits=projection, label=label, soft_label=False)
    loss = paddle.reshape(loss, shape=[-1, self.num_steps])
    loss = paddle.mean(loss, axis=[0])
    loss = paddle.sum(loss)
    return loss
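# A minimal sketch (shapes assumed, not from the original model) of the bias
# broadcast in the paddle.add call above: logits of shape [batch, steps, vocab]
# plus a 1-D bias of shape [vocab] broadcasts across the leading dimensions.
import paddle

logits = paddle.rand([4, 10, 100])  # hypothetical [batch, num_steps, vocab_size]
softmax_bias = paddle.zeros([100])  # hypothetical per-token bias
print(paddle.add(logits, softmax_bias).shape)  # [4, 10, 100]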
def forward(self, x):
    if self.training:
        n, c, t, h, w = x.shape
        x = paddle.reshape(
            x, (n // self.num_splits, c * self.num_splits, t, h, w))
        x = self.split_bn(x)
        x = paddle.reshape(x, (n, c, t, h, w))
    else:
        x = self.bn(x)
    x = paddle.multiply(x, paddle.reshape(self.weight, (-1, 1, 1, 1)))
    x = paddle.add(x, paddle.reshape(self.bias, (-1, 1, 1, 1)))
    return x
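# A minimal sketch (shapes assumed) of the affine step above: a per-channel
# weight/bias reshaped to [C, 1, 1, 1] broadcasts against a 5-D video tensor
# [N, C, T, H, W] because dimensions are aligned right-to-left.
import paddle

x = paddle.rand([2, 16, 4, 8, 8])  # hypothetical [N, C, T, H, W]
bias = paddle.zeros([16])          # hypothetical per-channel bias
out = paddle.add(x, paddle.reshape(bias, (-1, 1, 1, 1)))
print(out.shape)  # [2, 16, 4, 8, 8]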
def _test(self, run_npu=True):
    main_prog = paddle.static.Program()
    startup_prog = paddle.static.Program()
    main_prog.random_seed = SEED
    startup_prog.random_seed = SEED
    np.random.seed(SEED)

    a_np = np.random.random(size=(32, 32)).astype('float32')
    b_np = np.random.random(size=(32, 32)).astype('float32')
    label_np = np.random.randint(2, size=(32, 1)).astype('int64')

    with paddle.static.program_guard(main_prog, startup_prog):
        a = paddle.static.data(name="a", shape=[32, 32], dtype='float32')
        b = paddle.static.data(name="b", shape=[32, 32], dtype='float32')
        label = paddle.static.data(name="label", shape=[32, 1], dtype='int64')

        sum = paddle.add(a, b)
        z = paddle.pow(sum, 2.0)

        fc_1 = fluid.layers.fc(input=z, size=128)
        prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')

        cost = fluid.layers.cross_entropy(input=prediction, label=label)
        loss = fluid.layers.reduce_mean(cost)
        adam = paddle.optimizer.AdamW(learning_rate=0.01, weight_decay=0.02)
        adam.minimize(loss)

    if run_npu:
        place = paddle.NPUPlace(0)
    else:
        place = paddle.CPUPlace()

    exe = paddle.static.Executor(place)
    exe.run(startup_prog)

    print("Start run on {}".format(place))
    for epoch in range(100):
        pred_res, loss_res = exe.run(main_prog,
                                     feed={
                                         "a": a_np,
                                         "b": b_np,
                                         "label": label_np
                                     },
                                     fetch_list=[prediction, loss])
        if epoch % 10 == 0:
            print("Epoch {} | Prediction[0]: {}, Loss: {}".format(
                epoch, pred_res[0], loss_res))

    return pred_res, loss_res
def forward(self, x, res_dict=None):
    residual = x
    x = self.conv1(x)
    x = self.conv2(x)
    x = self.conv3(x)
    if self.downsample:
        residual = self.conv_down(residual)
    if self.has_se:
        x = self.se(x)
    x = paddle.add(x=residual, y=x)
    x = self.relu(x)
    return x
def _test(self, run_npu=True):
    main_prog = paddle.static.Program()
    startup_prog = paddle.static.Program()
    main_prog.random_seed = SEED
    startup_prog.random_seed = SEED
    np.random.seed(SEED)

    batch_size = 32
    data_shape = (32, 32)
    a_np = np.random.random(size=data_shape).astype('float32')
    b_np = np.random.random(size=data_shape).astype('float32')
    label_np = np.random.randint(2, size=(batch_size, 1)).astype('int64')

    with paddle.static.program_guard(main_prog, startup_prog):
        a = paddle.static.data(name="a", shape=data_shape, dtype='float32')
        b = paddle.static.data(name="b", shape=data_shape, dtype='float32')
        label = paddle.static.data(
            name="label", shape=[batch_size, 1], dtype='int64')

        sum = paddle.add(a, b)
        z = paddle.slice(sum, axes=[0, 1], starts=[0, 0], ends=[33, 2])

        prediction = paddle.static.nn.fc(z, size=2, activation='softmax')

        cost = paddle.fluid.layers.softmax_with_cross_entropy(
            logits=prediction, label=label)
        loss = paddle.mean(cost)
        sgd = paddle.optimizer.SGD(learning_rate=0.01)
        sgd.minimize(loss)

    if run_npu:
        place = paddle.NPUPlace(0)
    else:
        place = paddle.CPUPlace()

    exe = paddle.static.Executor(place)
    exe.run(startup_prog)

    print("Start run on {}".format(place))
    for epoch in range(EPOCH):
        pred_res, loss_res = exe.run(main_prog,
                                     feed={
                                         "a": a_np,
                                         "b": b_np,
                                         "label": label_np
                                     },
                                     fetch_list=[prediction, loss])
        if epoch % 10 == 0:
            print("Epoch {} | Prediction[0]: {}, Loss: {}".format(
                epoch, pred_res[0], loss_res))

    return pred_res, loss_res
def forward(self, inputs):
    shifts = paddle.fluid.layers.temporal_shift(inputs, self.num_seg,
                                                1.0 / 8)
    y = self.conv0(shifts)
    conv1 = self.conv1(y)
    conv2 = self.conv2(conv1)
    if self.shortcut:
        short = inputs
    else:
        short = self.short(inputs)
    y = paddle.add(x=short, y=conv2)
    return F.relu(y)
def forward(self, inputs):
    y = self.expand_conv(inputs)
    x = self.bottleneck_conv(y)
    if self.use_se:
        x = self.mid_se(x)
    x = self.linear_conv(x)
    if self.if_shortcut:
        x = paddle.add(inputs, x)
    if self.return_list:
        return [y, x]
    else:
        return x
def allreduce_(*_):
    if param.name in self._task_flow.full_grad.keys():
        full_grad = self._task_flow.full_grad[param.name]
        # Only support sync allreduce current rank's layer now
        collective.all_reduce(tensor=full_grad, group=self._group)

        start, end = self._param2buffer[param.name][self._rank]
        if param.bw_storage is None:
            param.bw_storage = full_grad._slice(start, end).detach().clone()
            if self._offload:
                param.bw_storage = _device2cpu(param.bw_storage, True)
        else:
            if self._offload:
                cpu_grad = _device2cpu(
                    full_grad._slice(start, end).detach().clone(), True)
                with device_guard():
                    param.bw_storage = paddle.add(param.bw_storage, cpu_grad)
            else:
                param.bw_storage = paddle.add(
                    param.bw_storage,
                    full_grad._slice(start, end).detach().clone())
        param.clear_gradient(False)
        del self._task_flow.full_grad[param.name]

    if param.name in self._task_flow.full_param.keys():
        if param.status == "all":
            param.use_count = 0
            param._clear_data()
            start, end = self._param2buffer[param.name][self._rank]
            param.fw_storage = self._task_flow.full_param[
                param.name][0]._slice(start, end).detach().clone()
            param.status = "part"
            del self._task_flow.full_param[param.name]

            if self._offload:
                param.fw_storage._clear_data()
                param.master_weight._share_buffer_to(param.fw_storage)
def news_encode(self, category, sub_category, title, content):
    # [b, cate_d]
    cate_emb = self.cate_embedding(category)
    sub_cate_emb = self.sub_cate_embedding(sub_category)
    # [b, conv_out]
    category = paddle.nn.ReLU()(self.category_linear(cate_emb))
    sub_category = paddle.nn.ReLU()(self.sub_category_linear(sub_cate_emb))

    # title [batch, title_size]
    # title_emb [batch, title_size, word_emb_d]
    title_emb = self.word2vec_embedding(title)
    # title_emb [batch, word_emb_d, title_size]
    title_emb = paddle.transpose(title_emb, perm=[0, 2, 1])
    # title_emb [batch, conv_out, title_size]
    title_emb = self.conv_title(title_emb)

    # content_emb [batch, content_size, word_emb_d]
    content_emb = self.word2vec_embedding(content)
    # content_emb [batch, word_emb_d, content_size]
    content_emb = paddle.transpose(content_emb, perm=[0, 2, 1])
    # [batch, conv_out, content_size]
    content_emb = self.conv_title(content_emb)

    # title_emb [batch, title_size, conv_out]
    # content_emb [batch, content_size, conv_out]
    title_emb = paddle.transpose(title_emb, perm=[0, 2, 1])
    content_emb = paddle.transpose(content_emb, perm=[0, 2, 1])
    title_emb = paddle.nn.ReLU()(paddle.add(title_emb, self.conv_title_bias))
    content_emb = paddle.nn.ReLU()(paddle.add(content_emb,
                                              self.conv_content_bias))

    # [b, conv_out]
    title_emb = self.title_attention(title_emb)
    content_emb = self.content_attention(content_emb)

    # [b, conv_out * 4]
    vec = paddle.concat([title_emb, content_emb, category, sub_category],
                        axis=-1)
    # [b, 4, conv_out]
    vec_group = paddle.reshape(vec, [-1, 4, self.conv_out_channel_size])
    # [b, conv_out]
    final_vec = self.mix_attention(vec_group)
    return final_vec
def forward(self, input):
    outs = []
    residual_func_idx = 0
    for i in range(self._actual_ch):
        residual = input[i]
        for j in range(len(self._in_channels)):
            if j > i:
                y = self.residual_func_list[residual_func_idx](input[j])
                residual_func_idx += 1
                y = F.interpolate(y, scale_factor=2**(j - i))
                residual = paddle.add(x=residual, y=y)
            elif j < i:
                y = input[j]
                for k in range(i - j):
                    y = self.residual_func_list[residual_func_idx](y)
                    residual_func_idx += 1
                residual = paddle.add(x=residual, y=y)
        residual = F.relu(residual)
        outs.append(residual)
    return outs