def forward_grad(self, x):
    # Forward finite differences via fixed-kernel convs. The gradient at the
    # last column/row must be zero (Neumann boundary); dygraph tensors do not
    # support slice assignment, so we unstack, zero the last slice, restack.
    x1 = fluid.layers.pad2d(x, paddings=[0, 0, 0, 1])  # pad right of width
    grad_x = self.conv2df_grad(x1)
    # grad_x[:, :, :, -1] = 0
    temp = unstack(grad_x, axis=3)
    temp[-1] = temp[-1] * 0
    grad_x = stack(temp, axis=3)

    x2 = fluid.layers.pad2d(x, paddings=[0, 1, 0, 0])  # pad bottom of height
    grad_y = self.conv2df_grad2(x2)
    # grad_y[:, :, -1, :] = 0
    temp = unstack(grad_y, axis=2)
    temp[-1] = temp[-1] * 0
    grad_y = stack(temp, axis=2)

    return grad_x, grad_y
def forward_grad(self, x):
    # Same forward differences as above; tmp[-1] - tmp[-1] yields a zero
    # tensor of the right shape (slice assignment is unsupported).
    grad_x = self.conv4u(layers.pad(x, (0, 0, 0, 0, 0, 0, 0, 1)))
    # axis=3 so the last *column* of the x-gradient is zeroed; the original
    # unstacked axis=2 here, which zeroed a row instead.
    tmp = layers.unstack(grad_x, axis=3)
    tmp[-1] = tmp[-1] - tmp[-1]  # tmp[-1] = 0
    grad_x = layers.stack(tmp, axis=3)

    grad_y = self.conv4v(layers.pad(x, (0, 0, 0, 0, 0, 1, 0, 0)))
    tmp = layers.unstack(grad_y, axis=2)  # zero the last row
    tmp[-1] = tmp[-1] - tmp[-1]  # tmp[-1] = 0
    grad_y = layers.stack(tmp, axis=2)

    return grad_x, grad_y
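# For reference, a minimal NumPy sketch of what both forward_grad variants
# compute, assuming the fixed conv kernels (conv2df_grad / conv4u and friends,
# defined in the constructors, which are not shown here) are the
# forward-difference stencil [-1, 1] of the usual TV-L1 discretization.
import numpy as np

def forward_grad_reference(x):
    """Forward differences with the last column/row zeroed (Neumann
    boundary), matching the pad + conv + unstack/stack trick above.
    x: [..., H, W]."""
    grad_x = np.zeros_like(x)
    grad_y = np.zeros_like(x)
    grad_x[..., :, :-1] = x[..., :, 1:] - x[..., :, :-1]  # d/dx; last column stays 0
    grad_y[..., :-1, :] = x[..., 1:, :] - x[..., :-1, :]  # d/dy; last row stays 0
    return grad_x, grad_y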
def forward(self, *args, **kwargs):
    """
    Args:
        start_pos (optional, `Variable` of shape [batch_size]):
            token index of the start of the answer span in `context`
        end_pos (optional, `Variable` of shape [batch_size]):
            token index of the end of the answer span in `context`
    Returns:
        loss (`Variable` of shape []):
            cross entropy loss, averaged over batch and time; positions
            where label == -100 are ignored. None if labels are not given.
        start_logits (`Variable` of shape [batch_size, seq_len]):
            output logits of the start position; use argmax(start_logits)
            to get the start index
        end_logits (`Variable` of shape [batch_size, seq_len]):
            output logits of the end position; use argmax(end_logits)
            to get the end index
    """
    start_pos = kwargs.pop('start_pos', None)
    end_pos = kwargs.pop('end_pos', None)
    pooled, encoded = super(ErnieModelForQuestionAnswering,
                            self).forward(*args, **kwargs)
    encoded = self.dropout(encoded)
    encoded = self.classifier(encoded)  # [batch_size, seq_len, 2]
    start_logits, end_logits = L.unstack(encoded, axis=-1)
    if start_pos is not None and end_pos is not None:
        # softmax_with_cross_entropy expects labels of shape [..., 1]
        if len(start_pos.shape) == 1:
            start_pos = L.unsqueeze(start_pos, axes=[-1])
        if len(end_pos.shape) == 1:
            end_pos = L.unsqueeze(end_pos, axes=[-1])
        start_loss = L.softmax_with_cross_entropy(start_logits, start_pos)
        end_loss = L.softmax_with_cross_entropy(end_logits, end_pos)
        loss = (L.reduce_mean(start_loss) + L.reduce_mean(end_loss)) / 2.
    else:
        loss = None
    return loss, start_logits, end_logits
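# A usage sketch for the QA head above. The import path, the `from_pretrained`
# helper, and the 'ernie-1.0' checkpoint name follow the ERNIE dygraph repo's
# README and are assumptions here; tokenization is elided and random ids are
# used purely for shape illustration.
import numpy as np
import paddle.fluid.dygraph as D
from ernie.modeling_ernie import ErnieModelForQuestionAnswering

with D.guard():
    model = ErnieModelForQuestionAnswering.from_pretrained('ernie-1.0')
    src_ids = D.to_variable(np.random.randint(1, 1000, [2, 128]).astype('int64'))
    start = D.to_variable(np.array([5, 17]).astype('int64'))
    end = D.to_variable(np.array([9, 20]).astype('int64'))

    # Training: labels given, a scalar loss is returned.
    loss, start_logits, end_logits = model(src_ids, start_pos=start, end_pos=end)

    # Inference: omit the labels; loss comes back as None and the answer span
    # is (argmax(start_logits), argmax(end_logits)) per example.
    _, start_logits, end_logits = model(src_ids)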
def forward(self, x):
    """Forward network"""
    # x: char ids per token, [batch_size, seq_len, n_chars];
    # pad_index marks padding.
    mask = layers.reduce_any(x != self.pad_index, -1)
    lens = nn.reduce_sum(mask, -1)  # number of real tokens per sentence
    masked_x = nn.masked_select(x, mask)
    char_mask = masked_x != self.pad_index
    emb = self.embed(masked_x)
    _, (h, _) = self.lstm(emb, char_mask, self.pad_index)
    # Concatenate the last hidden states of both LSTM directions.
    h = layers.concat(layers.unstack(h), axis=-1)
    # Split the flat word features back into per-sentence sequences and pad.
    feat_embed = nn.pad_sequence_paddle(
        layers.split(h, lens.numpy().tolist(), dim=0), self.pad_index)
    return feat_embed
def forward(self, x):
    # TV-L1 optical-flow layer (representation flow): estimate flow between
    # consecutive feature frames by primal-dual iterations.
    x = self.bottleneck(x)
    inp = self.norm_img(x)
    bt, c, w, h = inp.shape
    inp = layers.reshape(inp, shape=[self.batch_size, -1, c, w, h])
    x = inp[:, :-1]  # frames t
    y = inp[:, 1:]   # frames t + 1
    x = layers.reshape(
        layers.transpose(x, perm=[0, 2, 1, 3, 4]), shape=[-1, c, h, w])
    y = layers.reshape(
        layers.transpose(y, perm=[0, 2, 1, 3, 4]), shape=[-1, c, h, w])

    u1 = fluid.dygraph.to_variable(np.zeros(x.shape)).astype('float32')
    u2 = fluid.dygraph.to_variable(np.zeros(x.shape)).astype('float32')
    l_t = self.lamda * self.theta
    taut = self.tau / (self.theta + 1e-12)

    # Image gradients of y (central differences); border entries are replaced
    # by one-sided estimates built from x, as in the reference TV-L1 code.
    grad2_x = self.conv4Ix(layers.pad(y, (0, 0, 0, 0, 0, 0, 1, 1)))
    tmp = layers.unstack(grad2_x, axis=3)
    tmp[-1] = 0.5 * (x[:, :, :, -1] - x[:, :, :, -2])
    tmp[0] = 0.5 * (x[:, :, :, 1] - x[:, :, :, 0])
    grad2_x = layers.stack(tmp, axis=3)

    grad2_y = self.conv4Iy(layers.pad(y, (0, 0, 0, 0, 1, 1, 0, 0)))
    tmp = layers.unstack(grad2_y, axis=2)
    tmp[-1] = 0.5 * (x[:, :, -1, :] - x[:, :, -2, :])
    tmp[0] = 0.5 * (x[:, :, 1, :] - x[:, :, 0, :])
    grad2_y = layers.stack(tmp, axis=2)

    # Dual variables.
    p11 = fluid.dygraph.to_variable(np.zeros(x.shape)).astype('float32')
    p12 = fluid.dygraph.to_variable(np.zeros(x.shape)).astype('float32')
    p21 = fluid.dygraph.to_variable(np.zeros(x.shape)).astype('float32')
    p22 = fluid.dygraph.to_variable(np.zeros(x.shape)).astype('float32')

    gsqx = grad2_x**2
    gsqy = grad2_y**2
    grad = gsqx + gsqy + 1e-12
    rho_c = y - grad2_x * u1 - grad2_y * u2 - x

    for i in range(self.n_iter):
        rho = rho_c + grad2_x * u1 + grad2_y * u2 + 1e-12

        # Soft-thresholding of the data term, written with float masks
        # because boolean-mask assignment is unavailable.
        mask1 = (rho < -l_t * grad).detach().astype('float32')
        mask1.stop_gradient = True
        tmp1 = l_t * grad2_x
        tmp2 = l_t * grad2_y
        v1 = tmp1 * mask1
        v2 = tmp2 * mask1

        mask2 = (rho > l_t * grad).detach().astype('float32')
        mask2.stop_gradient = True
        v1 = -tmp1 * mask2 + v1
        v2 = -tmp2 * mask2 + v2

        mask3 = fluid.layers.ones(
            x.shape, dtype='float32') - (mask1 + mask2 - mask1 * mask2)
        mask3.stop_gradient = True
        tmp1 = (-rho / grad) * grad2_x
        tmp2 = (-rho / grad) * grad2_y
        v1 = tmp1 * mask3 + v1
        v2 = tmp2 * mask3 + v2
        del rho, mask1, mask2, mask3

        v1 += u1
        v2 += u2

        # Primal update.
        u1 = v1 + self.theta * self.divergence(p11, p12)
        u2 = v2 + self.theta * self.divergence(p21, p22)
        del v1, v2

        # Dual update.
        u1x, u1y = self.forward_grad(u1)
        u2x, u2y = self.forward_grad(u2)
        p11 = (p11 + taut * u1x) / (
            1. + taut * layers.sqrt(u1x**2 + u1y**2 + 1e-12))
        p12 = (p12 + taut * u1y) / (
            1. + taut * layers.sqrt(u1x**2 + u1y**2 + 1e-12))
        p21 = (p21 + taut * u2x) / (
            1. + taut * layers.sqrt(u2x**2 + u2y**2 + 1e-12))
        p22 = (p22 + taut * u2y) / (
            1. + taut * layers.sqrt(u2x**2 + u2y**2 + 1e-12))
        del u1x, u1y, u2x, u2y

    flow = layers.concat([u1, u2], axis=1)
    flow = self.unbottleneck(flow)
    flow = self.bn(flow) if self.bn else flow
    return flow
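# The three-mask block in the loop above is the standard TV-L1 soft-
# thresholding of the residual rho. A compact NumPy restatement of that case
# analysis (a sketch of the math, not the module's actual code path):
import numpy as np

def tvl1_threshold(rho, grad, grad2_x, grad2_y, l_t):
    """grad = |grad I|^2 + eps. Returns the data-term step (v1, v2) that is
    added to (u1, u2) before the divergence update."""
    v1 = np.where(rho < -l_t * grad, l_t * grad2_x,
                  np.where(rho > l_t * grad, -l_t * grad2_x,
                           (-rho / grad) * grad2_x))
    v2 = np.where(rho < -l_t * grad, l_t * grad2_y,
                  np.where(rho > l_t * grad, -l_t * grad2_y,
                           (-rho / grad) * grad2_y))
    return v1, v2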
def forward(self, x):
    # x: [b, c, t, h, w]; drop the last frame so the residual matches the
    # t-1 frame pairs used for flow.
    residual = x[:, :, :-1]
    x = self.bottleneck(x)
    inp = self.norm_img(x)
    x = inp[:, :, :-1]  # frames t
    y = inp[:, :, 1:]   # frames t + 1
    b, c, t, h, w = x.shape
    x = fluid.layers.transpose(x, perm=[0, 2, 1, 3, 4])
    y = fluid.layers.transpose(y, perm=[0, 2, 1, 3, 4])
    x = fluid.layers.reshape(x, [-1, c, h, w])
    y = fluid.layers.reshape(y, [-1, c, h, w])

    u1 = fluid.layers.zeros_like(x)
    u2 = fluid.layers.zeros_like(x)
    l_t = self.l * self.t
    taut = self.a / self.t

    # Image gradients of y; border entries are replaced by one-sided
    # differences of x, as in the reference implementation. Slice assignment
    # is emulated with unstack/stack.
    grad2_x = self.conv2dimg_grad(y)
    temp1 = unstack(x, axis=3)
    temp = unstack(grad2_x, axis=3)
    temp[0] = 0.5 * (temp1[1] - temp1[0])
    temp[-1] = 0.5 * (temp1[-1] - temp1[-2])
    grad2_x = stack(temp, axis=3)

    grad2_y = self.conv2dimg_grad2(y)
    temp1 = unstack(x, axis=2)
    # Fixed: this block previously unstacked/restacked grad2_x, clobbering
    # the x-gradient and leaving grad2_y without its border correction.
    temp = unstack(grad2_y, axis=2)
    temp[0] = 0.5 * (temp1[1] - temp1[0])
    temp[-1] = 0.5 * (temp1[-1] - temp1[-2])
    grad2_y = stack(temp, axis=2)

    p11 = fluid.layers.zeros_like(x)
    p12 = fluid.layers.zeros_like(x)
    p21 = fluid.layers.zeros_like(x)
    p22 = fluid.layers.zeros_like(x)

    gsqx = grad2_x**2
    gsqy = grad2_y**2
    grad = gsqx + gsqy + 1e-12
    rho_c = y - grad2_x * u1 - grad2_y * u2 - x

    for i in range(self.n_iter):
        rho = rho_c + grad2_x * u1 + grad2_y * u2 + 1e-12
        v1 = fluid.layers.zeros_like(x)
        v2 = fluid.layers.zeros_like(x)

        # Masked assignment (v[mask] = src[mask]) emulated with
        # where/gather_nd/scatter_nd_add. Note scatter_nd would build a
        # fresh zero tensor and drop earlier writes, so we add the
        # difference (new - old) at the masked coordinates instead.
        mask3 = fluid.layers.where(grad > 1e-12)
        if mask3.shape[0]:
            v13set = fluid.layers.gather_nd((-rho / grad) * grad2_x, mask3)
            v23set = fluid.layers.gather_nd((-rho / grad) * grad2_y, mask3)
            v1 = fluid.layers.scatter_nd_add(
                v1, mask3, v13set - fluid.layers.gather_nd(v1, mask3))
            v2 = fluid.layers.scatter_nd_add(
                v2, mask3, v23set - fluid.layers.gather_nd(v2, mask3))

        mask1 = fluid.layers.where(rho < -l_t * grad)
        if mask1.shape[0]:
            v11set = fluid.layers.gather_nd(l_t * grad2_x, mask1)
            v21set = fluid.layers.gather_nd(l_t * grad2_y, mask1)
            v1 = fluid.layers.scatter_nd_add(
                v1, mask1, v11set - fluid.layers.gather_nd(v1, mask1))
            v2 = fluid.layers.scatter_nd_add(
                v2, mask1, v21set - fluid.layers.gather_nd(v2, mask1))

        mask2 = fluid.layers.where(rho > l_t * grad)
        if mask2.shape[0]:
            v12set = fluid.layers.gather_nd(-l_t * grad2_x, mask2)
            v22set = fluid.layers.gather_nd(-l_t * grad2_y, mask2)
            v1 = fluid.layers.scatter_nd_add(
                v1, mask2, v12set - fluid.layers.gather_nd(v1, mask2))
            v2 = fluid.layers.scatter_nd_add(
                v2, mask2, v22set - fluid.layers.gather_nd(v2, mask2))
        del rho, mask1, mask2, mask3

        v1 += u1
        v2 += u2

        # Gradient backprop is fine from here on; if anything is off,
        # check the earlier steps.
        u1 = v1 + self.t * self.divergence(p11, p12)
        u2 = v2 + self.t * self.divergence(p21, p22)
        del v1, v2

        u1x, u1y = self.forward_grad(u1)
        u2x, u2y = self.forward_grad(u2)
        p11 = (p11 + taut * u1x) / (
            1. + taut * fluid.layers.sqrt(u1x**2 + u1y**2 + 1e-12))
        p12 = (p12 + taut * u1y) / (
            1. + taut * fluid.layers.sqrt(u1x**2 + u1y**2 + 1e-12))
        p21 = (p21 + taut * u2x) / (
            1. + taut * fluid.layers.sqrt(u2x**2 + u2y**2 + 1e-12))
        p22 = (p22 + taut * u2y) / (
            1. + taut * fluid.layers.sqrt(u2x**2 + u2y**2 + 1e-12))
        del u1x, u1y, u2x, u2y

    flow = fluid.layers.concat(input=[u1, u2], axis=1)
    flow = fluid.layers.reshape(flow, [b, t, c * 2, h, w])
    flow = fluid.layers.transpose(flow, perm=[0, 2, 1, 3, 4])
    flow = self.unbottleneck(flow)
    flow = self.bn(flow)
    return self.prelu(residual)  # + flow: fusion with the flow branch is disabled here
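# A standalone sketch of the masked-assignment pattern used in the loop above,
# assuming the fluid 1.x dygraph API: `where` returns the integer coordinates
# of true entries, and writing (new - old) through `scatter_nd_add` emulates
# v[mask] = src[mask] without zeroing the rest of the tensor.
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph as D

with D.guard():
    v = D.to_variable(np.zeros([2, 3], dtype='float32'))
    src = D.to_variable(np.arange(6, dtype='float32').reshape([2, 3]))

    idx = fluid.layers.where(src > 2.5)          # [k, 2] coordinates
    if idx.shape[0]:                             # skip an empty mask
        new = fluid.layers.gather_nd(src, idx)   # values to write
        old = fluid.layers.gather_nd(v, idx)     # values being replaced
        v = fluid.layers.scatter_nd_add(v, idx, new - old)
    print(v.numpy())  # [[0. 0. 0.], [3. 4. 5.]]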