def forward(self, input_tensor):
    """Average ``input_tensor`` as a sum scaled by the reduced element count.

    The sum is taken with ``flow.sum`` over ``self.axis`` (honouring
    ``self.keepdims``).  The divisor is the product of the extents listed in
    ``self.axes``, or of every extent when ``self.axes`` is empty.

    NOTE(review): the sum reads ``self.axis`` while the count reads
    ``self.axes``; presumably both describe the same dimensions -- confirm
    against the enclosing class.
    """
    summed = flow.sum(input_tensor, dim=self.axis, keepdims=self.keepdims)

    shape = input_tensor.shape
    if len(self.axes) == 0:
        # No explicit axes: every dimension contributes to the count.
        extents = list(shape)
    else:
        extents = [shape[i] for i in self.axes]

    reduce_count = 1
    for extent in extents:
        reduce_count *= extent

    return flow.mul(summed, 1.0 / reduce_count)
def forward(self, input):
    """Select ``self.k`` entries of ``input`` along ``self.dim``.

    Args:
        input: tensor-like object exposing ``.shape``.

    Returns:
        A ``(values, indices)`` tuple where ``indices`` comes from
        ``flow._C.top_k`` and ``values`` is ``flow.gather(input, axis, indices)``.

    Raises:
        AssertionError: if the normalised axis is outside ``[0, ndim)``.

    When ``self.largest`` is false the input is negated before the ``top_k``
    call, so the smallest entries are picked instead.
    """
    if self.dim is None:
        # Default to the last dimension.  The value is written back to the
        # instance, exactly as before, so later calls observe ``dim == -1``.
        self.dim = -1

    num_axes = len(input.shape)
    axis = self.dim if self.dim >= 0 else self.dim + num_axes
    assert 0 <= axis < num_axes, "axis out of range"

    is_last_axis = axis == num_axes - 1
    if is_last_axis:
        candidates = input
    else:
        # Presumably ``top_k`` operates on the last dimension only, hence the
        # target axis is moved there first -- confirm against the kernel docs.
        perm = get_perm_when_transpose_axis_to_last_dim(num_axes, axis)
        candidates = flow._C.transpose(input, perm=perm)

    if not self.largest:
        # Negating turns "k largest" into "k smallest".
        candidates = flow.mul(candidates, -1)

    indices = flow._C.top_k(candidates, self.k)

    if not is_last_axis:
        # Undo the transpose so the indices line up with ``input`` again.
        indices = flow._C.transpose(indices, perm=get_inversed_perm(perm))

    return (flow.gather(input, axis, indices), indices)
def _test_fused_scale_mask_softmax(
    test_case, batch_size, num_heads, seq_length, fill_value, scale_value,
):
    """Compare ``flow._C.fused_scale_mask_softmax`` with an unfused reference.

    Random inputs of shape ``(batch_size, num_heads, seq_length, seq_length)``
    are pushed through both the fused op and an explicit
    ``mul * scale + fill * (1 - mask)`` followed by ``flow.softmax``.  Forward
    outputs and input gradients must agree within ``atol = rtol = 1e-4``.

    All tensors are moved to ``"cuda"``, so a CUDA device is required.

    Args:
        test_case: object providing ``assertTrue`` (e.g. ``unittest.TestCase``).
        batch_size, num_heads, seq_length: extents of the generated inputs.
        fill_value: value substituted where the mask is zero.
        scale_value: multiplier applied to the unmasked entries.
    """
    shape = (batch_size, num_heads, seq_length, seq_length)
    x = np.random.randn(*shape)
    # The ``np.bool`` alias was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin ``bool`` names the same dtype on every NumPy release.
    mask = np.random.randint(0, 2, size=shape, dtype=bool)

    # Fused path.
    fused_x_tensor = flow.tensor(x).to("cuda")
    fused_mask_tensor = flow.tensor(mask, dtype=flow.bool).to("cuda")
    fused_x_tensor.requires_grad = True
    fused_out = flow._C.fused_scale_mask_softmax(
        fused_x_tensor,
        fused_mask_tensor,
        fill_value=fill_value,
        scale=scale_value,
    )

    # Reference path built from primitive ops; the mask is float here so it
    # can take part in the arithmetic.
    origin_x_tensor = flow.tensor(x).to("cuda")
    origin_mask_tensor = flow.tensor(mask, dtype=flow.float32).to("cuda")
    origin_x_tensor.requires_grad = True
    origin_out = flow.mul(
        origin_x_tensor, origin_mask_tensor
    ) * scale_value + fill_value * (1.0 - origin_mask_tensor)
    origin_out = flow.softmax(origin_out, dim=-1)

    # A single backward pass over the summed outputs populates both gradients.
    total_out = fused_out.sum() + origin_out.sum()
    total_out.backward()

    test_case.assertTrue(
        np.allclose(fused_out.numpy(), origin_out.numpy(), atol=1e-4, rtol=1e-4)
    )
    test_case.assertTrue(
        np.allclose(
            fused_x_tensor.grad.numpy(),
            origin_x_tensor.grad.numpy(),
            atol=1e-4,
            rtol=1e-4,
        )
    )
def test_mul(test_case):
    """Check ``flow.mul`` against ``np.multiply`` for four operand pairings.

    Covered: tensor * tensor, int * tensor, tensor * int, and a ``(1, 1)``
    tensor broadcast against a ``(2, 3)`` tensor.
    """

    def to_numpy(operand):
        # Plain Python ints are handed to NumPy untouched.
        return operand if isinstance(operand, int) else operand.numpy()

    def check(lhs, rhs):
        actual = flow.mul(lhs, rhs).numpy()
        expected = np.multiply(to_numpy(lhs), to_numpy(rhs))
        test_case.assertTrue(np.allclose(actual, expected, 1e-4, 1e-4))

    check(
        flow.Tensor(np.random.randn(2, 3)),
        flow.Tensor(np.random.randn(2, 3)),
    )
    check(5, flow.Tensor(np.random.randn(2, 3)))
    check(flow.Tensor(np.random.randn(2, 3)), 5)
    check(
        flow.Tensor(np.random.randn(1, 1)),
        flow.Tensor(np.random.randn(2, 3)),
    )
def ae_step(self, data, lambda_kl):
    """Run one optimisation step and report the scalar losses.

    The model output is unpacked as ``(mu, log_sigma, emb, dec)``.  The total
    loss is ``config["lambda"]["lambda_rec"] * L1(dec, x) + lambda_kl * loss_kl``
    where ``loss_kl = 0.5 * mean(exp(log_sigma) + mu * mu - 1 - log_sigma)``.
    Gradients are clipped to ``config["optimizer"]["grad_norm"]`` before the
    optimiser step.

    Args:
        data: batch passed through ``cc`` before reaching the model.
        lambda_kl: weight applied to ``loss_kl``.

    Returns:
        dict with ``"loss_rec"``, ``"loss_kl"``, ``"loss"`` (Python scalars via
        ``.item()``) and ``"grad_norm"`` (as returned by ``clip_grad_norm_``).
    """
    x = cc(data)
    mu, log_sigma, _emb, dec = self.model(x)

    reconstruction = nn.L1Loss()
    loss_rec = reconstruction(dec, x)

    kl_terms = flow.exp(log_sigma) + flow.mul(mu, mu) - 1 - log_sigma
    loss_kl = 0.5 * flow.mean(kl_terms)

    rec_weight = self.config["lambda"]["lambda_rec"]
    loss = rec_weight * loss_rec + lambda_kl * loss_kl

    self.opt.zero_grad()
    loss.backward()
    params = self.model.parameters()
    clip_at = self.config["optimizer"]["grad_norm"]
    grad_norm = flow.nn.utils.clip_grad_norm_(params, max_norm=clip_at)
    self.opt.step()

    return {
        "loss_rec": loss_rec.item(),
        "loss_kl": loss_kl.item(),
        "loss": loss.item(),
        "grad_norm": grad_norm,
    }
def addmm(x, mat1, mat2, alpha=1, beta=1):
    """Return ``beta * x + alpha * (mat1 @ mat2)``.

    Args:
        x: addend; must expose ``.shape`` with at most two entries.
        mat1, mat2: matmul operands, same rank restriction as ``x``.
        alpha: scale applied to the matrix product.
        beta: scale applied to ``x``.

    Raises:
        ValueError: if any of the three operands has more than two dimensions.
    """
    ranks = (len(x.shape), len(mat1.shape), len(mat2.shape))
    if any(rank > 2 for rank in ranks):
        raise ValueError("input matrixes shape can not be greater than 2")

    scaled_input = flow.mul(x, beta)
    scaled_product = flow.mul(flow._C.matmul(mat1, mat2), alpha)
    return scaled_input + scaled_product
def _test_mul_impl(test_case, device):
    """Exercise ``flow.mul`` forward and backward on ``device``.

    Scenarios, in order:
      1. two ``(2, 3)`` tensors, checking the product and both gradients;
      2. int * tensor and tensor * int (forward only);
      3. a ``(1, 1)`` tensor broadcast against ``(2, 3)``, ``(2, 3, 4)`` and
         ``(2, 3, 4, 5)`` tensors, checking the product and both gradients.

    All comparisons use ``np.allclose`` with ``rtol = atol = 1e-05``.
    """

    def rand_tensor(*shape, **extra):
        # float32 tensor of standard-normal samples placed on ``device``.
        return flow.tensor(
            np.random.randn(*shape),
            dtype=flow.float32,
            device=flow.device(device),
            **extra,
        )

    def expect_close(actual, expected):
        test_case.assertTrue(np.allclose(actual, expected, 1e-05, 1e-05))

    # 1. Same-shape operands: d(sum(x * y))/dx == y and vice versa.
    lhs = rand_tensor(2, 3, requires_grad=True)
    rhs = rand_tensor(2, 3, requires_grad=True)
    product = flow.mul(lhs, rhs)
    expect_close(product.numpy(), np.multiply(lhs.numpy(), rhs.numpy()))
    product.sum().backward()
    expect_close(lhs.grad.numpy(), rhs.numpy())
    expect_close(rhs.grad.numpy(), lhs.numpy())

    # 2. Python scalar on either side.
    rhs = rand_tensor(2, 3)
    expect_close(flow.mul(5, rhs).numpy(), np.multiply(5, rhs.numpy()))
    lhs = rand_tensor(2, 3)
    expect_close(flow.mul(lhs, 5).numpy(), np.multiply(lhs.numpy(), 5))

    # 3. Broadcasting a (1, 1) tensor: its gradient accumulates over every
    #    element of the other operand.
    for wide_shape in ((2, 3), (2, 3, 4), (2, 3, 4, 5)):
        lhs = rand_tensor(1, 1, requires_grad=True)
        rhs = rand_tensor(*wide_shape, requires_grad=True)
        product = flow.mul(lhs, rhs)
        expect_close(product.numpy(), np.multiply(lhs.numpy(), rhs.numpy()))
        product.sum().backward()
        expect_close(lhs.grad.numpy(), np.sum(rhs.numpy()))
        expect_close(rhs.grad.numpy(), lhs.numpy())
def __neg__(self):
    """Implement unary minus as ``flow.mul(-1, self)``."""
    negated = flow.mul(-1, self)
    return negated