def test_dygraph_1(self):
    """Smoke-test ``paddle.scatter_nd_add`` in dygraph mode on the CPU.

    The call is made with a random 4-D float32 base tensor, an int64 index
    of shape (3, 2) and random float32 updates of shape (3, 9, 10). The
    result is not inspected; the test only checks that the call completes.
    """
    cpu_place = fluid.CPUPlace()
    with fluid.dygraph.guard(cpu_place):
        # Keep the creation order (base first, then updates) so the random
        # draws happen in the same sequence.
        base = paddle.rand(shape=[3, 5, 9, 10], dtype='float32')
        increments = paddle.rand(shape=[3, 9, 10], dtype='float32')

        positions = np.array([[1, 1], [0, 1], [1, 3]], dtype=np.int64)
        index = fluid.dygraph.to_variable(positions)

        paddle.scatter_nd_add(base, index, increments)
def forward(self, inp):
    """Select the top-1 expert for each row of ``inp`` and record a gate loss.

    Steps, as performed by the code below:

    1. Score the input with ``self.gate``; while training, add random
       jitter controlled by ``self.switch_eps``.
    2. Softmax the scores over the last axis and take the top-1 entry.
    3. Pass the top-1 indices through ``limit_by_capacity`` with a capacity
       of ``ceil(rate * inp.shape[0])``, where ``rate`` is
       ``self.capacity[0]`` in training and ``self.capacity[1]`` otherwise.
    4. Build a loss from the per-expert assignment fraction and the summed
       scores, and hand it to ``self.set_loss``.

    Returns:
        tuple: ``(top1_score, top1_idx)`` where ``top1_idx`` is the index
        tensor returned by ``limit_by_capacity``.
    """
    score = self.gate(inp)
    if self.training:
        # paddle.rand draws from [0, 1); the affine map below moves it to
        # roughly [1 - eps, 1 + eps) before it is added in place.
        jitter = (
            paddle.rand(shape=score.shape) * 2 * self.switch_eps
            + 1.0
            - self.switch_eps
        )
        score += jitter

    score = F.softmax(score, axis=-1)
    top1_score, top1_idx = paddle.topk(score, k=1, axis=-1, largest=True)

    # Index 0 of self.capacity is used while training, index 1 otherwise.
    if self.training:
        cap_rate = self.capacity[0]
    else:
        cap_rate = self.capacity[1]
    capacity = math.ceil(cap_rate * inp.shape[0])

    _, _, top1_idx = limit_by_capacity(
        top1_idx,
        self.num_expert,
        self.world_size,
        capacity,
        group=self.group,
    )

    # Only indices greater than -1 are counted; presumably -1 marks entries
    # dropped by limit_by_capacity -- confirm against that helper.
    valid_idx = top1_idx[top1_idx > -1]
    n_valid = len(valid_idx)
    scatter_index = paddle.reshape(valid_idx, shape=[n_valid, 1])

    # Count assignments per expert by scattering a 1.0 for every valid index.
    counts_init = paddle.zeros(shape=[self.tot_expert])
    unit_updates = paddle.ones_like(valid_idx, dtype=paddle.float32).reshape(
        shape=[n_valid]
    )
    expert_counts = paddle.scatter_nd_add(
        x=counts_init,
        index=scatter_index,
        updates=unit_updates,
    )

    fraction_expert = expert_counts / valid_idx.numel()
    prob_expert = score.sum(axis=0) / valid_idx.numel()
    loss = (fraction_expert * prob_expert).sum() * self.tot_expert
    self.set_loss(loss)

    return top1_score, top1_idx
def testcase5(self):
    """Check that ``paddle.scatter_nd_add`` agrees between GPU and CPU.

    The same integer inputs are run on both devices, first in dygraph mode
    and then through a static-graph program, and the outputs are compared
    for exact equality. The test returns immediately when Paddle was not
    compiled with CUDA.
    """
    if not fluid.core.is_compiled_with_cuda():
        return

    shape = [2, 3, 4]
    x = np.arange(int(np.prod(shape))).reshape(shape)
    index = np.array([[0, 0, 2], [0, 1, 2]])
    val = np.array([-1, -3])

    with fluid.dygraph.guard():
        device = paddle.get_device()
        try:
            paddle.set_device('gpu')
            gpu_value = paddle.scatter_nd_add(
                paddle.to_tensor(x),
                paddle.to_tensor(index),
                paddle.to_tensor(val),
            )
            paddle.set_device('cpu')
            cpu_value = paddle.scatter_nd_add(
                paddle.to_tensor(x),
                paddle.to_tensor(index),
                paddle.to_tensor(val),
            )
            self.assertTrue(
                np.array_equal(gpu_value.numpy(), cpu_value.numpy()))
        finally:
            # Restore the caller's device even when the comparison (or one
            # of the ops) fails, so the global setting does not leak into
            # later tests.
            paddle.set_device(device)

    @switch_to_static_graph
    def test_static_graph():
        with paddle.static.program_guard(paddle.static.Program(),
                                         paddle.static.Program()):
            x_t = paddle.static.data(name="x", dtype=x.dtype, shape=x.shape)
            index_t = paddle.static.data(name="index",
                                         dtype=index.dtype,
                                         shape=index.shape)
            val_t = paddle.static.data(name="val",
                                       dtype=val.dtype,
                                       shape=val.shape)
            out_t = paddle.scatter_nd_add(x_t, index_t, val_t)
            feed = {x_t.name: x, index_t.name: index, val_t.name: val}
            fetch = [out_t]

            # Run the same program once per device and compare the results.
            gpu_exe = paddle.static.Executor(paddle.CUDAPlace(0))
            gpu_value = gpu_exe.run(feed=feed, fetch_list=fetch)[0]
            cpu_exe = paddle.static.Executor(paddle.CPUPlace())
            cpu_value = cpu_exe.run(feed=feed, fetch_list=fetch)[0]
            self.assertTrue(np.array_equal(gpu_value, cpu_value))

    test_static_graph()
def paddle2D_scatter_add(x_tensor, index_tensor, update_tensor, dim=0):
    """Scatter-add a 2-D ``update_tensor`` into ``x_tensor`` along ``dim``.

    The updates are flattened row-major and every element is paired with a
    2-D coordinate, which is then passed to ``paddle.scatter_nd_add``:

    * ``dim == 0``: the row comes from ``index_tensor`` and the column is
      the element's own column in ``update_tensor``.
    * ``dim == 1``: the row is the element's own row in ``update_tensor``
      and the column comes from ``index_tensor``.

    This appears intended to mirror a 2-D ``scatter_add`` along ``dim``;
    confirm against callers.

    Args:
        x_tensor: Tensor the updates are added into.
        index_tensor: Target indices along ``dim``; it is reshaped to a
            single column, so it is expected to hold one entry per element
            of ``update_tensor``.
        update_tensor: 2-D tensor of values to add.
        dim: Axis being scattered along, ``0`` or ``1``. For any other
            value the index is left as a single column and passed through
            unchanged.

    Returns:
        The tensor returned by ``paddle.scatter_nd_add``.
    """
    dim0, dim1 = update_tensor.shape
    update_tensor = paddle.flatten(update_tensor, start_axis=0, stop_axis=1)
    index_tensor = paddle.reshape(index_tensor, [-1, 1])

    # Row-major flattening puts element (i, j) at position k = i * dim1 + j,
    # so its row is k // dim1 and its column is k % dim1.
    if dim == 0:
        own_cols = (paddle.arange(dim1 * dim0) % dim1).unsqueeze(1)
        index_tensor = paddle.concat(x=[index_tensor, own_cols], axis=1)
    elif dim == 1:
        own_rows = (paddle.arange(dim1 * dim0) // dim1).unsqueeze(1)
        index_tensor = paddle.concat(x=[own_rows, index_tensor], axis=1)

    output_tensor = paddle.scatter_nd_add(x_tensor, index_tensor,
                                          update_tensor)
    return output_tensor
def test_static_graph():
    """Run ``paddle.scatter_nd_add`` as a static program on GPU and CPU.

    Relies on ``x``, ``index``, ``val`` and ``self`` from the enclosing
    scope. The program is built once, executed on ``CUDAPlace(0)`` and then
    on ``CPUPlace()``, and the two outputs must be exactly equal.
    """
    main_program = paddle.static.Program()
    startup_program = paddle.static.Program()
    with paddle.static.program_guard(main_program, startup_program):
        x_var = paddle.static.data(name="x", dtype=x.dtype, shape=x.shape)
        index_var = paddle.static.data(
            name="index", dtype=index.dtype, shape=index.shape
        )
        val_var = paddle.static.data(
            name="val", dtype=val.dtype, shape=val.shape
        )
        result_var = paddle.scatter_nd_add(x_var, index_var, val_var)

        feed_map = {x_var.name: x, index_var.name: index, val_var.name: val}
        fetch_targets = [result_var]

        def _run_on(place):
            # Execute the guarded default program on the given place and
            # return the single fetched output.
            executor = paddle.static.Executor(place)
            return executor.run(feed=feed_map, fetch_list=fetch_targets)[0]

        from_gpu = _run_on(paddle.CUDAPlace(0))
        from_cpu = _run_on(paddle.CPUPlace())
        self.assertTrue(np.array_equal(from_gpu, from_cpu))
def forward(self, inputs, _index, _updates):
    """Apply ``paddle.scatter_nd_add`` to the given tensors.

    Args:
        inputs: Tensor passed as the first argument of the op.
        _index: Index tensor passed as the second argument.
        _updates: Update tensor passed as the third argument.

    Returns:
        The result of ``paddle.scatter_nd_add(inputs, _index, _updates)``.
    """
    return paddle.scatter_nd_add(inputs, _index, _updates)
def scatter_nd_add(x, index, updates):
    """Forward the arguments to ``paddle.scatter_nd_add`` and return its result."""
    result = paddle.scatter_nd_add(x=x, index=index, updates=updates)
    return result