def general_gaussian(M: int, p, sig, sym: bool = True,
                     dtype: str = 'float64') -> Tensor:
    """Compute a window with a generalized Gaussian shape.

    This function is consistent with scipy.signal.windows.general_gaussian().
    """
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

    n = paddle.arange(0, M, dtype=dtype) - (M - 1.0) / 2.0
    w = paddle.exp(-0.5 * paddle.abs(n / sig)**(2 * p))

    return _truncate(w, needs_trunc)
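# Hedged usage sketch for general_gaussian above. It assumes paddle and the
# module's private helpers (_len_guards, _extend, _truncate) are importable
# alongside it; the argument values are illustrative only.
win = general_gaussian(M=9, p=1.0, sig=2.0)   # p=1 reduces to a plain Gaussian window
print(win.shape)                              # [9]; symmetric, peak value 1.0 at the center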
def test_static(self):
    paddle.enable_static()
    init_data = [paddle.ones(shape, dtype='int32') for shape in self.shapes]
    array = paddle.tensor.create_array('float32', init_data)
    for res, gt in zip(array, init_data):
        self.assertEqual(res.shape, gt.shape)

    # test error with nested list
    with self.assertRaises(TypeError):
        paddle.tensor.create_array('float32',
                                   [init_data[0], [init_data[1]]])

    # test error with non-variable input
    with self.assertRaises(TypeError):
        paddle.tensor.create_array('float32', ("str"))

    paddle.disable_static()  # restore dynamic mode
def seq2feats(self, log_seqs, time_matrices):
    seqs = self.item_emb(log_seqs)
    seqs *= self.item_emb._embedding_dim**0.5
    seqs = self.item_emb_dropout(seqs)

    positions = paddle.arange(log_seqs.shape[1]).unsqueeze(0).expand(
        [log_seqs.shape[0], -1])
    abs_pos_K = self.abs_pos_K_emb(positions)
    abs_pos_V = self.abs_pos_V_emb(positions)
    abs_pos_K = self.abs_pos_K_emb_dropout(abs_pos_K)
    abs_pos_V = self.abs_pos_V_emb_dropout(abs_pos_V)

    time_matrix_K = self.time_matrix_K_emb(time_matrices)
    time_matrix_V = self.time_matrix_V_emb(time_matrices)
    time_matrix_K = self.time_matrix_K_dropout(time_matrix_K)
    time_matrix_V = self.time_matrix_V_dropout(time_matrix_V)

    # mask 0th items (placeholder for dry-run) in log_seqs
    # would be easier if 0th item could be an exception for training
    timeline_mask = log_seqs == 0
    seqs *= (log_seqs != 0).astype(paddle.get_default_dtype()).unsqueeze(
        -1)  # broadcast in last dim

    tl = seqs.shape[1]  # time dim len for enforce causality
    attention_mask = (paddle.tril(paddle.ones([tl, tl])) == 0).astype(
        paddle.bool)

    for i in range(len(self.attention_layers)):
        # Self-attention, Q=layernorm(seqs), K=V=seqs
        # seqs = paddle.transpose(seqs, 0, 1)  # (N, T, C) -> (T, N, C)
        Q = self.attention_layernorms[i](seqs)
        mha_outputs = self.attention_layers[i](Q, seqs, timeline_mask,
                                               attention_mask, time_matrix_K,
                                               time_matrix_V, abs_pos_K,
                                               abs_pos_V)
        seqs = Q + mha_outputs
        # seqs = paddle.transpose(seqs, 0, 1)  # (T, N, C) -> (N, T, C)

        # Point-wise Feed-forward, actually 2 Conv1D for channel wise fusion
        seqs = self.forward_layernorms[i](seqs)
        seqs = self.forward_layers[i](seqs)
        seqs *= (timeline_mask.astype(int) == 0).astype(
            paddle.get_default_dtype()).unsqueeze(-1)

    log_feats = self.last_layernorm(seqs)
    return log_feats
def test_api(self):
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        input = fluid.layers.fill_constant(shape=[1], dtype='int64', value=5)
        expected_result = np.array([8], dtype='int64')

        output = paddle.tensor.math.increment(input, value=3)
        exe = fluid.Executor(fluid.CPUPlace())
        result = exe.run(fetch_list=[output])
        self.assertEqual((result == expected_result).all(), True)

    with fluid.dygraph.guard():
        input = paddle.ones(shape=[1], dtype='int64')
        expected_result = np.array([2], dtype='int64')
        output = paddle.tensor.math.increment(input, value=1)
        self.assertEqual((output.numpy() == expected_result).all(), True)
def build_P_hat_paddle(self, C, P):
    F = self.F
    eps = self.eps
    n = P.shape[0]  # n (= self.I_r_width x self.I_r_height)
    # P_tile: n x 2 -> n x 1 x 2 -> n x F x 2
    P_tile = paddle.tile(paddle.unsqueeze(P, axis=1), (1, F, 1))
    C_tile = paddle.unsqueeze(C, axis=0)  # 1 x F x 2
    P_diff = P_tile - C_tile  # n x F x 2
    # rbf_norm: n x F
    rbf_norm = paddle.norm(P_diff, p=2, axis=2, keepdim=False)
    # rbf: n x F
    rbf = paddle.multiply(paddle.square(rbf_norm),
                          paddle.log(rbf_norm + eps))
    P_hat = paddle.concat([paddle.ones((n, 1), dtype='float64'), P, rbf],
                          axis=1)
    return P_hat  # n x F+3
def func_test_backward_success_2(self):
    # Although var_b is modified inplace after it is used, it is not used in
    # the gradient computation, so the inplace operator does not result in an
    # incorrect gradient.
    with paddle.fluid.dygraph.guard():
        var_a = paddle.ones(shape=[4, 2, 3], dtype="float32")
        var_a.stop_gradient = False

        var_b = var_a**2

        var_b[1:2] = 3  # var_b is modified inplace before it is used

        # Here, the grad op of sum doesn't use the value of var_b
        var_c = var_b + var_b

        loss = var_c.sum()

        var_b[1:2] = 3  # var_b is modified inplace after it is used

        loss.backward()
def forward(self,
            src,
            spatial_shapes,
            src_mask=None,
            pos_embed=None,
            valid_ratios=None):
    output = src
    if valid_ratios is None:
        valid_ratios = paddle.ones(
            [src.shape[0], spatial_shapes.shape[0], 2])
    reference_points = self.get_reference_points(spatial_shapes,
                                                 valid_ratios)
    for layer in self.layers:
        output = layer(output, reference_points, spatial_shapes, src_mask,
                       pos_embed)

    return output
def test_inverse(self):
    exe = paddle.static.Executor()
    sp = paddle.static.Program()
    mp = paddle.static.Program()
    with paddle.static.program_guard(mp, sp):
        x = paddle.ones(self.out_event_shape)
        t = transform.ReshapeTransform(self.in_event_shape,
                                       self.out_event_shape)
        output = self._t.inverse(x)
    exe.run(sp)
    [output] = exe.run(mp, feed={}, fetch_list=[output])
    expected = np.ones(self.in_event_shape)

    np.testing.assert_allclose(output,
                               expected,
                               rtol=config.RTOL.get(str(expected.dtype)),
                               atol=config.ATOL.get(str(expected.dtype)))
def force_decoding(self, beam_search_output, beam_search_state, trg_word,
                   trg_length, time):
    batch_size = paddle.shape(beam_search_output.predicted_ids)[0]
    beam_size = paddle.shape(beam_search_output.predicted_ids)[1]

    ids_dtype = beam_search_output.predicted_ids.dtype
    scores_dtype = beam_search_output.scores.dtype
    parent_ids = paddle.zeros(shape=[batch_size, 1], dtype=ids_dtype)
    scores = paddle.ones(shape=[batch_size, beam_size],
                         dtype=scores_dtype) * -10e9
    scores = paddle.scatter(
        scores.flatten(),
        paddle.arange(0,
                      batch_size * beam_size,
                      step=beam_size,
                      dtype=scores_dtype),
        paddle.zeros([batch_size])).reshape([batch_size, beam_size])

    force_position = paddle.unsqueeze(trg_length > time, [1])
    # NOTE: When the data type of the input of paddle.tile is bool
    # and static mode is enabled, its stop_gradient must be True.
    force_position.stop_gradient = True
    force_position = paddle.tile(force_position, [1, beam_size])

    crt_trg_word = paddle.slice(trg_word,
                                axes=[1],
                                starts=[time],
                                ends=[time + 1])
    crt_trg_word = paddle.tile(crt_trg_word, [1, beam_size])

    predicted_ids = paddle.where(force_position, crt_trg_word,
                                 beam_search_output.predicted_ids)
    scores = paddle.where(force_position, scores,
                          beam_search_output.scores)
    parent_ids = paddle.where(force_position, parent_ids,
                              beam_search_output.parent_ids)

    cell_states = beam_search_state.cell_states
    log_probs = paddle.where(force_position, scores,
                             beam_search_state.log_probs)
    finished = beam_search_state.finished
    lengths = beam_search_state.lengths

    return self.OutputWrapper(scores, predicted_ids,
                              parent_ids), self.StateWrapper(
                                  cell_states, log_probs, finished, lengths)
def __init__(self, num_classes, matcher, weight_dict, eos_coef, losses):
    """ Create the criterion.
    Parameters:
        num_classes: number of object categories, omitting the special no-object category
        matcher: module able to compute a matching between targets and proposals
        weight_dict: dict containing as key the names of the losses and as values their relative weight.
        eos_coef: relative classification weight applied to the no-object category
        losses: list of all the losses to be applied. See get_loss for list of available losses.
    """
    super().__init__()
    self.num_classes = num_classes
    self.matcher = matcher
    self.weight_dict = weight_dict
    self.eos_coef = eos_coef
    self.losses = losses
    # paddle.ones expects a shape list, and gradients are disabled via
    # stop_gradient (Paddle's counterpart of requires_grad_(False)).
    empty_weight = paddle.ones([self.num_classes + 1])
    empty_weight.stop_gradient = True
    empty_weight[-1] = self.eos_coef
    self.register_buffer('empty_weight', empty_weight)
def _rel_shift(self, x, zero_triu=False):
    x_shape = x.shape

    zero_pad = paddle.zeros([x_shape[0], x_shape[1], x_shape[2], 1],
                            dtype=x.dtype)
    x_padded = paddle.concat([zero_pad, x], axis=-1)

    x_padded = paddle.reshape(
        x_padded, shape=[x_shape[0], x_shape[1], x_shape[3] + 1, x_shape[2]])

    x = paddle.reshape(x_padded[:, :, 1:, :], shape=x_shape)

    if zero_triu:
        ones = paddle.ones([x_shape[2], x_shape[3]])
        x = x * paddle.tril(
            ones, diagonal=x_shape[3] - x_shape[2]).unsqueeze([2, 3])

    return x
def cosine(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
    """Compute a window with a simple cosine shape.

    Parameters:
        M(int): window size.
        sym(bool): whether to return symmetric window. The default value is True.
        dtype(str): the datatype of returned tensor.

    Returns:
        Tensor: the window tensor

    Notes:
        This function is consistent with scipy.signal.windows.cosine().
    """
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)
    w = paddle.sin(math.pi / M * (paddle.arange(0, M, dtype=dtype) + .5))

    return _truncate(w, needs_trunc)
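# Hedged usage sketch for the cosine window above (same module assumptions as
# for general_gaussian: paddle, math, and the private _len_guards/_extend/
# _truncate helpers are available).
win = cosine(M=10)          # symmetric half-sine shaped window
print(win.shape)            # [10]; values follow sin(pi * (k + 0.5) / 10)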
def partial_trace_discontiguous(rho, preserve_qubits=None):
    r"""Compute the partial trace of a quantum state, tracing out an arbitrary subsystem.

    Args:
        rho (Tensor): the input quantum state
        preserve_qubits (list): the qubits to preserve; defaults to None, meaning all qubits are kept
    """
    if preserve_qubits is None:
        return rho
    else:
        n = int(log2(rho.size) // 2)
        num_preserve = len(preserve_qubits)

        shape = paddle.ones((n + 1, ))
        shape = 2 * shape
        shape[n] = 2**n
        shape = paddle.cast(shape, "int32")
        identity = paddle.eye(2**n)
        identity = paddle.reshape(identity, shape=shape)
        discard = list()
        for idx in range(0, n):
            if idx not in preserve_qubits:
                discard.append(idx)
        addition = [n]
        preserve_qubits.sort()

        preserve_qubits = paddle.to_tensor(preserve_qubits)
        discard = paddle.to_tensor(discard)
        addition = paddle.to_tensor(addition)
        permute = paddle.concat([discard, preserve_qubits, addition])

        identity = paddle.transpose(identity, perm=permute)
        identity = paddle.reshape(identity, (2**n, 2**n))

        result = np.zeros((2**num_preserve, 2**num_preserve),
                          dtype="complex64")
        result = paddle.to_tensor(result)

        for i in range(0, 2**num_preserve):
            bra = identity[i * 2**num_preserve:(i + 1) * 2**num_preserve, :]
            result = result + matmul(matmul(bra, rho),
                                     transpose(bra, perm=[1, 0]))

        return result
def forward(self, x, lengths=None):
    C, L = x.shape[1], x.shape[2]  # KP: (N, C, L)

    def _compute_statistics(x, m, axis=2, eps=self.eps):
        mean = (m * x).sum(axis)
        std = paddle.sqrt(
            (m * (x - mean.unsqueeze(axis)).pow(2)).sum(axis).clip(eps))
        return mean, std

    if lengths is None:
        lengths = paddle.ones([x.shape[0]])

    # Make binary mask of shape [N, 1, L]
    mask = length_to_mask(lengths * L, max_len=L)
    mask = mask.unsqueeze(1)

    # Expand the temporal context of the pooling layer by allowing the
    # self-attention to look at global properties of the utterance.
    if self.global_context:
        total = mask.sum(axis=2, keepdim=True).astype('float32')
        mean, std = _compute_statistics(x, mask / total)
        mean = mean.unsqueeze(2).tile((1, 1, L))
        std = std.unsqueeze(2).tile((1, 1, L))
        attn = paddle.concat([x, mean, std], axis=1)
    else:
        attn = x

    # Apply layers
    attn = self.conv(self.tanh(self.tdnn(attn)))

    # Filter out zero-paddings
    attn = paddle.where(
        mask.tile((1, C, 1)) == 0,
        paddle.ones_like(attn) * float("-inf"), attn)

    attn = F.softmax(attn, axis=2)
    mean, std = _compute_statistics(x, attn)

    # Append mean and std of the batch
    pooled_stats = paddle.concat((mean, std), axis=1)
    pooled_stats = pooled_stats.unsqueeze(2)

    return pooled_stats
def test_multiple_gpus(self):
    dist.init_parallel_env()
    self.trainer_id = dist.get_rank()

    model_a = SimpleNet(self.trainer_id)
    model_b = SimpleNet(self.trainer_id)

    state_dict = model_a.state_dict()
    model_b.set_state_dict(state_dict)

    model_a = paddle.DataParallel(model_a, find_unused_parameters=True)
    model_b = paddle.DataParallel(model_b, find_unused_parameters=True)

    ones_input = paddle.ones(shape=(batch, in_dim))
    ones_input.stop_gradient = True

    w1_grad_sum = np.zeros((in_dim, out_dim), dtype='float32')
    w2_grad_sum = np.zeros((in_dim, out_dim), dtype='float32')

    for step_id in range(5):
        random_input = paddle.rand(shape=(batch, in_dim))
        random_input.stop_gradient = True

        if step_id % 2 == 0:
            out_a = model_a(random_input)
            out_b = model_b(random_input)
        else:
            out_a = model_a(ones_input)
            out_b = model_b(ones_input)

        out_a.sum().backward()
        out_b.sum().backward()

        self.check_gradient(model_a.parameters())
        self.check_gradient(model_b.parameters())

        # test acc gradient
        w1_grad_sum = self.check_acc(model_a._layers.w1.grad, w1_grad_sum,
                                     model_b._layers.w1.grad)
        w2_grad_sum = self.check_acc(model_a._layers.w2.grad, w2_grad_sum,
                                     model_b._layers.w2.grad)

        model_a.clear_gradients()
def var(x, axis=None, unbiased=True, keepdim=False, name=None):
    """
    Computes the variance of ``x`` along ``axis`` .

    Args:
        x (Tensor): The input Tensor with data type float32, float64.
        axis (int|list|tuple, optional): The axis along which to perform variance calculations. ``axis`` should be int, list(int) or tuple(int).

            - If ``axis`` is a list/tuple of dimension(s), variance is calculated along all element(s) of ``axis`` . ``axis`` or element(s) of ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
            - If ``axis`` or element(s) of ``axis`` is less than 0, it works the same way as :math:`axis + D` .
            - If ``axis`` is None, variance is calculated over all elements of ``x``. Default is None.

        unbiased (bool, optional): Whether to use the unbiased estimation. If ``unbiased`` is True, the divisor used in the computation is :math:`N - 1`, where :math:`N` represents the number of elements along ``axis`` , otherwise the divisor is :math:`N`. Default is True.
        keepdim (bool, optional): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have one fewer dimension than the input unless ``keepdim`` is True. Default is False.
        name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor, results of variance along ``axis`` of ``x``, with the same data type as ``x``.

    Examples:
        .. code-block:: python

            import paddle

            x = paddle.to_tensor([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]])
            out1 = paddle.var(x)
            # [2.66666667]
            out2 = paddle.var(x, axis=1)
            # [1.         4.33333333]
    """
    if not paddle.in_dynamic_mode():
        check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'var')

    u = mean(x, axis, True, name)
    out = paddle.sum((x - u)**2, axis, keepdim=keepdim, name=name)

    n = paddle.cast(paddle.numel(x), x.dtype) \
        / paddle.cast(paddle.numel(out), x.dtype)
    if unbiased:
        one_const = paddle.ones([1], x.dtype)
        n = where(n > one_const, n - 1., one_const)
    out /= n
    return out
def testLoad(self, ):
    model = hub.load(self.local_repo,
                     model='MM',
                     source='local',
                     out_channels=8)

    data = paddle.rand((1, 3, 100, 100))
    out = model(data)
    np.testing.assert_equal(out.shape, [1, 8, 50, 50])

    model = hub.load(self.github_repo,
                     model='MM',
                     source='github',
                     force_reload=True)

    model = hub.load(self.github_repo,
                     model='MM',
                     source='github',
                     force_reload=False,
                     pretrained=False)

    model = hub.load(self.github_repo.split(':')[0],
                     model='MM',
                     source='github',
                     force_reload=False,
                     pretrained=False)

    model = hub.load(self.github_repo,
                     model='MM',
                     source='github',
                     force_reload=False,
                     pretrained=True,
                     out_channels=8)

    data = paddle.ones((1, 3, 2, 2))
    out = model(data)

    gt = np.array([
        1.53965068, 0., 0., 1.39455748, 0.72066200, 0.19773030, 2.09201908,
        0.37345418
    ])
    np.testing.assert_equal(out.shape, [1, 8, 1, 1])
    np.testing.assert_almost_equal(out.numpy(),
                                   gt.reshape(1, 8, 1, 1),
                                   decimal=5)
def build_program(self):
    start_prog = paddle.static.Program()
    main_prog = paddle.static.Program()

    with paddle.static.program_guard(main_prog, start_prog):
        x = paddle.static.data('x', shape=[2, 2])
        x.stop_gradient = False
        y = x * x
        v = paddle.ones([2, 2])
        v.stop_gradient = False

        grad_y = paddle.zeros_like(y)
        grad_y.stop_gradient = False
        grad_x = paddle.static.gradients(y, x, grad_y)
        # test with single targets
        jvp = paddle.static.gradients(grad_x, grad_y, v)

    return start_prog, main_prog, [grad_x, jvp]
def test_dim4(self):
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
    expected_np = np.array(
        [[[[0, 3], [2, 2], [2, 2]],
          [[2, 2], [1, 4], [2, 2]],
          [[2, 2], [2, 2], [2, 5]],
          [[2, 2], [2, 2], [2, 2]]],
         [[[6, 9], [2, 2], [2, 2]],
          [[2, 2], [7, 10], [2, 2]],
          [[2, 2], [2, 2], [8, 11]],
          [[2, 2], [2, 2], [2, 2]]]]).astype('float32')
    expected_grad = np.array(
        [[[[0, 0], [1, 1], [1, 1]],
          [[1, 1], [0, 0], [1, 1]],
          [[1, 1], [1, 1], [0, 0]],
          [[1, 1], [1, 1], [1, 1]]],
         [[[0, 0], [1, 1], [1, 1]],
          [[1, 1], [0, 0], [1, 1]],
          [[1, 1], [1, 1], [0, 0]],
          [[1, 1], [1, 1], [1, 1]]]]).astype('float32')

    for idx, p in enumerate(self.places):
        if idx == 0:
            paddle.set_device('cpu')
        else:
            paddle.set_device('gpu')
        for dtype in self.typelist:
            v = paddle.to_tensor(np.arange(12).reshape(2, 2, 3), dtype=dtype)
            var = (np.random.random() + 1)
            x = paddle.ones((2, 4, 3, 2), dtype=dtype)
            x.stop_gradient = False
            y = x * 2
            ny = y.fill_diagonal_tensor(v, offset=0, dim1=1, dim2=2)
            loss = ny.sum()
            loss.backward()

            self.assertEqual(
                (ny.numpy().astype('float32') == expected_np).all(), True)
            self.assertEqual(
                (y.grad.numpy().astype('float32') == expected_grad).all(),
                True)
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def forward(self,
            input_ids,
            position_ids=None,
            attention_mask=None,
            use_cache=False,
            cache=None):
    self.checkpoints = []
    if position_ids is None:
        past_length = 0
        if cache is not None:
            past_length = paddle.shape(cache[0].k)[-2]
        position_ids = paddle.arange(past_length,
                                     paddle.shape(input_ids)[-1] +
                                     past_length,
                                     dtype='int64')
        position_ids = position_ids.unsqueeze(0)  # .expand_as(input_ids)
        position_ids = paddle.fluid.layers.expand_as(position_ids,
                                                     input_ids)
    embedding_output = self.embeddings(input_ids=input_ids,
                                       position_ids=position_ids)

    # TODO, use registered buffer
    causal_mask = paddle.tensor.triu(paddle.ones(
        (paddle.shape(input_ids)[-1], paddle.shape(input_ids)[-1])) * -1e9,
                                     diagonal=1)

    if attention_mask is not None:
        attention_mask = attention_mask + causal_mask
    else:
        attention_mask = causal_mask

    # The tensor returned by triu is not in the static graph.
    attention_mask.stop_gradient = True

    encoder_outputs = self.decoder(embedding_output,
                                   memory=None,
                                   tgt_mask=attention_mask,
                                   use_cache=use_cache,
                                   cache=cache)
    self.checkpoints.extend(self.decoder.checkpoints)
    return encoder_outputs
def future_mask(time_steps, dtype="bool"):
    """Generate a lower triangular mask.

    It is used in the transformer decoder to prevent the decoder from seeing
    future information.

    Parameters
    ----------
    time_steps : int
        Decoder time steps.
    dtype : str, optional
        The data type of the generated mask, by default "bool".

    Returns
    -------
    Tensor
        The generated mask.
    """
    mask = paddle.tril(paddle.ones([time_steps, time_steps]))
    return paddle.cast(mask, dtype)
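# Hedged usage sketch: a 3-step causal mask. True marks positions a decoder
# step may attend to; the strict upper triangle (future steps) is masked out.
import paddle

m = future_mask(3)
# m.numpy() ->
# [[ True, False, False],
#  [ True,  True, False],
#  [ True,  True,  True]]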
def limit_by_capacity(topk_idx, num_expert, world_size, capacity, group=None):
    with paddle.no_grad():
        capacity = paddle.ones(shape=[num_expert],
                               dtype=paddle.int64) * capacity
        pos, lec, gec = count_by_gate(topk_idx,
                                      num_expert,
                                      world_size,
                                      require_pos=False,
                                      group=group)
        new_gec = _limit_by_capacity(gec, capacity, world_size)
        if world_size > 1:
            assert group.nranks == world_size
            new_lec = _alltoall(new_gec, group=group)
        else:
            new_lec = new_gec

        topk_idx = _prune_gate_by_capacity(topk_idx, new_lec, num_expert,
                                           world_size)

    return new_lec, new_gec, topk_idx
def func_bool(self):
    expected_np = np.array([[False, True, True], [True, False, True],
                            [True, True, False]])

    typelist = ['bool']
    places = [fluid.CPUPlace()]
    if fluid.core.is_compiled_with_cuda():
        places.append(fluid.CUDAPlace(0))

    for idx, p in enumerate(places):
        if idx == 0:
            paddle.set_device('cpu')
        else:
            paddle.set_device('gpu')
        for dtype in typelist:
            x = paddle.ones((3, 3), dtype=dtype)
            x.stop_gradient = True
            x.fill_diagonal_(0, offset=0, wrap=True)

            self.assertEqual((x.numpy() == expected_np).all(), True)
def export_model(net, cfg, save_dir):
    net.forward = paddle.jit.to_static(net.forward)
    input_shape = [1] + list(cfg.val_dataset[0][0].shape)
    input_var = paddle.ones(input_shape)
    out = net(input_var)

    save_path = os.path.join(save_dir, 'model')
    paddle.jit.save(net, save_path, input_spec=[input_var])

    yml_file = os.path.join(save_dir, 'deploy.yaml')
    with open(yml_file, 'w') as file:
        transforms = cfg.dic['val_dataset']['transforms']
        data = {
            'Deploy': {
                'transforms': transforms,
                'model': 'model.pdmodel',
                'params': 'model.pdiparams'
            }
        }
        yaml.dump(data, file)
def func_test_backward_error(self):
    # It raises an error because the inplace operator will result
    # in incorrect gradient computation.
    with paddle.fluid.dygraph.guard():
        var_a = paddle.ones(shape=self.input_shape, dtype="float32")
        var_a.stop_gradient = False

        var_b = var_a**2

        # Here, the gradient computation will use the value of var_b
        var_c = var_b**2
        view_var_b = self.view_api_processing(var_b)
        view_var_b[0] = 2.  # var_b is modified inplace

        loss = paddle.nn.functional.relu(var_c)
        with self.assertRaisesRegexp(
                RuntimeError,
                "received tensor_version:{} != wrapper_version_snapshot:{}".
                format(1, 0)):
            loss.backward()
def forward(self, h, x, mess_graph):
    """forward"""
    mask = paddle.ones([h.shape[0], 1])
    mask[0] = 0

    for it in range(self.depth):
        h_nei = index_select_ND(h, 0, mess_graph)
        sum_h = paddle.sum(h_nei, axis=1)
        z_input = paddle.concat([x, sum_h], axis=1)
        z = F.sigmoid(self.W_z(z_input))

        r_1 = paddle.reshape(self.W_r(x), shape=[-1, 1, self.hidden_size])
        r_2 = self.U_r(h_nei)
        r = F.sigmoid(r_1 + r_2)

        gated_h = r * h_nei
        sum_gated_h = paddle.sum(gated_h, axis=1)
        h_input = paddle.concat([x, sum_gated_h], axis=1)
        pre_h = F.tanh(self.W_h(h_input))
        h = (1.0 - z) * sum_h + z * pre_h
        h = h * mask

    return h
def test_backward_error(self):
    # It raises an error because the inplace operator will result
    # in incorrect gradient computation.
    with paddle.fluid.dygraph.guard():
        var_a = paddle.ones(shape=[4, 2, 3], dtype="float32")
        var_a.stop_gradient = False

        var_b = var_a**2

        # Here, the gradient computation will use the value of var_b
        var_c = var_b**2
        var_b[1:2] = 3.3  # var_b is modified inplace after using it

        var_d = var_b**2

        loss = paddle.nn.functional.relu(var_c + var_d)
        with self.assertRaisesRegexp(
                RuntimeError,
                "received tensor_version:{} != wrapper_version_snapshot:{}".
                format(1, 0)):
            loss.backward()
def graph_norm(graph, feature):
    """Implementation of graph normalization.

    Reference Paper: BENCHMARKING GRAPH NEURAL NETWORKS

    Each node's features are divided by sqrt(num_nodes) of the graph it
    belongs to.

    Args:
        graph: the graph object from (:code:`Graph`)

        feature: A tensor with shape (num_nodes, feature_size).

    Return:
        A tensor with shape (num_nodes, hidden_size)
    """
    nodes = paddle.ones(shape=[graph.num_nodes, 1], dtype="float32")
    norm = graph_pool(graph, nodes, pool_type="sum")
    norm = paddle.sqrt(norm)
    norm = paddle.gather(norm, graph.graph_node_id)
    return feature / norm
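# Hedged sketch of the effect for a single graph with num_nodes = 4, written
# without the Graph/graph_pool utilities above: graph_norm simply scales each
# node's feature vector by 1/sqrt(num_nodes) of the graph it belongs to.
import paddle

feature = paddle.ones([4, 16])                 # (num_nodes, feature_size)
norm = paddle.sqrt(paddle.to_tensor(4.0))      # sqrt(num_nodes)
normalized = feature / norm                    # every entry becomes 0.5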
def test_api(self):
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        input0 = fluid.layers.fill_constant(shape=[2, 3],
                                            dtype='int64',
                                            value=5)
        input1 = fluid.layers.fill_constant(shape=[2, 3],
                                            dtype='int64',
                                            value=3)
        expected_result = np.empty((2, 3))
        expected_result.fill(8)
        sum_value = paddle.add_n([input0, input1])
        exe = fluid.Executor(fluid.CPUPlace())
        result = exe.run(fetch_list=[sum_value])

        self.assertEqual((result == expected_result).all(), True)

    with fluid.dygraph.guard():
        input0 = paddle.ones(shape=[2, 3], dtype='float32')
        expected_result = np.empty((2, 3))
        expected_result.fill(2)
        sum_value = paddle.add_n([input0, input0])

        self.assertEqual((sum_value.numpy() == expected_result).all(), True)
def __init__(self, block, layers, num_classes=1000, zero_init_residual=False):
    super(ResNet, self).__init__()
    self.inplanes = 64
    self.conv1 = nn.Conv2D(3,
                           64,
                           kernel_size=7,
                           stride=2,
                           padding=3,
                           bias_attr=False)
    self.bn1 = nn.BatchNorm2D(64)
    self.relu = nn.ReLU()
    self.maxpool = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
    self.avgpool = nn.AdaptiveAvgPool2D((1, 1))
    self.fc = nn.Linear(512 * block.expansion, num_classes)

    # Iterate over the sublayer instances themselves; iterating the
    # _sub_layers dict directly would only yield the layer names.
    for m in self.sublayers():
        if isinstance(m, nn.Conv2D):
            m.weight_attr = paddle.ParamAttr(
                initializer=nn.initializer.KaimingNormal())
        elif isinstance(m, nn.BatchNorm2D):
            m.weight.set_value(paddle.ones(m.weight.shape))
            m.bias.set_value(paddle.zeros(m.bias.shape))

    # Zero-initialize the last BN in each residual branch, so that the
    # residual branch starts with zeros, and each residual block behaves
    # like an identity. This improves the model by 0.2~0.3% according to
    # https://arxiv.org/abs/1706.02677
    if zero_init_residual:
        for m in self.sublayers():
            if isinstance(m, Bottleneck):
                m.bn3.weight.set_value(paddle.zeros(m.bn3.weight.shape))
            elif isinstance(m, BasicBlock):
                m.bn2.weight.set_value(paddle.zeros(m.bn2.weight.shape))