def __call__(self, x, pad):
    pad = paddle.reshape(pad, shape=[2, -1])
    pad = paddle.transpose(pad, perm=[1, 0])
    pad = paddle.reverse(pad, axis=[0])
    pad = paddle.flatten(pad)
    pad = paddle.cast(pad, dtype="int32")
    pad1, pad2 = paddle.split(pad, num_or_sections=2, axis=0)
    x = paddle.nn.functional.pad(x=x, pad=pad1, **self.layer_attrs)
    x = paddle.transpose(x, perm=[2, 3, 0, 1])
    x = paddle.nn.functional.pad(x=x, pad=pad2, **self.layer_attrs)
    out = paddle.transpose(x, perm=[2, 3, 0, 1])
    return out
def iou_single(a, b, mask, n_class):
    valid = mask == 1
    valid_flatten = paddle.reshape(valid, (-1, ))
    valid_flatten = paddle.cast(valid_flatten, dtype="int32")
    index = where(valid_flatten == 1)
    if index.shape[0] == 0:
        return paddle.zeros((1, ))

    index = paddle.reshape(index, (1, -1))
    a_flatten = paddle.reshape(a, (1, -1))
    a = paddle.index_sample(a_flatten, index)
    a = paddle.reshape(a, (-1, ))

    b_flatten = paddle.reshape(b, (1, -1))
    b = paddle.index_sample(b_flatten, index)
    b = paddle.reshape(b, (-1, ))

    miou = []
    for i in range(n_class):
        inter = paddle.logical_and(a == i, b == i)
        inter = paddle.cast(inter, dtype='float32')
        union = paddle.logical_or(a == i, b == i)
        union = paddle.cast(union, dtype='float32')
        miou.append(paddle.sum(inter) / (paddle.sum(union) + EPS))
    miou = sum(miou) / len(miou)
    return miou
def gen_acrostic(model, start_words, prefix_words=None):
    result = []
    start_words_len = len(start_words)
    input = paddle.to_tensor([word2ix['<START>']])
    input = paddle.reshape(input, [1, 1])
    # tracks how many acrostic head characters have been used so far
    index = 0
    pre_word = '<START>'
    hidden = None

    # if a style prefix is given, run it through the model to build the hidden state
    if prefix_words:
        for word in prefix_words:
            output, hidden = model(input, hidden)
            input = paddle.to_tensor([word2ix[word]])
            input = paddle.reshape(input, [1, 1])

    # start generating the poem
    for i in range(Config.max_gen_len):
        output, hidden = model(input, hidden)
        _, top_index = paddle.fluid.layers.topk(output[0], k=1)
        top_index = top_index.numpy()[0]
        w = ix2word[top_index]

        # the previous character ends a sentence, so the next line starts with a head word
        if pre_word in {'。', ',', '?', '!', '<START>'}:
            if index == start_words_len:
                break
            else:
                w = start_words[index]
                index += 1
                # print(w, word2ix[w])
                input = paddle.to_tensor([word2ix[w]])
                input = paddle.reshape(input, [1, 1])
        else:
            input = paddle.to_tensor([top_index])
            input = paddle.reshape(input, [1, 1])
        result.append(w)
        pre_word = w
    result = ''.join(result)
    return result
def delta2rbox(self, rrois, deltas, means, stds, wh_ratio_clip=1e-6):
    """
    :param rrois: (cx, cy, w, h, theta)
    :param deltas: (dx, dy, dw, dh, dtheta)
    :param means: means of anchor
    :param stds: stds of anchor
    :param wh_ratio_clip: clip threshold of wh_ratio
    :return:
    """
    deltas = paddle.reshape(deltas, [-1, 5])
    rrois = paddle.reshape(rrois, [-1, 5])
    pd_means = paddle.ones(shape=[5]) * means
    pd_stds = paddle.ones(shape=[5]) * stds
    denorm_deltas = deltas * pd_stds + pd_means

    dx = denorm_deltas[:, 0]
    dy = denorm_deltas[:, 1]
    dw = denorm_deltas[:, 2]
    dh = denorm_deltas[:, 3]
    dangle = denorm_deltas[:, 4]

    max_ratio = np.abs(np.log(wh_ratio_clip))
    dw = paddle.clip(dw, min=-max_ratio, max=max_ratio)
    dh = paddle.clip(dh, min=-max_ratio, max=max_ratio)

    rroi_x = rrois[:, 0]
    rroi_y = rrois[:, 1]
    rroi_w = rrois[:, 2]
    rroi_h = rrois[:, 3]
    rroi_angle = rrois[:, 4]

    gx = dx * rroi_w * paddle.cos(rroi_angle) - dy * rroi_h * paddle.sin(
        rroi_angle) + rroi_x
    gy = dx * rroi_w * paddle.sin(rroi_angle) + dy * rroi_h * paddle.cos(
        rroi_angle) + rroi_y
    gw = rroi_w * dw.exp()
    gh = rroi_h * dh.exp()
    ga = np.pi * dangle + rroi_angle
    ga = (ga + np.pi / 4) % np.pi - np.pi / 4
    bboxes = paddle.stack([gx, gy, gw, gh, ga], axis=-1)
    return bboxes
def forward(self, generator_prediction_scores,
            discriminator_prediction_scores, generator_labels,
            discriminator_labels, attention_mask):
    # generator loss
    gen_loss = self.gen_loss_fct(
        paddle.reshape(generator_prediction_scores, [-1, self.vocab_size]),
        paddle.reshape(generator_labels, [-1]))
    # TODO: the following 4 lines can be removed once
    # CrossEntropyLoss(reduction='mean') is improved
    umask_positions = paddle.zeros_like(generator_labels).astype(
        paddle.get_default_dtype())
    mask_positions = paddle.ones_like(generator_labels).astype(
        paddle.get_default_dtype())
    mask_positions = paddle.where(generator_labels == -100, umask_positions,
                                  mask_positions)
    if mask_positions.sum() == 0:
        gen_loss = paddle.to_tensor([0.0])
    else:
        gen_loss = gen_loss.sum() / mask_positions.sum()

    # discriminator loss
    seq_length = discriminator_labels.shape[1]
    disc_loss = self.disc_loss_fct(
        paddle.reshape(discriminator_prediction_scores, [-1, seq_length]),
        discriminator_labels.astype(paddle.get_default_dtype()))
    if attention_mask is not None:
        umask_positions = paddle.ones_like(discriminator_labels).astype(
            paddle.get_default_dtype())
        mask_positions = paddle.zeros_like(discriminator_labels).astype(
            paddle.get_default_dtype())
        use_disc_loss = paddle.where(attention_mask, disc_loss,
                                     mask_positions)
        umask_positions = paddle.where(attention_mask, umask_positions,
                                       mask_positions)
        disc_loss = use_disc_loss.sum() / umask_positions.sum()
    else:
        total_positions = paddle.ones_like(discriminator_labels).astype(
            paddle.get_default_dtype())
        disc_loss = disc_loss.sum() / total_positions.sum()

    return self.gen_weight * gen_loss + self.disc_weight * disc_loss
def forward(self, inputs, labels=None):
    """
    inputs: [btc, time_steps]
    labels: [btc, time_steps]
    """
    batch_size, seq_length = inputs.shape[0], inputs.shape[1]
    init_hidden_data = np.zeros(
        (self.num_layers, batch_size, self.hidden_size), dtype='float32')
    init_hidden = paddle.to_tensor(data=init_hidden_data,
                                   dtype=None,
                                   place=None,
                                   stop_gradient=True)
    x_emb = self.embedding(inputs)
    # apply dropout (random drop)
    if self.dropout_layer is not None:
        x_emb = self.dropout_layer(x_emb)
    rnn_out, last_hidden = self.rnn(x_emb, init_hidden)
    output = self.classifier(rnn_out)

    if labels is not None:
        logits = paddle.reshape(output, shape=[-1, self.vocab_size])
        labels = paddle.reshape(labels, shape=[-1, 1])
        loss = nn.functional.softmax_with_cross_entropy(
            logits=logits, label=labels, soft_label=False, ignore_index=1)
        # compute the recall@20 metric
        acc = paddle.metric.accuracy(input=logits, label=labels, k=20)
        loss = paddle.reshape(loss, shape=[-1, seq_length])
        # aggregate the loss over batch and sequence length
        loss = paddle.mean(loss, axis=[0])
        loss = paddle.sum(loss)
        # loss = paddle.mean(loss)
        return loss, acc
    return output
def forward(self, inputs, gsrm_word_pos, gsrm_slf_attn_bias1,
            gsrm_slf_attn_bias2):
    # ===== GSRM Visual-to-semantic embedding block =====
    b, t, c = inputs.shape
    pvam_features = paddle.reshape(inputs, [-1, c])
    word_out = self.fc0(pvam_features)
    word_ids = paddle.argmax(F.softmax(word_out), axis=1)
    word_ids = paddle.reshape(x=word_ids, shape=[-1, t, 1])

    # ===== GSRM Semantic reasoning block =====
    """
    This module is built from bi-directional transformers:
    ngram_feature1 is the forward one, ngram_feature2 is the backward one.
    """
    pad_idx = self.char_num

    word1 = paddle.cast(word_ids, "float32")
    word1 = F.pad(word1, [1, 0], value=1.0 * pad_idx, data_format="NLC")
    word1 = paddle.cast(word1, "int64")
    word1 = word1[:, :-1, :]
    word2 = word_ids

    enc_inputs_1 = [word1, gsrm_word_pos, gsrm_slf_attn_bias1]
    enc_inputs_2 = [word2, gsrm_word_pos, gsrm_slf_attn_bias2]

    gsrm_feature1 = self.wrap_encoder0(enc_inputs_1)
    gsrm_feature2 = self.wrap_encoder1(enc_inputs_2)

    gsrm_feature2 = F.pad(gsrm_feature2, [0, 1],
                          value=0.,
                          data_format="NLC")
    gsrm_feature2 = gsrm_feature2[:, 1:, ]
    gsrm_features = gsrm_feature1 + gsrm_feature2

    gsrm_out = self.mul(gsrm_features)

    b, t, c = gsrm_out.shape
    gsrm_out = paddle.reshape(gsrm_out, [-1, c])

    return gsrm_features, word_out, gsrm_out
def _postprocessing_by_level(self, locations, box_cls, box_reg, box_ctn,
                             scale_factor):
    """
    Args:
        locations (Variables): anchor points for current layer, [H*W, 2]
        box_cls (Variables): categories prediction, [N, C, H, W],
            C is the number of classes
        box_reg (Variables): bounding box prediction, [N, 4, H, W]
        box_ctn (Variables): centerness prediction, [N, 1, H, W]
        scale_factor (Variables): [h_scale, w_scale] for input images
    Return:
        box_cls_ch_last (Variables): score for each category, in [N, C, M],
            C is the number of classes and M is the number of anchor points
        box_reg_decoding (Variables): decoded bounding box, in [N, M, 4],
            last dimension is [x1, y1, x2, y2]
    """
    act_shape_cls = self._merge_hw(box_cls)
    box_cls_ch_last = paddle.reshape(x=box_cls, shape=act_shape_cls)
    box_cls_ch_last = F.sigmoid(box_cls_ch_last)

    act_shape_reg = self._merge_hw(box_reg)
    box_reg_ch_last = paddle.reshape(x=box_reg, shape=act_shape_reg)
    box_reg_ch_last = paddle.transpose(box_reg_ch_last, perm=[0, 2, 1])
    box_reg_decoding = paddle.stack([
        locations[:, 0] - box_reg_ch_last[:, :, 0],
        locations[:, 1] - box_reg_ch_last[:, :, 1],
        locations[:, 0] + box_reg_ch_last[:, :, 2],
        locations[:, 1] + box_reg_ch_last[:, :, 3]
    ], axis=1)
    box_reg_decoding = paddle.transpose(box_reg_decoding, perm=[0, 2, 1])

    act_shape_ctn = self._merge_hw(box_ctn)
    box_ctn_ch_last = paddle.reshape(x=box_ctn, shape=act_shape_ctn)
    box_ctn_ch_last = F.sigmoid(box_ctn_ch_last)

    # recover the location to the original image
    im_scale = paddle.concat([scale_factor, scale_factor], axis=1)
    box_reg_decoding = box_reg_decoding / im_scale
    box_cls_ch_last = box_cls_ch_last * box_ctn_ch_last
    return box_cls_ch_last, box_reg_decoding
def apply_rotary_position_embeddings(sinusoidal_pos, query_layer, key_layer,
                                     value_layer=None):
    # https://kexue.fm/archives/8265
    # sin [batch_size, num_heads, sequence_length, embed_size_per_head//2]
    # cos [batch_size, num_heads, sequence_length, embed_size_per_head//2]
    sin, cos = paddle.chunk(sinusoidal_pos, 2, axis=-1)
    # sin [θ0,θ1,θ2......θd/2-1] -> sin_pos [θ0,θ0,θ1,θ1,θ2,θ2......θd/2-1,θd/2-1]
    sin_pos = paddle.reshape(paddle.stack([sin, sin], axis=-1),
                             sinusoidal_pos.shape)
    # cos [θ0,θ1,θ2......θd/2-1] -> cos_pos [θ0,θ0,θ1,θ1,θ2,θ2......θd/2-1,θd/2-1]
    cos_pos = paddle.reshape(paddle.stack([cos, cos], axis=-1),
                             sinusoidal_pos.shape)
    # rotate_half_query_layer [-q1,q0,-q3,q2......,-qd-1,qd-2]
    rotate_half_query_layer = paddle.reshape(
        paddle.stack(
            [-query_layer[:, :, :, 1::2], query_layer[:, :, :, 0::2]],
            axis=-1),
        query_layer.shape,
    )
    query_layer = query_layer * cos_pos + rotate_half_query_layer * sin_pos
    # rotate_half_key_layer [-k1,k0,-k3,k2......,-kd-1,kd-2]
    rotate_half_key_layer = paddle.reshape(
        paddle.stack([-key_layer[:, :, :, 1::2], key_layer[:, :, :, 0::2]],
                     axis=-1),
        key_layer.shape,
    )
    key_layer = key_layer * cos_pos + rotate_half_key_layer * sin_pos
    if value_layer is not None:
        # rotate_half_value_layer [-v1,v0,-v3,v2......,-vd-1,vd-2]
        rotate_half_value_layer = paddle.reshape(
            paddle.stack(
                [-value_layer[:, :, :, 1::2], value_layer[:, :, :, 0::2]],
                axis=-1),
            value_layer.shape,
        )
        value_layer = value_layer * cos_pos + rotate_half_value_layer * sin_pos
        return query_layer, key_layer, value_layer
    return query_layer, key_layer
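# A minimal illustrative sketch (not part of the original source) of the interleaving
# trick used above: paddle.stack([sin, sin], axis=-1) followed by reshape turns
# [θ0, θ1, ...] into [θ0, θ0, θ1, θ1, ...], and the same stack/reshape on
# [-x_odd, x_even] builds the "rotate half" tensor. All shapes here are toy values.
import paddle

x = paddle.arange(8, dtype="float32").reshape([1, 1, 1, 8])  # fake query: [b, heads, seq, dim]
half = paddle.to_tensor([[[[0.1, 0.2, 0.3, 0.4]]]])          # fake θ values: [b, heads, seq, dim//2]
interleaved = paddle.reshape(paddle.stack([half, half], axis=-1), x.shape)
rotated = paddle.reshape(
    paddle.stack([-x[:, :, :, 1::2], x[:, :, :, 0::2]], axis=-1), x.shape)
print(interleaved.numpy())  # [0.1, 0.1, 0.2, 0.2, 0.3, 0.3, 0.4, 0.4]
print(rotated.numpy())      # [-1, 0, -3, 2, -5, 4, -7, 6]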
def _weight_norm(v, g, dim):
    shape = v.shape
    ndims = len(shape)

    if dim == -1:
        v_normalized = v / (paddle.sqrt(paddle.sum(paddle.square(v))) + 1e-12)
    elif dim == 0:
        p_matrix = paddle.reshape(v, (shape[0], -1))
        v_normalized = F.l2_normalize(p_matrix, axis=1)
        v_normalized = paddle.reshape(v_normalized, shape)
    elif dim == ndims - 1:
        p_matrix = paddle.reshape(v, (-1, shape[-1]))
        v_normalized = F.l2_normalize(p_matrix, axis=0)
        v_normalized = paddle.reshape(v_normalized, shape)
    else:
        perm = list(range(ndims))
        perm[0] = dim
        perm[dim] = 0
        p_transposed = paddle.transpose(v, perm)
        transposed_shape = p_transposed.shape
        p_matrix = paddle.reshape(p_transposed, (p_transposed.shape[0], -1))
        v_normalized = F.l2_normalize(p_matrix, axis=1)
        v_normalized = paddle.reshape(v_normalized, transposed_shape)
        v_normalized = paddle.transpose(v_normalized, perm)
    weight = F.elementwise_mul(v_normalized, g,
                               axis=dim if dim is not None else -1)
    return weight
def get_feature_by_coordinate(self, x, coord, offset_h, offset_w,
                              padded_x_w):
    x = paddle.reshape(x, [0, 0, -1])
    index = paddle.cast(
        coord[:, :, :, :self.N] * padded_x_w,
        dtype='int64') + coord[:, :, :, self.N:]  # offset_x * w + offset_y
    index = paddle.unsqueeze(index, 1)
    index = paddle.tile(index, [1, self.in_channel, 1, 1, 1])
    index = paddle.reshape(index, (0, 0, -1))

    x_range = list(range(3))
    dim = 2
    x_range[0] = dim
    x_range[dim] = 0
    x_swaped = paddle.transpose(x, perm=x_range)

    index_range = list(range(3))
    index_range[0] = dim
    index_range[dim] = 0
    index_swaped = paddle.transpose(index, perm=index_range)

    x_shape = layers.shape(x_swaped)
    index_shape = layers.shape(index_swaped)
    prod = paddle.prod(x_shape[1:], keepdim=True)

    x_swaped_flattend = paddle.reshape(x_swaped, [-1])
    index_swaped_flattend = paddle.reshape(index_swaped, [-1])
    index_swaped_flattend *= prod

    bias = paddle.arange(start=0, end=prod, step=1, dtype='float32')
    bias = paddle.tile(bias, index_shape[0])
    index_swaped_flattend += bias

    gathered = paddle.gather(x_swaped_flattend, index_swaped_flattend)
    gathered = paddle.reshape(gathered, layers.shape(index_swaped))

    x_offset = paddle.transpose(gathered, perm=x_range)
    x_offset = paddle.reshape(
        x_offset, (-1, self.in_channel, offset_h, offset_w, self.N))
    return x_offset
def forward(self, x):
    out = self.conv1(x)

    rp = F.adaptive_max_pool2d(out, (self.s, 1))
    cp = F.adaptive_max_pool2d(out, (1, self.s))

    p = paddle.reshape(self.conv_p(rp), (x.shape[0], self.k, self.s, self.s))
    q = paddle.reshape(self.conv_q(cp), (x.shape[0], self.k, self.s, self.s))
    p = F.sigmoid(p)
    q = F.sigmoid(q)
    p = p / paddle.sum(p, axis=3, keepdim=True)
    q = q / paddle.sum(q, axis=2, keepdim=True)

    p = paddle.reshape(p, (x.shape[0], self.k, 1, self.s, self.s))
    p = paddle.expand(p, (x.shape[0], self.k, x.shape[1] // self.k, self.s,
                          self.s))
    p = paddle.reshape(p, (x.shape[0], x.shape[1], self.s, self.s))

    q = paddle.reshape(q, (x.shape[0], self.k, 1, self.s, self.s))
    q = paddle.expand(q, (x.shape[0], self.k, x.shape[1] // self.k, self.s,
                          self.s))
    q = paddle.reshape(q, (x.shape[0], x.shape[1], self.s, self.s))

    p = self.resize_mat(p, x.shape[2] // self.s)
    q = self.resize_mat(q, x.shape[2] // self.s)

    y = paddle.matmul(p, x)
    y = paddle.matmul(y, q)
    y = self.conv2(y)
    return y
def forward(self, keys, querys, mask):
    """Calculate forward propagation of tacotron2 decoder.

    Parameters
    ----------
    keys: Tensor[shape=(B, T_key, C)]
        Batch of the sequences of padded output from encoder.
    querys: Tensor[shape=(B, T_query, C)]
        Batch of the sequences of padded mel spectrogram.
    mask: Tensor
        Mask generated with text length. Shape should be (B, T_key, T_query)
        or broadcastable shape.

    Returns
    -------
    mel_output: Tensor [shape=(B, T_query, C)]
        Output sequence of features.
    stop_logits: Tensor [shape=(B, T_query)]
        Output sequence of stop logits.
    alignments: Tensor [shape=(B, T_query, T_key)]
        Attention weights.
    """
    querys = paddle.reshape(
        querys,
        [querys.shape[0], querys.shape[1] // self.reduction_factor, -1])
    querys = paddle.concat([
        paddle.zeros(shape=[querys.shape[0], 1, querys.shape[-1]],
                     dtype=querys.dtype), querys
    ], axis=1)
    querys = self.prenet(querys)

    self._initialize_decoder_states(keys)
    self.mask = mask

    mel_outputs, stop_logits, alignments = [], [], []
    # Ignore the last time step
    while len(mel_outputs) < querys.shape[1] - 1:
        query = querys[:, len(mel_outputs), :]
        mel_output, stop_logit, attention_weights = self._decode(query)
        mel_outputs += [mel_output]
        stop_logits += [stop_logit]
        alignments += [attention_weights]

    alignments = paddle.stack(alignments, axis=1)
    stop_logits = paddle.concat(stop_logits, axis=1)
    mel_outputs = paddle.stack(mel_outputs, axis=1)
    return mel_outputs, stop_logits, alignments
def _network(self, hidden, cell, init_actions=None, is_inference=False):
    actions = []
    entropies = []
    sample_log_probs = []

    with fluid.unique_name.guard('Controller'):
        self._create_parameter()
        inputs = self.g_emb

        for idx in range(len(self.range_tables)):
            logits, output, states = self._lstm(inputs, hidden, cell,
                                                token_idx=idx)
            hidden, cell = np.squeeze(states)
            probs = paddle.nn.functional.softmax(logits, axis=1)
            if is_inference:
                action = paddle.argmax(probs, axis=1)
            else:
                if init_actions:
                    action = paddle.slice(init_actions, axes=[1],
                                          starts=[idx], ends=[idx + 1])
                    action = paddle.squeeze(action, axis=[1])
                    action.stop_gradient = True
                else:
                    action = fluid.layers.sampling_id(probs)
            actions.append(action)
            log_prob = paddle.nn.functional.softmax_with_cross_entropy(
                logits,
                paddle.reshape(action, shape=[paddle.shape(action), 1]),
                axis=1)
            sample_log_probs.append(log_prob)

            entropy = log_prob * paddle.exp(-1 * log_prob)
            entropy.stop_gradient = True
            entropies.append(entropy)

            action_emb = paddle.cast(action, dtype=np.int64)
            inputs = paddle.static.nn.embedding(
                action_emb,
                size=(self.max_range_table, self.hidden_size),
                param_attr=paddle.ParamAttr(
                    name='emb_w', initializer=uniform_initializer(1.0)))

        self.sample_log_probs = paddle.concat(sample_log_probs, axis=0)

        entropies = paddle.stack(entropies)
        self.sample_entropies = paddle.sum(entropies)

    return actions
def forward(self, x):
    n, c, h, w = x.shape

    # query: n, c, h * w
    query = paddle.reshape(x, (n, c, h * w))
    # key: n, h * w, c
    key = paddle.reshape(x, (n, c, h * w))
    key = paddle.transpose(key, (0, 2, 1))

    # sim: n, c, c
    sim = paddle.bmm(query, key)
    # The DANet authors claim that this can avoid gradient divergence
    sim = paddle.max(sim, axis=-1, keepdim=True).expand_as(sim) - sim
    sim = F.softmax(sim, axis=-1)

    # feat: from (n, c, h * w) to (n, c, h, w)
    value = paddle.reshape(x, (n, c, h * w))
    feat = paddle.bmm(sim, value)
    feat = paddle.reshape(feat, (n, c, h, w))

    out = self.gamma * feat + x
    return out
def forward(self, n2n_g, n_feats, edge_feats, edge_feat_dist):
    feature = []
    for att_l in self.att_layers:
        feature.append(att_l(n2n_g, n_feats, edge_feats, edge_feat_dist))
    feature = paddle.stack(feature, axis=1)
    if self.merge == "cat":
        feature = paddle.reshape(feature,
                                 [-1, self.num_heads * self.hidden_dim])
    if self.merge == "mean":
        feature = paddle.mean(feature, axis=1)
    return feature
def rzz_gate_matrix(params):
    """
    RZZ gate

    :return:
    """
    theta = params
    re_a = paddle.cos(theta / 2)
    re_b = paddle.zeros([1], 'float64')
    im_a = paddle.sin(theta / 2)
    im_b = paddle.zeros([1], 'float64')
    re = paddle.reshape(
        paddle.concat([
            re_a, re_b, re_b, re_b, re_b, re_a, re_b, re_b, re_b, re_b, re_a,
            re_b, re_b, re_b, re_b, re_a
        ]), [4, 4])
    im = paddle.reshape(
        paddle.concat([
            -im_a, im_b, im_b, im_b, im_b, im_a, im_b, im_b, im_b, im_b,
            im_a, im_b, im_b, im_b, im_b, -im_a
        ]), [4, 4])

    return re + im * paddle.to_tensor([1j], 'complex128')
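# A minimal usage sketch (not from the original source): RZZ(θ) is diagonal, so the
# matrix built above is diag(e^{-iθ/2}, e^{iθ/2}, e^{iθ/2}, e^{-iθ/2}). This assumes a
# Paddle build with complex-tensor support for the float64 * complex128 product above.
import paddle

theta = paddle.to_tensor([0.3], 'float64')
u = rzz_gate_matrix(theta)
print(u.shape)   # [4, 4]
print(u[0, 0])   # cos(0.15) - 1j * sin(0.15)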
def forward(self, words, wp):
    word_embed = self.word_embed(words)
    mask = words != self.pad_index
    seq_lens = paddle.sum(paddle.cast(mask, "int32"), axis=-1)

    x, _ = self.lstm(word_embed, sequence_length=seq_lens)
    x = paddle.reshape(
        index_sample(x, wp),
        shape=[wp.shape[0], wp.shape[1], x.shape[2]],
    )
    words = paddle.index_sample(words, wp)
    return words, x
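# A small illustrative sketch (toy values, not from the original source) of
# paddle.index_sample, which the method above uses to pick word-piece positions per row:
# for a 2-D input, index_sample gathers, for each row, the columns listed in the
# matching row of the index tensor.
import paddle

words = paddle.to_tensor([[11, 12, 13, 14],
                          [21, 22, 23, 24]])
wp = paddle.to_tensor([[0, 2],
                       [1, 3]])
print(paddle.index_sample(words, wp).numpy())  # [[11 13], [21 24]]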
def forward(self, input, seq_lens):
    embedded = self.embedding(input)
    self.embedded = embedded
    output, hidden = self.lstm(
        embedded,
        sequence_length=paddle.to_tensor(seq_lens, dtype='int32'))
    encoder_feature = paddle.reshape(
        output, [-1, 2 * config.hidden_dim])  # B * t_k x 2*hidden_dim
    encoder_feature = self.W_h(encoder_feature)
    return output, encoder_feature, hidden
def test_shape_omit_dims(self):
    for dtype in self._dtypes:
        x_np = np.random.randn(2, 3, 4).astype(
            dtype) + 1j * np.random.randn(2, 3, 4).astype(dtype)
        shape = (0, -1)
        shape_ = (2, 12)
        for place in self._places:
            with dg.guard(place):
                x_var = dg.to_variable(x_np)
                y_var = paddle.reshape(x_var, shape)
                y_np = y_var.numpy()
                self.assertTrue(np.allclose(np.reshape(x_np, shape_), y_np))
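# A standalone sketch (not part of the test above) of the reshape semantics being
# exercised: in paddle.reshape, a 0 in the target shape copies the corresponding input
# dimension, and a single -1 is inferred from the remaining elements, so (0, -1) on a
# [2, 3, 4] tensor yields [2, 12].
import paddle

x = paddle.ones([2, 3, 4])
print(paddle.reshape(x, (0, -1)).shape)  # [2, 12]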
def valid_flags(self, featmap_size, valid_size):
    feat_h, feat_w = featmap_size
    valid_h, valid_w = valid_size
    assert valid_h <= feat_h and valid_w <= feat_w
    valid_x = paddle.zeros([feat_w], dtype='int32')
    valid_y = paddle.zeros([feat_h], dtype='int32')
    valid_x[:valid_w] = 1
    valid_y[:valid_h] = 1
    valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
    valid = valid_xx & valid_yy
    valid = paddle.reshape(valid, [-1, 1])
    valid = paddle.expand(valid, [-1, self.num_base_anchors]).reshape([-1])
    return valid
def forward(self, inputs):
    left_emb = self.emb(inputs[0])
    right_emb = self.emb(inputs[1])
    cross = paddle.matmul(left_emb, right_emb, transpose_y=True)
    cross = paddle.reshape(cross, [-1, 1, cross.shape[1], cross.shape[2]])
    conv = self.conv(cross)
    if self.conv_act == "relu":
        conv = F.relu(conv)
    if self.pool_type == "max":
        pool = F.max_pool2d(conv,
                            kernel_size=self.pool_size,
                            stride=self.pool_stride,
                            padding=self.pool_padding)
    reshape = paddle.reshape(pool, [
        -1,
        list(pool.shape)[1] * list(pool.shape)[2] * list(pool.shape)[3]
    ])
    hid = self.fc1(reshape)
    if self.hidden_act == "relu":
        relu_hid = F.relu(hid)
    prediction = self.fc2(relu_hid)
    return prediction
def __split_heads_qkv(queries, keys, values, n_head, d_key, d_value):
    """
    Reshape input tensors at the last dimension to split multi-heads
    and then transpose. Specifically, transform the input tensor with shape
    [bs, max_sequence_length, n_head * hidden_dim] to the output tensor
    with shape [bs, n_head, max_sequence_length, hidden_dim].
    """
    # The value 0 in shape attr means copying the corresponding dimension
    # size of the input as the output dimension size.
    reshaped_q = paddle.reshape(x=queries, shape=[0, 0, n_head, d_key])
    # permute the dimensions into:
    # [batch_size, n_head, max_sequence_len, hidden_size_per_head]
    q = paddle.transpose(x=reshaped_q, perm=[0, 2, 1, 3])
    # For encoder-decoder attention in inference, insert the ops and vars
    # into the global block to use as cache among beam search.
    reshaped_k = paddle.reshape(x=keys, shape=[0, 0, n_head, d_key])
    k = paddle.transpose(x=reshaped_k, perm=[0, 2, 1, 3])
    reshaped_v = paddle.reshape(x=values, shape=[0, 0, n_head, d_value])
    v = paddle.transpose(x=reshaped_v, perm=[0, 2, 1, 3])

    return q, k, v
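# A quick shape check (illustrative only, toy sizes assumed) of the split performed by
# __split_heads_qkv: a [bs, seq, n_head * d_key] tensor is split into heads and
# transposed to [bs, n_head, seq, d_key].
import paddle

bs, seq, n_head, d_key = 2, 5, 4, 8
queries = paddle.randn([bs, seq, n_head * d_key])
q = paddle.transpose(
    paddle.reshape(queries, [0, 0, n_head, d_key]), perm=[0, 2, 1, 3])
print(q.shape)  # [2, 4, 5, 8]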
def test(model):
    model.eval()
    avg_acc = [[], []]
    for batch_id, data in enumerate(test_reader):
        img = paddle.to_tensor(data[0])
        img = paddle.reshape(img, [-1, 1, 28, 28])
        label = paddle.to_tensor(data[1])
        label = paddle.reshape(label, [-1, 1])

        out = model(img)
        acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
        acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
        avg_acc[0].append(acc_top1.numpy())
        avg_acc[1].append(acc_top5.numpy())
        if batch_id % 100 == 0:
            _logger.info("Test | step {}: acc1 = {:}, acc5 = {:}".format(
                batch_id, acc_top1.numpy(), acc_top5.numpy()))

    _logger.info("Test | Average: acc_top1 {}, acc_top5 {}".format(
        np.mean(avg_acc[0]), np.mean(avg_acc[1])))
    return np.mean(avg_acc[0]), np.mean(avg_acc[1])
def forward(self, hidden_states, masked_positions=None):
    if masked_positions is not None:
        hidden_states = paddle.reshape(hidden_states,
                                       [-1, hidden_states.shape[-1]])
        hidden_states = paddle.tensor.gather(hidden_states,
                                             masked_positions)
    hidden_states = self.transform(hidden_states)
    hidden_states = self.activation(hidden_states)
    hidden_states = self.layer_norm(hidden_states)
    logits = paddle.tensor.matmul(hidden_states,
                                  self.decoder_weight,
                                  transpose_y=True) + self.decoder_bias
    return logits
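# An illustrative sketch (toy sizes, not from the original source) of the reshape +
# gather above: hidden states are flattened to [batch * seq, hidden] so that
# masked_positions, given as flat indices into batch * seq, select only the masked
# tokens before the expensive vocabulary projection.
import paddle

hidden_states = paddle.randn([2, 4, 8])          # [batch, seq, hidden]
masked_positions = paddle.to_tensor([1, 3, 6])   # flat indices into batch * seq
flat = paddle.reshape(hidden_states, [-1, hidden_states.shape[-1]])  # [8, 8]
picked = paddle.gather(flat, masked_positions)   # [3, 8], one row per masked token
print(picked.shape)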
def __combine_heads(x):
    """
    Transpose and then reshape the last two dimensions of input tensor x
    so that it becomes one dimension, which is reverse to __split_heads.
    """
    if len(x.shape) != 4:
        raise ValueError("Input(x) should be a 4-D Tensor.")

    trans_x = paddle.transpose(x, perm=[0, 2, 1, 3])
    # The value 0 in shape attr means copying the corresponding dimension
    # size of the input as the output dimension size.
    return paddle.reshape(
        x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]])
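# A round-trip sketch (illustrative, toy sizes assumed): combining heads undoes the
# split shown earlier, recovering the [bs, seq, n_head * d_key] layout.
import paddle

bs, n_head, seq, d_key = 2, 4, 5, 8
heads = paddle.randn([bs, n_head, seq, d_key])
merged = paddle.reshape(
    paddle.transpose(heads, perm=[0, 2, 1, 3]), [0, 0, n_head * d_key])
print(merged.shape)  # [2, 5, 32]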
def forward(self, inputs):
    with paddle.static.amp.fp16_guard():
        if self.data_format == "NHWC":
            inputs = paddle.tensor.transpose(inputs, [0, 2, 3, 1])
            inputs.stop_gradient = True
        y = self.conv(inputs)
        y = self.pool2d_max(y)
        for block in self.block_list:
            y = block(y)
        y = self.pool2d_avg(y)
        y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
        y = self.out(y)
        return y
def test():
    model.eval()
    accuracies = []
    losses = []
    for (x, y) in val_loader:
        with paddle.no_grad():
            logits = model(x)
            y = paddle.reshape(y, (-1, 1))
            loss = loss_fn(logits, y)
            acc = acc_fn(logits, y)
        accuracies.append(np.mean(acc.numpy()))
        losses.append(np.mean(loss.numpy()))
    return np.mean(accuracies) * 100, np.mean(losses)
def forward(self, inputs):
    y = self.conv1_1(inputs)
    y = self.conv1_2(y)
    y = self.conv1_3(y)
    y = self.pool2d_max(y)

    att = None
    y = {0: y, 1: att}
    for block in self.block_list:
        y = block(y)
    y = self.pool2d_avg(y[0])
    y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
    y = self.out(y)
    return y
def forward(self, x):
    x = self.inception_stem(x)
    for inception_block in self.inception_block_list:
        x = inception_block(x)
    if self.with_pool:
        x = self.avg_pool(x)
    if self.num_classes > 0:
        x = paddle.reshape(x, shape=[-1, 2048])
        x = self.dropout(x)
        x = self.fc(x)
    return x