def test_index_select_api(self):
    self.input_data()

    # case 1:
    with program_guard(Program(), Program()):
        x = fluid.layers.data(name='x', shape=[-1, 4])
        index = fluid.layers.data(
            name='index', shape=[3], dtype='int32', append_batch_size=False)
        z = paddle.index_select(x, index, dim=1)
        exe = fluid.Executor(fluid.CPUPlace())
        res, = exe.run(feed={'x': self.data_x,
                             'index': self.data_index},
                       fetch_list=[z.name],
                       return_numpy=False)
    expect_out = np.array([[1.0, 2.0, 2.0], [5.0, 6.0, 6.0],
                           [9.0, 10.0, 10.0]])
    self.assertTrue(np.allclose(expect_out, np.array(res)))

    # case 2:
    with program_guard(Program(), Program()):
        x = fluid.layers.data(name='x', shape=[-1, 4])
        index = fluid.layers.data(
            name='index', shape=[3], dtype='int32', append_batch_size=False)
        z = paddle.index_select(x, index)
        exe = fluid.Executor(fluid.CPUPlace())
        res, = exe.run(feed={'x': self.data_x,
                             'index': self.data_index},
                       fetch_list=[z.name],
                       return_numpy=False)
    expect_out = np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0],
                           [5.0, 6.0, 7.0, 8.0]])
    self.assertTrue(np.allclose(expect_out, np.array(res)))
def expand_inputs_for_generation(input_ids,
                                 expand_size,
                                 attention_mask=None,
                                 **model_kwargs):
    index = paddle.tile(
        paddle.arange(input_ids.shape[0]).unsqueeze(-1),
        [1, expand_size]).reshape([-1])

    input_ids = paddle.index_select(input_ids, index)

    if attention_mask is not None:
        model_kwargs["attention_mask"] = paddle.index_select(attention_mask,
                                                             index)

    if "token_type_ids" in model_kwargs:
        token_type_ids = model_kwargs["token_type_ids"]
        model_kwargs["token_type_ids"] = paddle.index_select(token_type_ids,
                                                             index)

    if "position_ids" in model_kwargs:
        position_ids = model_kwargs["position_ids"]
        model_kwargs["position_ids"] = paddle.index_select(position_ids,
                                                           index)

    return input_ids, model_kwargs
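# A minimal usage sketch (not from the original source): expand a hypothetical
# batch of 2 sequences 3x, as generation code does for beam search or
# num_return_sequences > 1. Only `paddle` is assumed to be imported.
import paddle

input_ids = paddle.to_tensor([[1, 2, 3], [4, 5, 6]])
attention_mask = paddle.ones_like(input_ids)

expanded_ids, model_kwargs = expand_inputs_for_generation(
    input_ids, expand_size=3, attention_mask=attention_mask)

print(expanded_ids.shape)                    # [6, 3] -> rows 0,0,0,1,1,1 of the batch
print(model_kwargs["attention_mask"].shape)  # [6, 3]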
def reorder_neuron(layer, index, dim=0):
    """ Reorder feed-forward weights according to index.

    Args:
        layer(paddle.nn.Layer): the instance of `paddle.nn.Linear` layer.
        index(list): the sort indices of feed-forward.
        dim(int): select weights according to the dim.
    """
    linearLayer = layer.fn if hasattr(layer, 'fn') else layer
    W = paddle.index_select(linearLayer.weight, index, axis=dim).detach()
    if linearLayer.bias is not None:
        if dim == 0:
            b = paddle.assign(linearLayer.bias).detach()
        else:
            b = paddle.assign(
                paddle.index_select(linearLayer.bias, index, axis=0)).detach()

    linearLayer.weight.stop_gradient = True
    linearLayer.weight.set_value(W)
    linearLayer.weight.stop_gradient = False

    if linearLayer.bias is not None:
        linearLayer.bias.stop_gradient = True
        linearLayer.bias.set_value(b)
        linearLayer.bias.stop_gradient = False
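# A minimal usage sketch (hypothetical layer and index, not from the original
# source): reorder the output neurons of a standalone paddle.nn.Linear.
# With dim=1 the weight columns and the bias entries are permuted together,
# so column i of the new output equals column index[i] of the old output.
import paddle

linear = paddle.nn.Linear(4, 3)         # weight shape [in_features=4, out_features=3]
index = paddle.to_tensor([2, 0, 1])     # assumed importance order of the 3 neurons

x = paddle.rand([2, 4])
before = linear(x)
reorder_neuron(linear, index, dim=1)
after = linear(x)                       # after[:, i] == before[:, index[i]]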
def test(model, feats, labels, train_nid, val_nid, test_nid, evaluator,
         batch_size, history=None):
    model.eval()
    num_nodes = labels.shape[0]
    dataset = trainNid(np.arange(num_nodes).reshape(-1, 1))
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            drop_last=False)
    scores = []
    labels = paddle.to_tensor(labels, dtype='int64')
    for batch in dataloader:
        batch = batch[0].reshape([1, -1])[0]
        batch_feats = [paddle.index_select(x, batch) for x in feats]
        if history is not None:
            # Train aggregator partially using history
            batch_feats = (batch_feats,
                           [paddle.index_select(x, batch) for x in history])
        pred = model(batch_feats)
        scores.append(evaluator(pred, paddle.index_select(labels, batch)))
    # For each evaluation metric, concat along node dimension
    metrics = [paddle.concat(s, axis=0) for s in zip(*scores)][0]
    train_res = compute_mean(metrics, train_nid)
    val_res = compute_mean(metrics, val_nid)
    test_res = compute_mean(metrics, test_nid)
    return train_res, val_res, test_res
def getEmbedding(self, users, pos_items, neg_items):
    users_emb = self.embedding_user.weight
    items_emb = self.embedding_item.weight
    all_users, all_items = self.gcn(self.Graph, users_emb, items_emb)
    users_emb = paddle.index_select(all_users, users)
    pos_emb = paddle.index_select(all_items, pos_items)
    neg_emb = paddle.index_select(all_items, neg_items)
    return users_emb, pos_emb, neg_emb
def _selected_pixel(ref_labels_flat, ref_emb_flat):
    # Keep only the positions whose label is not the ignore value (-1).
    index_list = paddle.arange(len(ref_labels_flat))
    index_ = paddle.masked_select(index_list, ref_labels_flat != -1)
    index_ = long_(index_)

    ref_labels_flat = paddle.index_select(ref_labels_flat, index_, 0)
    ref_emb_flat = paddle.index_select(ref_emb_flat, index_, 0)

    return ref_labels_flat, ref_emb_flat
def forward(self, users, items):
    users_emb = self.embedding_user.weight
    items_emb = self.embedding_item.weight
    all_users, all_items = self.lgn(self.Graph, users_emb, items_emb)
    users_emb = paddle.index_select(all_users, users)
    items_emb = paddle.index_select(all_items, items)
    inner_pro = paddle.multiply(users_emb, items_emb)
    gamma = paddle.sum(inner_pro, axis=1)
    return gamma
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # shuffle the sample order
    for i in range(0, num_examples, batch_size):
        j = paddle.to_tensor(indices[i:min(i + batch_size, num_examples)],
                             dtype='int64')
        yield paddle.index_select(features, axis=0, index=j), \
              paddle.index_select(labels, axis=0, index=j)
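# A minimal usage sketch (toy tensors, not from the original source): iterate
# over 10 random samples in mini-batches of 4; the final batch holds the
# remaining 2 samples.
import random
import paddle

features = paddle.randn([10, 2])
labels = paddle.randn([10, 1])

for X, y in data_iter(4, features, labels):
    print(X.shape, y.shape)   # [4, 2] [4, 1] (last batch: [2, 2] [2, 1])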
def forward(self, users, items):
    users_emb = self.embedding_user.weight
    items_emb = self.embedding_item.weight
    all_users, all_items = self.lightgcn(self.Graph, users_emb, items_emb)
    users_emb = paddle.index_select(all_users, users)
    items_emb = paddle.index_select(all_items, items)
    # users_emb = paddle.to_tensor(all_users.numpy()[users.numpy()])
    # items_emb = paddle.to_tensor(all_items.numpy()[items.numpy()])
    inner_pro = paddle.multiply(users_emb, items_emb)
    gamma = paddle.sum(inner_pro, axis=1)
    return gamma
def compute_rot_loss(output, target_bin, target_res, mask):
    # output: (B, 128, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos,
    #                      bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos]
    # target_bin: (B, 128, 2) [bin1_cls, bin2_cls]
    # target_res: (B, 128, 2) [bin1_res, bin2_res]
    # mask: (B, 128, 1)
    output = output.reshape([-1, 8])
    target_bin = target_bin.reshape([-1, 2])
    target_res = target_res.reshape([-1, 2])
    mask = mask.reshape([-1, 1])

    loss_bin1 = compute_bin_loss(output[:, 0:2], target_bin[:, 0], mask)
    loss_bin2 = compute_bin_loss(output[:, 4:6], target_bin[:, 1], mask)
    loss_res = paddle.zeros_like(loss_bin1, dtype='float32')

    if target_bin[:, 0].nonzero().shape[0] > 0:
        idx1 = target_bin[:, 0].nonzero()[:, 0]
        valid_output1 = paddle.index_select(output, idx1.cast('int32'), 0)
        valid_target_res1 = paddle.index_select(target_res, idx1.cast('int32'), 0)
        loss_sin1 = compute_res_loss(valid_output1[:, 2],
                                     paddle.sin(valid_target_res1[:, 0]))
        loss_cos1 = compute_res_loss(valid_output1[:, 3],
                                     paddle.cos(valid_target_res1[:, 0]))
        loss_res += loss_sin1 + loss_cos1

    if target_bin[:, 1].nonzero().shape[0] > 0:
        idx2 = target_bin[:, 1].nonzero()[:, 0]
        valid_output2 = paddle.index_select(output, idx2.cast('int32'), 0)
        valid_target_res2 = paddle.index_select(target_res, idx2.cast('int32'), 0)
        loss_sin2 = compute_res_loss(valid_output2[:, 6],
                                     paddle.sin(valid_target_res2[:, 1]))
        loss_cos2 = compute_res_loss(valid_output2[:, 7],
                                     paddle.cos(valid_target_res2[:, 1]))
        loss_res += loss_sin2 + loss_cos2

    return loss_bin1 + loss_bin2 + loss_res
def mixup_data(x, y, alpha=1.0):
    '''Returns mixed inputs, pairs of targets, and lambda'''
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1
    batch_size = x.shape[0]
    index = paddle.randperm(batch_size)

    mixed_x = lam * x + (1 - lam) * paddle.index_select(x, index)
    y_a, y_b = y, paddle.index_select(y, index)
    mixed_target = (y_a, y_b, lam)
    return mixed_x, mixed_target
def forward(self, node_repr, bond_length_index, bond_length, mask):
    node_i, node_j = bond_length_index
    node_i_repr = paddle.index_select(node_repr, node_i)
    node_j_repr = paddle.index_select(node_repr, node_j)
    node_ij_repr = paddle.concat([node_i_repr, node_j_repr], 1)
    bond_length_pred = self.bond_length_pred_linear(node_ij_repr)

    bond_length_pred = paddle.masked_select(bond_length_pred, mask)
    bond_length_pred = paddle.reshape(bond_length_pred, (-1, ))
    bond_length = paddle.masked_select(bond_length, mask)
    bond_length = paddle.reshape(bond_length, (-1, ))

    loss = self.loss(bond_length_pred, bond_length)
    loss = paddle.mean(loss)
    return loss
def prepare_inputs_for_generation(self,
                                  decoder_input_ids,
                                  attention_mask=None,
                                  encoder_output=None,
                                  use_cache=True,
                                  cache=None,
                                  **kwargs):
    if encoder_output is not None:
        expand_size = int(decoder_input_ids.shape[0] /
                          encoder_output.shape[0])
        if expand_size > 1:
            index = paddle.tile(
                paddle.arange(encoder_output.shape[0]).unsqueeze(-1),
                [1, expand_size]).reshape([-1])
            encoder_output = paddle.index_select(encoder_output, index)

    if use_cache and cache is None:
        if encoder_output is None:
            raise ValueError(
                "Encoder output can not be none if `use_cache` is True")
        cache = self.decoder.decoder.gen_cache(memory=encoder_output)

    if cache is not None:
        decoder_input_ids = decoder_input_ids[:, -1:]

    return {
        "input_ids": None,  # during prediction, encoder_output is provided, input_ids is not needed.
        "decoder_input_ids": decoder_input_ids,
        "encoder_output": encoder_output,
        "attention_mask": attention_mask,
        "use_cache": use_cache,
        "cache": cache
    }
def prepare_inputs_for_generation(self,
                                  decoder_input_ids,
                                  attention_mask=None,
                                  encoder_output=None,
                                  use_cache=True,
                                  cache=None,
                                  **kwargs):
    """
    Prepare inputs for the decoder to generate sentences.

    Return:
        dict: A dictionary containing necessary inputs for generating the next token.
    """
    if encoder_output is not None:
        expand_size = int(decoder_input_ids.shape[0] /
                          encoder_output.shape[0])
        if expand_size > 1:
            index = paddle.tile(
                paddle.arange(encoder_output.shape[0]).unsqueeze(-1),
                [1, expand_size]).reshape([-1])
            encoder_output = paddle.index_select(encoder_output, index)

    if cache is not None:
        decoder_input_ids = decoder_input_ids[:, -1:]

    return {
        "input_ids": None,  # during prediction, encoder_output is provided, input_ids is not needed.
        "decoder_input_ids": decoder_input_ids,
        "encoder_output": encoder_output,
        "attention_mask": attention_mask,
        "use_cache": use_cache,
        "cache": cache
    }
def forward(self, x, mask=None):
    """
    Args:
        x: input features with shape of (num_windows*B, N, C)
        mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None
    """
    B_, N, C = x.shape
    qkv = self.qkv(x).reshape(
        [B_, N, 3, self.num_heads, C // self.num_heads]).transpose([2, 0, 3, 1, 4])
    q, k, v = qkv[0], qkv[1], qkv[2]

    q = q * self.scale
    attn = q @ swapdim(k, -2, -1)

    relative_position_bias = paddle.index_select(
        self.relative_position_bias_table,
        self.relative_position_index.reshape((-1, )),
        axis=0).reshape((self.window_size[0] * self.window_size[1],
                         self.window_size[0] * self.window_size[1], -1))
    relative_position_bias = relative_position_bias.transpose([2, 0, 1])  # nH, Wh*Ww, Wh*Ww
    attn = attn + relative_position_bias.unsqueeze(0)

    if mask is not None:
        nW = mask.shape[0]
        attn = attn.reshape([B_ // nW, nW, self.num_heads, N, N]) + mask.unsqueeze(1).unsqueeze(0)
        attn = attn.reshape([-1, self.num_heads, N, N])
        attn = self.softmax(attn)
    else:
        attn = self.softmax(attn)

    attn = self.attn_drop(attn)

    x = swapdim((attn @ v), 1, 2).reshape([B_, N, C])
    x = self.proj(x)
    x = self.proj_drop(x)
    return x
def forward(self, x):
    x = paddle.index_select(x, self.index, axis=1)
    x = self.norm(x)
    x = self.activation(x)
    x = self.conv(x)
    x = ShuffleLayer(x, self.groups)
    return x
def _batch_shuffle_ddp(self, x):
    """
    Batch shuffle, for making use of BatchNorm.
    *** Only support DistributedDataParallel (DDP) model. ***
    """
    # gather from all gpus
    batch_size_this = x.shape[0]
    x_gather = concat_all_gather(x)
    batch_size_all = x_gather.shape[0]

    num_gpus = batch_size_all // batch_size_this

    # random shuffle index
    idx_shuffle = paddle.randperm(batch_size_all).cuda()

    # broadcast to all gpus
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.broadcast(idx_shuffle, src=0)

    # index for restoring
    idx_unshuffle = paddle.argsort(idx_shuffle)

    # shuffled index for this gpu
    gpu_idx = paddle.distributed.get_rank()
    idx_this = idx_shuffle.reshape([num_gpus, -1])[gpu_idx]

    return paddle.index_select(x_gather, idx_this), idx_unshuffle
def __call__(self, x, index):
    # Emulate a gather along `self.dim`: move that axis to the front, flatten
    # both tensors, turn the indices into flat offsets, and gather with
    # index_select before transposing the result back.
    if self.dim < 0:
        self.dim += len(x.shape)
    x_range = list(range(len(x.shape)))
    x_range[0] = self.dim
    x_range[self.dim] = 0
    x_swaped = paddle.transpose(x, perm=x_range)

    index_range = list(range(len(index.shape)))
    index_range[0] = self.dim
    index_range[self.dim] = 0
    index_swaped = paddle.transpose(index, perm=index_range)

    dtype = index.dtype
    x_shape = paddle.shape(x_swaped)
    index_shape = paddle.shape(index_swaped)

    prod = paddle.cast(paddle.prod(x_shape), dtype=dtype) / x_shape[0]

    x_swaped_flattend = paddle.flatten(x_swaped)
    index_swaped_flattend = paddle.flatten(index_swaped)
    index_swaped_flattend *= prod

    bias = paddle.arange(start=0, end=prod, dtype=dtype)
    bias = paddle.reshape(bias, x_shape[1:])
    bias = paddle.crop(bias, index_shape[1:])
    bias = paddle.flatten(bias)
    bias = paddle.tile(bias, [index_shape[0]])
    index_swaped_flattend += bias

    gathered = paddle.index_select(x_swaped_flattend, index_swaped_flattend)
    gathered = paddle.reshape(gathered, index_swaped.shape)

    out = paddle.transpose(gathered, perm=x_range)
    return out
def forward(self, inputs):
    """
    forward
    """
    x = paddle.index_select(inputs,
                            index=paddle.to_tensor([1, 2]),
                            axis=self.axis)
    return x
def reorder_head_matrix(linearLayer, index, dim=1):
    W = paddle.index_select(linearLayer.weight, index, axis=dim).detach()
    if linearLayer.bias is not None:
        if dim == 0:
            b = paddle.assign(linearLayer.bias).detach()
        else:
            b = paddle.assign(
                paddle.index_select(linearLayer.bias, index, axis=0)).detach()

    linearLayer.weight.stop_gradient = True
    linearLayer.weight.set_value(W)
    linearLayer.weight.stop_gradient = False

    if linearLayer.bias is not None:
        linearLayer.bias.stop_gradient = True
        linearLayer.bias.set_value(b)
        linearLayer.bias.stop_gradient = False
def index_select_ND(source, dim, index):
    """Select entries of `source` along `dim` with an N-D `index`,
    keeping the index shape and appending the remaining dims of `source`."""
    index_size = index.shape
    suffix_dim = source.shape[1:]
    final_size = index_size + suffix_dim
    target = paddle.index_select(x=source,
                                 axis=dim,
                                 index=paddle.reshape(index, shape=[-1]))
    return target.reshape(final_size)
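# A minimal usage sketch (made-up tensors, not from the original source):
# gather node features with a 2-D neighbor-index matrix; the index shape is
# kept and the feature dimension of `source` is appended.
import paddle

source = paddle.arange(12, dtype='float32').reshape([4, 3])   # 4 nodes, 3 features
index = paddle.to_tensor([[0, 2], [1, 3]])                     # neighbor ids, shape [2, 2]

out = index_select_ND(source, 0, index)
print(out.shape)   # [2, 2, 3]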
def forward(self, inputs, bc_index):
    inputs.stop_gradient = False
    outputs = self.net.nn_func(inputs)
    # eq_loss
    hes = Hessian(self.net.nn_func, inputs, is_batched=True)
    eq_loss = paddle.norm(hes[:, 0, 0] + hes[:, 1, 1], p=2)
    # bc_loss
    bc_u = paddle.index_select(outputs, bc_index)
    return eq_loss, bc_u
def train(model, feats, labels, train_nid, loss_fcn, optimizer, batch_size,
          history=None):
    model.train()
    paddle.set_device('cpu')
    train_nid_list = []
    for x in train_nid:
        train_nid_temp = []
        train_nid_temp.append(x)
        train_nid_list.append(train_nid_temp)
    train_nid = np.array(train_nid_list)
    train_nid_data = trainNid(train_nid)

    from tqdm import tqdm
    dataloader = DataLoader(train_nid_data,
                            batch_size=batch_size,
                            shuffle=True,
                            drop_last=False)
    pbar = tqdm(dataloader)
    losses = []
    labels = paddle.to_tensor(labels, dtype='int64')
    for batch in pbar:
        batch = batch[0].reshape([1, -1])[0]
        batch_feats = [paddle.index_select(x, batch).cuda() for x in feats]
        if history is not None:
            # Train aggregator partially using history
            batch_feats = (batch_feats, [
                paddle.index_select(x, batch).cuda() for x in history
            ])
        loss = loss_fcn(model(batch_feats), paddle.index_select(labels, batch))
        losses.append(loss.cpu().numpy())
        pbar.set_description(f'loss:{np.mean(losses):.3f}')

        optimizer.clear_grad()
        loss.backward()
        optimizer.step()
def get_points_train(self, seg_logits, uncertainty_func):
    """
    Sample points for training.

    Sample points in [0, 1] x [0, 1] coordinate space based on their
    uncertainty. The uncertainties are calculated for each point using
    'uncertainty_func' function that takes point's logit prediction as
    input.

    Args:
        seg_logits (Tensor): Semantic segmentation logits, shape (
            batch_size, num_classes, height, width).
        uncertainty_func (func): uncertainty calculation function.

    Returns:
        point_coords (Tensor): A tensor of shape (batch_size, num_points, 2)
            that contains the coordinates of ``num_points`` sampled points.
    """
    num_points = self.num_points
    oversample_ratio = self.oversample_ratio
    importance_sample_ratio = self.importance_sample_ratio
    assert oversample_ratio >= 1
    assert 0 <= importance_sample_ratio <= 1
    batch_size = paddle.shape(seg_logits)[0]
    num_sampled = int(num_points * oversample_ratio)
    point_coords = paddle.rand([batch_size, num_sampled, 2])
    point_logits = point_sample(seg_logits, point_coords)
    # It is crucial to calculate uncertainty based on the sampled
    # prediction value for the points. Calculating uncertainties of the
    # coarse predictions first and sampling them for points leads to
    # incorrect results. To illustrate this: assume
    # uncertainty_func(logits) = -abs(logits); a sampled point between two
    # coarse predictions with -1 and 1 logits has 0 logits, and therefore 0
    # uncertainty value. However, if we calculate uncertainties for the
    # coarse predictions first, both will have -1 uncertainty, and the
    # sampled point will get -1 uncertainty.
    point_uncertainties = uncertainty_func(point_logits)
    num_uncertain_points = int(importance_sample_ratio * num_points)
    num_random_points = num_points - num_uncertain_points
    idx = paddle.topk(point_uncertainties[:, 0, :],
                      k=num_uncertain_points,
                      axis=1)[1]
    shift = num_sampled * paddle.arange(batch_size, dtype='int64')
    idx += shift.unsqueeze([-1])
    idx = idx.reshape([-1])
    point_coords = paddle.index_select(point_coords.reshape([-1, 2]),
                                       idx,
                                       axis=0)
    point_coords = point_coords.reshape(
        [batch_size, num_uncertain_points, 2])
    if num_random_points > 0:
        rand_point_coords = paddle.rand([batch_size, num_random_points, 2])
        point_coords = paddle.concat((point_coords, rand_point_coords), axis=1)
    return point_coords
def kdconv(x, dim, featdim, select, conv):
    x = F.relu(conv(x))
    x = paddle.reshape(x, (-1, featdim, 3, dim))
    x = paddle.reshape(x, (-1, featdim, 3 * dim))
    select = paddle.to_tensor(select) + (paddle.arange(0, dim) * 3)
    x = paddle.index_select(x, axis=2, index=select)
    x = paddle.reshape(x, (-1, featdim, int(dim / 2), 2))
    x = paddle.max(x, axis=-1)
    return x
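# A minimal usage sketch (hypothetical sizes, not from the original source):
# one KD-network level with 8 leaf nodes (dim), 4 input channels and 16
# output channels (featdim). `select` holds the assumed split axis (0/1/2)
# of each node; here every node splits on axis 0.
import paddle
import paddle.nn.functional as F

dim, featdim = 8, 16
conv = paddle.nn.Conv1D(4, featdim * 3, kernel_size=1)
x = paddle.rand([2, 4, dim])                    # [batch, channels, nodes]
select = paddle.zeros([dim], dtype='int64')

out = kdconv(x, dim, featdim, select, conv)
print(out.shape)   # [2, 16, 4]: pairs of child nodes are max-pooled together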
def mixup_data(x, y, alpha=1.0):
    """Mix the input data and label using mixup strategy,
    returns mixed inputs, pairs of targets, and lambda.

    Reference:
        Zhang, Hongyi, et al. "Mixup: Beyond Empirical Risk Minimization."
        International Conference on Learning Representations, 2017.
    """
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1
    batch_size = x.shape[0]
    index = paddle.randperm(batch_size)

    mixed_x = lam * x + (1 - lam) * paddle.index_select(x, index)
    y_a, y_b = y, paddle.index_select(y, index)
    mixed_target = (y_a, y_b, lam)
    return mixed_x, mixed_target
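# A minimal usage sketch (made-up batch, not from the original source): mix a
# batch of 4 images; training then typically weights the criterion as
# loss = lam * criterion(logits, y_a) + (1 - lam) * criterion(logits, y_b).
import numpy as np
import paddle

x = paddle.rand([4, 3, 32, 32])
y = paddle.to_tensor([0, 1, 2, 3], dtype='int64')

mixed_x, (y_a, y_b, lam) = mixup_data(x, y, alpha=0.2)
print(mixed_x.shape, y_a.shape, y_b.shape)   # [4, 3, 32, 32] [4] [4]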
def forward(self, user, item):
    user, item = paddle.to_tensor(user), paddle.to_tensor(item)
    self.init_nodes_feature()
    user_embedding_from_consumed_items = self.infomation_gcn_layer(
        paddle.concat([self.init_user_feature, self.init_item_feature],
                      axis=0))[:self.conf['num_users']]
    first_gcn_user_embedding = self.social_gcn_layer(self.init_user_feature)
    second_gcn_user_embedding = self.social_gcn_layer(self.init_user_feature)

    # get the item embedding
    final_item_embed = paddle.index_select(
        self.item_embedding.weight + self.init_item_feature, item)
    final_user_embed = paddle.index_select(
        user_embedding_from_consumed_items + second_gcn_user_embedding, user)

    # predict ratings from user to item
    prediction = paddle.nn.functional.sigmoid(
        paddle.sum(final_user_embed * final_item_embed, axis=1, keepdim=True))
    return prediction
def forward(self, node_repr, bond_angle_index, bond_angle, mask):
    node_i, node_j, node_k = bond_angle_index
    node_i_repr = paddle.index_select(node_repr, node_i)
    node_j_repr = paddle.index_select(node_repr, node_j)
    node_k_repr = paddle.index_select(node_repr, node_k)
    node_ijk_repr = paddle.concat([node_i_repr, node_j_repr, node_k_repr],
                                  axis=1)
    bond_angle_pred = self.bond_angle_pred_linear(node_ijk_repr)

    bond_angle_pred = paddle.masked_select(bond_angle_pred, mask)
    bond_angle_pred = paddle.reshape(bond_angle_pred, [-1])
    bond_angle = paddle.masked_select(bond_angle, mask)
    bond_angle = paddle.reshape(bond_angle, [-1])

    loss = self.loss(bond_angle_pred, bond_angle)
    loss = paddle.mean(loss)
    return loss
def rel_shift_bnij(x, klen=-1):
    # Relative shift of the attention matrix from bd~ to bd
    # (refer to Appendix B in the Transformer-XL paper).
    x_size = x.shape

    x = paddle.reshape(x, [x_size[0], x_size[1], x_size[3], x_size[2]])
    x = x[:, :, 1:, :]
    x = paddle.reshape(x, [x_size[0], x_size[1], x_size[2], x_size[3] - 1])
    x = paddle.index_select(
        x, index=paddle.arange(klen, dtype='int64'), axis=3)
    return x
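# A minimal usage sketch (made-up shapes, not from the original source): the
# relative-shift trick applied to scores of shape [batch, heads, qlen, 2*klen],
# keeping the first klen positions after the shift.
import paddle

x = paddle.rand([1, 2, 3, 6])          # [batch, heads, qlen=3, 2*klen=6]
shifted = rel_shift_bnij(x, klen=3)
print(shifted.shape)                   # [1, 2, 3, 3]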
def kdconv(x, shortcut, dim, featdim, select, convbnrelu):
    shortcut.append(x)
    x = convbnrelu(x)
    x = paddle.reshape(x, (-1, featdim, 3, dim))
    x = paddle.reshape(x, (-1, featdim, 3 * dim))
    select = paddle.to_tensor(select) + (paddle.arange(0, dim) * 3)
    x = paddle.index_select(x, axis=2, index=select)
    x = paddle.reshape(x, (-1, featdim, int(dim / 2), 2))
    x = paddle.max(x, axis=-1)
    return x, shortcut