def message(self, x_i, x_j, edge_index_i, size_i, return_attention_weights):
    # Without attention, simply pass the neighbor features through.
    if return_attention_weights is False:
        return x_j
    # Compute attention coefficients.
    x_i = x_i.view(-1, self.heads, self.out_channels)
    x_j = x_j.view(-1, self.heads, self.out_channels)
    alpha = (x_i * self.att_i).sum(-1) + (x_j * self.att_j).sum(-1)
    alpha = F.leaky_relu(alpha, self.negative_slope)
    # Rescale the softmax output from [0, 1] to [-1, 1].
    alpha = 2 * softmax(alpha, edge_index_i, num_nodes=size_i) - 1
    if return_attention_weights:
        self.__alpha__ = alpha
    # Sample attention coefficients stochastically.
    alpha = F.dropout(alpha, p=self.dropout, training=self.training)
    return (x_j * alpha.view(-1, self.heads, 1)).squeeze()

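# --- Hedged usage sketch (not from the source): the snippets in this file are
# message() hooks for torch_geometric's MessagePassing. The minimal, runnable
# single-head layer below shows how propagate() lifts node features into the
# x_i/x_j arguments; the class and parameter names are illustrative
# assumptions, and it assumes a recent torch_geometric release.
import torch
import torch.nn.functional as F
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import softmax

class ToyAttnConv(MessagePassing):
    def __init__(self, in_channels, out_channels):
        super().__init__(aggr='add', node_dim=0)
        self.lin = torch.nn.Linear(in_channels, out_channels)
        self.att = torch.nn.Parameter(torch.randn(2 * out_channels))

    def forward(self, x, edge_index):
        # propagate() lifts x per edge into x_i (targets) and x_j (sources)
        # and routes them into message() below.
        return self.propagate(edge_index, x=self.lin(x))

    def message(self, x_i, x_j, index, size_i):
        alpha = (torch.cat([x_i, x_j], dim=-1) * self.att).sum(-1)
        alpha = F.leaky_relu(alpha, 0.2)
        alpha = softmax(alpha, index, num_nodes=size_i)
        return x_j * alpha.unsqueeze(-1)

# Example: conv = ToyAttnConv(8, 16)
#          out = conv(torch.randn(4, 8), torch.tensor([[0, 1, 2], [1, 2, 0]]))
#          # out has shape [4, 16]
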
def message(self, x_i, x_j, edge_attr, edge_index_i, size_i):
    """
    Arguments:
        x_i has shape [num_edges, node_feat_size]
        x_j has shape [num_edges, node_feat_size]
        edge_attr has shape [num_edges, edge_feat_size]

    Returns:
        tensor of shape [num_edges, num_heads, hidden_dim]
    """
    alpha = self.att(torch.cat([x_i, x_j, edge_attr], dim=-1))
    alpha = softmax(alpha, edge_index_i, num_nodes=size_i)
    alpha = self.dropout(alpha).view(-1, self.num_heads, 1)
    value = self.value(torch.cat([x_j, edge_attr], dim=-1)).view(
        -1, self.num_heads, self.hidden_dim
    )
    return value * alpha

def message(self, edge_index_i, x_i, x_j, size_i):
    # Compute attention coefficients.
    x_j = x_j.view(-1, self.heads, self.out_channels)
    if x_i is None:  # self-update
        alpha = (x_j * self.att[:, :, self.out_channels:]).sum(dim=-1)
    else:  # neighboring nodes
        x_i = x_i.view(-1, self.heads, self.out_channels)
        alpha = (torch.cat([x_i, x_j], dim=-1) * self.att).sum(dim=-1)
    alpha = F.leaky_relu(alpha, self.negative_slope)
    alpha = softmax(alpha, edge_index_i, num_nodes=size_i)
    # Sample attention coefficients stochastically.
    alpha = F.dropout(alpha, p=self.dropout, training=self.training)
    return x_j * alpha.view(-1, self.heads, 1)

def match(self, edge_index_i, x_i, x_j, size_i):
    # Attention logits are the per-edge inner products <x_i, x_j>.
    alpha = torch.sum(x_i * x_j, dim=1)
    # Note: size_i is recomputed from x_i rather than taken from the argument.
    size_i = x_i.size(0)
    alpha = softmax(alpha, edge_index_i, num_nodes=size_i)
    # Broadcast the per-edge weights over the feature dimension.
    return alpha[:, None] * x_j

def message(self, x_i: Tensor, x_j: Tensor, edge_type: Tensor, index: Tensor,
            edge_index_i: Tensor, edge_index_j: Tensor, ptr: OptTensor,
            size_i: Optional[int]) -> Tensor:
    x_i = x_i * self.lin_l[edge_type]  # [edge, head, channel]
    x_j = x_j * self.lin_r[edge_type]  # [edge, head, channel]
    alpha_i = (x_i * self.att_l[edge_type]).sum(-1)  # [edge, head]
    alpha_j = (x_j * self.att_r[edge_type]).sum(-1)
    alpha = alpha_i + alpha_j  # [edge, head]

    # Signed distance between source and target indices, computed on-device
    # (replaces the original .cpu().numpy() round-trip; same values).
    relative_index = (edge_index_j - edge_index_i).float().unsqueeze(-1)
    if self.encoding in ("relational", "multi"):
        # Per-relation affine positional encoding, shape [edge, 1].
        positional_encodings = (self.encoding_layer_weight[edge_type] * relative_index
                                + self.encoding_layer_bias[edge_type])
        alpha = alpha + positional_encodings
    elif self.encoding == "relative":
        # Shared affine positional encoding, shape [edge, 1].
        positional_encodings = (self.encoding_layer_weight * relative_index
                                + self.encoding_layer_bias)
        alpha = alpha + positional_encodings

    alpha = F.leaky_relu(alpha, self.negative_slope)
    alpha = softmax(alpha, index, ptr, size_i)
    self._alpha = alpha
    return x_j * alpha.unsqueeze(-1)  # [edge, head, channel]

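# --- Hedged aside (not from the source): the device round-trip that the
# snippet above replaces can be checked with a plain tensor computation; the
# edge indices below are made up for illustration.
import torch

edge_index = torch.tensor([[0, 2, 3], [1, 1, 4]])
edge_index_j, edge_index_i = edge_index[0], edge_index[1]
# Stays on whatever device edge_index lives on; no .cpu()/.numpy() needed.
relative_index = (edge_index_j - edge_index_i).float().unsqueeze(-1)  # [E, 1]
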
def forward(self, x, batch):
    """"""
    batch_size = batch.max().item() + 1

    h = (x.new_zeros((self.num_layers, batch_size, self.in_channels)),
         x.new_zeros((self.num_layers, batch_size, self.in_channels)))
    q_star = x.new_zeros(batch_size, self.out_channels)

    for i in range(self.processing_steps):
        q, h = self.lstm(q_star.unsqueeze(0), h)
        q = q.view(batch_size, self.in_channels)
        e = (x * q[batch]).sum(dim=-1, keepdim=True)
        a = softmax(e, batch, num_nodes=batch_size)
        r = scatter_add(a * x, batch, dim=0, dim_size=batch_size)
        q_star = torch.cat([q, r], dim=-1)

    return q_star

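# --- Hedged aside (not from the source): the readout above relies on
# scatter_add pooling node rows into per-graph rows. A tiny demo, assuming
# torch_scatter is installed:
import torch
from torch_scatter import scatter_add

x = torch.tensor([[1.0], [2.0], [3.0]])
batch = torch.tensor([0, 0, 1])  # first two nodes belong to graph 0
pooled = scatter_add(x, batch, dim=0, dim_size=2)  # tensor([[3.], [3.]])
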
def message(self, x_j: Tensor, x_i: Tensor, edge_attr: OptTensor,
            index: Tensor, ptr: OptTensor, size_i: Optional[int]) -> Tensor:
    x = x_i + x_j
    if edge_attr is not None:
        if edge_attr.dim() == 1:
            edge_attr = edge_attr.view(-1, 1)
        assert self.lin_edge is not None
        edge_attr = self.lin_edge(edge_attr)
        edge_attr = edge_attr.view(-1, self.heads, self.out_channels)
        x += edge_attr

    x = F.leaky_relu(x, self.negative_slope)
    alpha = (x * self.att).sum(dim=-1)
    alpha = softmax(alpha, index, ptr, size_i)
    self._alpha = alpha
    alpha = F.dropout(alpha, p=self.dropout, training=self.training)
    return x_j * alpha.unsqueeze(-1)

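# --- Hedged aside (not from the source): torch_geometric.utils.softmax
# normalizes within groups given by `index`, so each target node's incoming
# attention coefficients sum to one. A tiny demo:
import torch
from torch_geometric.utils import softmax

src = torch.tensor([1.0, 2.0, 3.0, 4.0])
index = torch.tensor([0, 0, 1, 1])  # edges grouped by target node
out = softmax(src, index)
# out[:2].sum() == 1 and out[2:].sum() == 1
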
def message(self, edge_index_i, x_i, x_j, norm, size_i, edge_feature, task_emb):
    if self.msg_direction == 'both':
        x_j = torch.cat((x_i, x_j, edge_feature), dim=-1)
    else:
        x_j = torch.cat((x_j, edge_feature), dim=-1)
    x_j = self.linear_value(x_j)
    x_j = x_j.view(-1, self.heads, self.head_channels)

    if task_emb is not None:
        task_emb = task_emb.view(1, 1, self.task_channels)
        alpha = (x_j * self.att_msg).sum(-1) + (task_emb * self.att_task).sum(-1)
    else:
        alpha = (x_j * self.att_msg).sum(-1)

    alpha = F.leaky_relu(alpha, self.negative_slope)
    alpha = softmax(alpha, edge_index_i, num_nodes=size_i)
    alpha = alpha.view(-1, self.heads, 1)
    # norm is per-edge, so reshape to [E, 1, 1] to broadcast over heads and
    # channels (the original [E, 1] view fails against [E, heads, channels]).
    return norm.view(-1, 1, 1) * x_j * alpha if norm is not None else x_j * alpha

def message(self, edge_index_i, x_i, x_j, size_i):
    # Compute attention coefficients.
    x_j = x_j.view(-1, self.heads, self.out_channels)
    if x_i is None:
        alpha = (x_j * self.att[:, :, self.out_channels:]).sum(dim=-1)
    else:
        x_i = x_i.view(-1, self.heads, self.out_channels)
        alpha = (torch.cat([x_i, x_j], dim=-1) * self.att).sum(dim=-1)
    alpha = F.leaky_relu(alpha, self.negative_slope)
    alpha = softmax(alpha, edge_index_i, num_nodes=size_i)
    if self.__save_att__:
        self.__alpha__ = alpha
    # Sample attention coefficients stochastically.
    alpha = F.dropout(alpha, p=self.dropout, training=self.training)
    return x_j * alpha.view(-1, self.heads, 1)

def forward(self, H, batch, model_name='GMGCN', size=None):
    size = batch[-1].item() + 1 if size is None else size
    x = torch.tanh(torch.mm(self.w1, torch.transpose(H, 1, 0)))
    x = torch.mm(self.w2, x)
    S = softmax(torch.transpose(x, 1, 0), batch)
    if model_name == 'ATTGCN':
        # Keep the two attention channels separate, then concatenate the readouts.
        fin_x1 = scatter_add(S[:, 0].view(-1, 1) * H, batch, dim=0, dim_size=size)
        fin_x2 = scatter_add(S[:, 1].view(-1, 1) * H, batch, dim=0, dim_size=size)
        fin_x = torch.cat((fin_x1, fin_x2), 1)
        return fin_x, S
    else:
        fin_x = scatter_add(S * H, batch, dim=0, dim_size=size)
        return fin_x

def message(self, edge_index_i, x_i, x_j, size_i, edge_index, size):
    # Compute attention coefficients.
    x_i = x_i.view(-1, self.out_channels)
    x_j = x_j.view(-1, self.out_channels)
    inner_product = torch.mul(x_i, F.leaky_relu(x_j)).sum(dim=-1)

    # Gate: degree-normalized inner product squashed through a sigmoid.
    row, col = edge_index
    deg = degree(row, size[0], dtype=x_i.dtype)
    deg_inv_sqrt = deg[row].pow(-0.5)
    tmp = torch.mul(deg_inv_sqrt, inner_product)
    gate_w = torch.sigmoid(tmp)

    # Attention: gated inner product, normalized over each target's neighbors.
    tmp = torch.mul(inner_product, gate_w)
    attention_w = softmax(tmp, edge_index_i, num_nodes=size_i)
    return torch.mul(x_j, attention_w.view(-1, 1))

def message(self, x_i, x_j, edge_index_i, size_i, edge_weight, return_attention_weights):
    # Compute attention coefficients.
    x_i = x_i.view(-1, self.heads, self.out_channels)
    x_j = x_j.view(-1, self.heads, self.out_channels)
    alpha = (x_i * self.att_i).sum(-1) + (x_j * self.att_j).sum(-1)
    alpha = F.leaky_relu(alpha, self.negative_slope)
    alpha = softmax(alpha, edge_index_i, num_nodes=size_i)
    if return_attention_weights:
        self.__alpha__ = alpha
    # Sample attention coefficients stochastically.
    alpha = F.dropout(alpha, p=self.dropout, training=self.training)
    return x_j * alpha.view(-1, self.heads, 1)

def message(self, edge_index_i, x_i, x_j, size_i, num_nodes, edge_attr):
    # Compute attention coefficients.
    # N.B. edge_attr is accepted in the signature but is not used in the
    # attention computation below.
    x_j = x_j.view(-1, self.heads, self.out_channels)
    if x_i is None:
        alpha = (x_j * self.att[:, :, self.out_channels:]).sum(dim=-1)
    else:
        x_i = x_i.view(-1, self.heads, self.out_channels)
        alpha = (torch.cat([x_i, x_j], dim=-1) * self.att).sum(dim=-1)
    alpha = F.leaky_relu(alpha, self.negative_slope)
    alpha = softmax(alpha, edge_index_i, num_nodes=size_i)
    # Sample attention coefficients stochastically (training only).
    if self.training and self.dropout > 0:
        alpha = F.dropout(alpha, p=self.dropout, training=True)
    return x_j * alpha.view(-1, self.heads, 1)

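# --- Hedged aside (not from the source): the explicit training guard above is
# belt-and-braces; F.dropout already honors its `training` flag:
import torch
import torch.nn.functional as F

alpha = torch.ones(4)
F.dropout(alpha, p=0.5, training=True)   # randomly zeroes entries, rescales by 1/(1-p)
F.dropout(alpha, p=0.5, training=False)  # identity
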
def edge_update(self, alpha_j: Tensor, alpha_i: OptTensor, edge_attr: OptTensor,
                index: Tensor, ptr: OptTensor, size_i: Optional[int]) -> Tensor:
    # Given edge-level attention coefficients for source and target nodes,
    # we simply need to sum them up to "emulate" concatenation:
    alpha = alpha_j if alpha_i is None else alpha_j + alpha_i

    if edge_attr is not None and self.lin_edge is not None:
        if edge_attr.dim() == 1:
            edge_attr = edge_attr.view(-1, 1)
        edge_attr = self.lin_edge(edge_attr)
        edge_attr = edge_attr.view(-1, self.heads, self.out_channels)
        alpha_edge = (edge_attr * self.att_edge).sum(dim=-1)
        alpha = alpha + alpha_edge

    alpha = F.leaky_relu(alpha, self.negative_slope)
    alpha = softmax(alpha, index, ptr, size_i)
    alpha = F.dropout(alpha, p=self.dropout, training=self.training)
    return alpha

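# --- Hedged usage sketch (not from the source): edge_update() hooks like the
# one above are driven by MessagePassing.edge_updater() in torch_geometric
# 2.x. The minimal class below (illustrative names, single head) shows the
# split between computing alpha and consuming it in message(); treat the exact
# special-argument names (size_i vs. dim_size) as version-dependent.
import torch
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import softmax

class ToySplitConv(MessagePassing):
    def __init__(self, channels):
        super().__init__(aggr='add', node_dim=0)
        self.att = torch.nn.Parameter(torch.randn(channels))

    def forward(self, x, edge_index):
        # First normalize per-edge scores, then aggregate weighted messages.
        alpha = self.edge_updater(edge_index, alpha=(x * self.att).sum(-1))
        return self.propagate(edge_index, x=x, alpha=alpha)

    def edge_update(self, alpha_j, index, ptr, size_i):
        return softmax(alpha_j, index, ptr, size_i)

    def message(self, x_j, alpha):
        return x_j * alpha.unsqueeze(-1)
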
def forward(self, atom, bond, bond_index, mol_index):
    num_atom, atom_dim = atom.shape
    num_bond, bond_dim = bond.shape
    bond_index = bond_index.t().contiguous()

    atom = self.atom_preprocess(atom)
    bond = self.bond_preprocess(bond)

    # Lift bond features up to node-feature size.
    neighbor_atom = atom[bond_index[1]]  # features of the second endpoint of every bond
    mixture = neighbor_atom + bond - neighbor_atom * bond
    neighbor = torch.cat([bond, neighbor_atom, mixture], -1)

    target_atom = atom[bond_index[0]]
    feature_align = torch.cat([target_atom, neighbor], dim=-1)
    align_score = F.leaky_relu(self.align(self.dropout(feature_align)))
    attention_weight = softmax(align_score, bond_index[0], num=num_atom)
    context = scatter_('add',
                       torch.mul(attention_weight, self.attend(neighbor)),
                       bond_index[0], dim_size=num_atom)
    context = F.elu(context)
    atom = self.gru(context, atom)

    for k in range(self.K - 1):
        atom = self.propagate(atom, bond_index)

    # Initialize each super-atom (molecule) as the sum of its atoms.
    superatom_num = mol_index.max() + 1
    superatom = scatter_('add', atom, mol_index, dim_size=superatom_num)
    for t in range(self.T):
        superatom, attention_weight = self.superGather(superatom, atom, mol_index)

    predict = self.predict(superatom)
    return predict

def get_attention(self, edge_index_i: Tensor, x_i: Tensor, x_j: Tensor,
                  num_nodes: Optional[int],
                  return_logits: bool = False) -> Tensor:
    if self.attention_type == 'MX':
        logits = (x_i * x_j).sum(dim=-1)
        if return_logits:
            return logits

        alpha = (x_j * self.att_l).sum(-1) + (x_i * self.att_r).sum(-1)
        alpha = alpha * logits.sigmoid()
    else:  # self.attention_type == 'SD'
        alpha = (x_i * x_j).sum(dim=-1) / math.sqrt(self.out_channels)
        if return_logits:
            return alpha

    alpha = F.leaky_relu(alpha, self.negative_slope)
    alpha = softmax(alpha, edge_index_i, num_nodes=num_nodes)
    return alpha

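# --- Hedged aside (not from the source): the two SuperGAT-style scoring modes
# above, reproduced on dummy tensors of shape [E, heads, channels]:
import math
import torch

x_i, x_j = torch.randn(5, 2, 8), torch.randn(5, 2, 8)
att_l, att_r = torch.randn(1, 2, 8), torch.randn(1, 2, 8)

logits = (x_i * x_j).sum(-1)                                    # dot-product logits
alpha_mx = ((x_j * att_l).sum(-1) + (x_i * att_r).sum(-1)) * logits.sigmoid()
alpha_sd = logits / math.sqrt(8)                                # scaled dot-product
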
def message(self, x_i, x_j, pass_edge_index, num_nodes):
    """
    x_i: (E x heads x out_channels)
    x_j: (E x heads x out_channels)
    pass_edge_index: 2 x E
        TODO: newer pytorch_geometric releases discourage passing
        edge_index through message() like this.
    num_nodes: number of nodes

    Returns: neighbor features weighted by attention.
    """
    # Compute multi-head attention coefficients from head and tail nodes: E x heads.
    alpha = (torch.cat([x_i, x_j], dim=-1) * self.att).sum(dim=-1)
    alpha = F.leaky_relu(alpha, self.negative_slope)
    alpha = softmax(alpha, pass_edge_index[0], num_nodes=num_nodes)
    alpha = F.dropout(alpha, p=self.dropout, training=self.training)
    # (E x heads x out_channels) * (E x heads x 1): each head uses its own weights.
    return x_j * alpha.view(-1, self.heads, 1)

def message(self, edge_index_i, x_i, x_j, size_i):
    # Constructs messages to node i for each edge (j, i).
    ############################################################################
    # TODO: Your code here! Compute the attention coefficients alpha as
    # described in equation (7). Remember to be careful of the number of heads
    # with dimension!
    # HINT: torch_geometric.utils.softmax may help to calculate softmax for
    # neighbors of i.
    # https://pytorch-geometric.readthedocs.io/en/latest/modules/utils.html#torch_geometric.utils.softmax
    # Our implementation is ~5 lines, but don't worry if you deviate from this.

    x_i = x_i.view(-1, self.heads, self.out_channels)
    x_j = x_j.view(-1, self.heads, self.out_channels)
    e_s = torch.sum(torch.cat((x_i, x_j), dim=-1) * self.att, dim=-1)
    e_s = F.leaky_relu(e_s, negative_slope=0.2)
    alpha = pyg_utils.softmax(e_s, edge_index_i, num_nodes=size_i)
    ############################################################################

    alpha = F.dropout(alpha, p=self.dropout, training=self.training)
    return x_j * alpha.view(-1, self.heads, 1)

def message(self, x_i, x_j, edge_index, num_nodes, edge_attr):
    # Compute attention coefficients, appending the (scalar) edge attribute to
    # each head's concatenated [x_i, x_j] features when it is available.
    if edge_attr is not None:
        edge_feat = edge_attr.view(-1, 1).repeat(1, x_i.shape[1]).view(
            -1, x_i.shape[1], 1)  # [E, heads, 1]
        alpha = (torch.cat([x_i, x_j, edge_feat], dim=-1) * self.att).sum(dim=-1)
    else:
        alpha = (torch.cat([x_i, x_j], dim=-1) * self.att).sum(dim=-1)
    alpha = F.leaky_relu(alpha, self.negative_slope)
    alpha = softmax(alpha, edge_index[0], num_nodes=num_nodes)
    # Sample attention coefficients stochastically (training only).
    if self.training and self.dropout > 0:
        alpha = F.dropout(alpha, p=self.dropout, training=True)
    return x_j * alpha.view(-1, self.heads, 1)

def message(self, x_i, x_j, edge_attr, index, ptr, size_i):
    # Queries come from the target nodes, keys from the source nodes
    # (the original swapped lin_key/lin_query and built both from x_i).
    query = self.lin_query(x_i).view(-1, self.heads, self.out_channels)
    key = self.lin_key(x_j).view(-1, self.heads, self.out_channels)

    if edge_attr is not None:
        edge_attr = self.lin_edge(edge_attr).view(-1, self.heads, self.out_channels)
        key = key + edge_attr

    alpha = (query * key).sum(dim=-1) / math.sqrt(self.out_channels)
    alpha = softmax(alpha, index, ptr, size_i)
    # Dropout, not a second softmax, is applied to the normalized coefficients.
    alpha = F.dropout(alpha, p=self.dropout, training=self.training)

    out = self.lin_value(x_j).view(-1, self.heads, self.out_channels)
    if edge_attr is not None:
        out = out + edge_attr
    out = out * alpha.view(-1, self.heads, 1)
    return out

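# --- Hedged aside (not from the source): the transformer-style scoring above
# on dummy tensors, with softmax normalizing per target node:
import math
import torch
from torch_geometric.utils import softmax

query = torch.randn(6, 2, 8)                  # [E, heads, channels]
key = torch.randn(6, 2, 8)
index = torch.tensor([0, 0, 1, 1, 2, 2])      # target node of each edge
alpha = (query * key).sum(-1) / math.sqrt(8)  # [E, heads]
alpha = softmax(alpha, index)                 # each target's rows sum to one per head
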
def forward(self, superatom, atom, mol_index):
    superatom_num = mol_index.max().item() + 1  # number of molecules in the batch
    superatom_expand = superatom[mol_index]
    feature_align = torch.cat([superatom_expand, atom], dim=-1)
    align_score = F.leaky_relu(self.align(self.dropout(feature_align)))
    attention_weight = softmax(align_score, mol_index, num=superatom_num)
    context = scatter_('add',
                       torch.mul(attention_weight, self.attend(self.dropout(atom))),
                       mol_index, dim_size=superatom_num)
    context = F.elu(context)
    update = self.gru(context, superatom)
    return update, attention_weight

def message(self, x_i, x_j, edge_index, num_nodes, edge_attr):
    # x_i and x_j are lifted tensors of shape [E, heads, out_channels], one
    # message per incoming edge; edge attributes have shape [E, edge_dim].
    # A naive way to use them is to append the edge features to the messages,
    # repeated once per head.
    edge_attr = edge_attr.unsqueeze(1).repeat(1, self.heads, 1)
    x_j = torch.cat([x_j, edge_attr], dim=-1)

    # Compute attention coefficients.
    # N.B. the only modification is that the attention is now computed with
    # the edge attributes included.
    alpha = (torch.cat([x_i, x_j], dim=-1) * self.att).sum(dim=-1)
    alpha = F.leaky_relu(alpha, self.negative_slope)
    alpha = softmax(alpha, edge_index[0], num_nodes=num_nodes)

    # Sample attention coefficients stochastically (training only).
    if self.training and self.dropout > 0:
        alpha = F.dropout(alpha, p=self.dropout, training=True)
    return x_j * alpha.view(-1, self.heads, 1)

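# --- Hedged aside (not from the source): the per-head repeat of the edge
# attributes, checked on dummy shapes:
import torch

edge_attr = torch.randn(6, 4)                       # [E, edge_dim]
heads = 8
lifted = edge_attr.unsqueeze(1).repeat(1, heads, 1)
assert lifted.shape == (6, 8, 4)                    # [E, heads, edge_dim]
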
def message(self, edge_index_i, x_i, x_j, size_i):
    # Constructs messages to node i for each edge (j, i).
    ############################################################################
    # TODO: Your code here! Compute the attention coefficients alpha as
    # described in equation (7). Remember to be careful of the number of heads
    # with dimension!
    # Our implementation is ~5 lines, but don't worry if you deviate from this.

    x_j = x_j.view(-1, self.heads, self.out_channels)
    x_i = x_i.view(-1, self.heads, self.out_channels)
    a = torch.cat((x_j, x_i), dim=-1) * self.att
    # Note: size_i belongs in softmax's num_nodes, not as F.leaky_relu's third
    # (inplace) argument as in the original.
    a_relu = F.leaky_relu(a.sum(dim=-1), LEAKY_RELU_SLOPE)
    alpha = pyg_utils.softmax(a_relu, edge_index_i, num_nodes=size_i)
    ############################################################################

    alpha = F.dropout(alpha, p=self.dropout, training=self.training)
    return x_j * alpha.view(-1, self.heads, 1)

def message(self, edge_index_i, x_i, x_j, size_i):
    # Constructs messages to node i for each edge (j, i).
    # edge_index_i has shape [E].
    ############################################################################
    # DONE: compute the attention coefficients alpha as described in
    # equation (7). Remember to be careful of the number of heads with dimension!
    # HINT: torch_geometric.utils.softmax may help to calculate softmax for
    # neighbors of i.
    # https://pytorch-geometric.readthedocs.io/en/latest/modules/utils.html#torch_geometric.utils.softmax
    # Our implementation is ~5 lines, but don't worry if you deviate from this.

    cat = torch.cat([x_i, x_j], dim=1)
    arg = torch.mm(cat, self.att)
    arg = F.leaky_relu(arg, negative_slope=0.2)
    alpha = pyg_utils.softmax(arg, edge_index_i, num_nodes=size_i)
    ############################################################################

    alpha = F.dropout(alpha, p=self.dropout, training=self.training)
    return x_j * alpha

def message(self, x_i: Tensor, x_j: Tensor, edge_attr: Tensor,
            index: Tensor, ptr: Tensor, size_i: int) -> Tensor:
    queries = self.q_projection(dropout(x_i, self.dropout, self.training)).view(
        -1, self.num_heads, self.output_dim)
    keys = self.k_projection(dropout(x_j, self.dropout, self.training)).view(
        -1, self.num_heads, self.output_dim)
    values = self.v_projection(dropout(x_j, self.dropout, self.training)).view(
        -1, self.num_heads, self.output_dim)
    edges = self.e_projection(edge_attr).view(-1, self.num_heads, self.output_dim)
    # Scaled dot-product attention, normalized over each target's incoming edges.
    atn = softmax(
        (queries * (keys + edges)).sum(dim=-1) / sqrt(self.output_dim),
        index, ptr, size_i)
    return atn.view(-1, self.num_heads, 1) * values

def message(self, x_i, x_j, edge_index_i, edge_index_j, size_i, return_attention_weights):
    # Compute attention coefficients, either broadcasting one set of attention
    # parameters over all examples or indexing per example (hypernetworks).
    x_i = x_i.view(-1, self.heads, self.out_channels)
    x_j = x_j.view(-1, self.heads, self.out_channels)
    if not self.use_hypernetworks:
        alpha = (x_i * self.att_i).sum(-1) + (x_j * self.att_j).sum(-1)
    else:
        alpha = (x_i * self.att_i[edge_index_i]).sum(-1) \
            + (x_j * self.att_j[edge_index_j]).sum(-1)
    alpha = F.leaky_relu(alpha, self.negative_slope)
    alpha = softmax(alpha, edge_index_i, num_nodes=size_i)
    if return_attention_weights:
        self.__alpha__ = alpha
    # Sample attention coefficients stochastically.
    alpha = F.dropout(alpha, p=self.dropout, training=self.training)
    return x_j * alpha.view(-1, self.heads, 1)

def forward(self, x, edge_index):
    num_nodes = x.size(0)
    x = x.unsqueeze(-1) if x.dim() == 1 else x
    beta = self.beta if self.requires_grad else self._buffers['beta']

    # Add self-loops to the adjacency matrix.
    edge_index, edge_attr = remove_self_loops(edge_index)
    edge_index = add_self_loops(edge_index, num_nodes=num_nodes)
    row, col = edge_index

    # Compute attention coefficients: cosine similarity scaled by beta.
    norm = torch.norm(x, p=2, dim=1)
    alpha = (x[row] * x[col]).sum(dim=1) / (norm[row] * norm[col])
    alpha = softmax(alpha * beta, row, num_nodes=num_nodes)

    # Perform the propagation.
    out = spmm(edge_index, alpha, x, num_nodes)
    return out

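# --- Hedged aside (not from the source): the attention logits above are plain
# cosine similarities between connected node features:
import torch

x = torch.randn(5, 3)
row, col = torch.tensor([0, 1]), torch.tensor([2, 3])
norm = x.norm(p=2, dim=1)
cos = (x[row] * x[col]).sum(dim=1) / (norm[row] * norm[col])
assert torch.allclose(cos, torch.cosine_similarity(x[row], x[col], dim=1), atol=1e-6)
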
def message(self, edge_index_i, x_i, x_j, size_i):
    # Constructs messages to node i for each edge (j, i).
    ############################################################################
    # TODO: Your code here! Compute the attention coefficients alpha as
    # described in equation (7). Remember to be careful of the number of heads
    # with dimension!
    # Our implementation is ~5 lines, but don't worry if you deviate from this.

    x_i = x_i.view(-1, self.heads, self.out_channels)
    x_j = x_j.view(-1, self.heads, self.out_channels)
    cat_x = torch.cat([x_i, x_j], dim=-1)
    alpha = F.leaky_relu((cat_x * self.att).sum(-1), 0.2)
    alpha = pyg_utils.softmax(alpha, edge_index_i, num_nodes=size_i)
    ############################################################################

    alpha = F.dropout(alpha, p=self.dropout, training=self.training)
    # Flatten the heads back into the channel dimension.
    return (x_j * alpha.unsqueeze(-1)).view(-1, self.heads * self.out_channels)

def message(self, edge_index_i, x_i, x_j, size_i, norm):
    # Compute attention coefficients.
    x_j = x_j.view(-1, self.heads, self.out_channels)
    if x_i is None:
        alpha = (x_j * self.att[:, :, self.out_channels:]).sum(dim=-1)
    else:
        x_i = x_i.view(-1, self.heads, self.out_channels)
        alpha = (torch.cat([x_i, x_j], dim=-1) * self.att).sum(dim=-1)
    alpha = F.leaky_relu(alpha, self.negative_slope)
    alpha = softmax(alpha, edge_index_i, num_nodes=size_i)
    # Sample attention coefficients stochastically.
    alpha = F.dropout(alpha, p=self.dropout, training=self.training)
    # Repeat the per-edge norm once per head before broadcasting.
    norm = norm.unsqueeze(dim=-1).repeat(1, self.heads)
    return norm.view(-1, self.heads, 1) * x_j * alpha.view(-1, self.heads, 1)

def forward(self, x, edge_index, edge_weight=None, M=None, UM=None, batch=None,
            num_nodes=None):
    """"""
    if batch is None:
        batch = edge_index.new_zeros(x.size(0))
    if edge_weight is None:
        edge_weight = torch.ones(edge_index.size(1), device=edge_index.device)

    # Diagonal of the unnormalized M.
    diag_UM = torch.diag(UM).squeeze()

    # Linear transform.
    xtransform = torch.matmul(x, self.transform)

    # Aggregate score.
    score = self.pan_pool_weight[0] * xtransform + self.pan_pool_weight[1] * diag_UM

    if self.min_score is None:
        score = self.nonlinearity(score)
    else:
        score = softmax(score, batch)

    perm = self.topk(score, self.ratio, batch, self.min_score)
    x = x[perm] * score[perm].view(-1, 1)
    x = self.multiplier * x if self.multiplier != 1 else x

    batch = batch[perm]
    edge_index, edge_weight = self.filter_adj(edge_index, edge_weight, perm,
                                              num_nodes=score.size(0))
    return x, edge_index, edge_weight, batch, perm, score[perm]