class LEConv(MessagePassing): r"""The local extremum graph neural network operator from the `"ASAP: Adaptive Structure Aware Pooling for Learning Hierarchical Graph Representations" <https://arxiv.org/abs/1911.07979>`_ paper, which finds the importance of nodes with respect to their neighbors using the difference operator: .. math:: \mathbf{x}^{\prime}_i = \mathbf{x}_i \cdot \mathbf{\Theta}_1 + \sum_{j \in \mathcal{N}(i)} e_{j,i} \cdot (\mathbf{\Theta}_2 \mathbf{x}_i - \mathbf{\Theta}_3 \mathbf{x}_j) where :math:`e_{j,i}` denotes the edge weight from source node :obj:`j` to target node :obj:`i` (default: :obj:`1`) Args: in_channels (int): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. out_channels (int): Size of each output sample. bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`). **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. """ def __init__(self, in_channels, out_channels, bias=True, **kwargs): kwargs.setdefault('aggr', 'add') super(LEConv, self).__init__(**kwargs) self.in_channels = in_channels self.out_channels = out_channels self.lin1 = Linear(in_channels, out_channels, bias=bias) self.lin2 = Linear(in_channels, out_channels, bias=False) self.lin3 = Linear(in_channels, out_channels, bias=bias) self.reset_parameters() def reset_parameters(self): self.lin1.reset_parameters() self.lin2.reset_parameters() self.lin3.reset_parameters() def forward(self, x, edge_index, edge_weight=None): """""" a = self.lin1(x) b = self.lin2(x) out = self.propagate(edge_index, a=a, b=b, edge_weight=edge_weight) return out + self.lin3(x) def message(self, a_i, b_j, edge_weight): out = a_i - b_j return out if edge_weight is None else out * edge_weight.view(-1, 1) def __repr__(self): return '{}({}, {})'.format(self.__class__.__name__, self.in_channels, self.out_channels)
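# A minimal usage sketch for LEConv (assumes `torch` is imported; the tensors
# below are made-up placeholder data, not taken from the original source):
#
#     conv = LEConv(in_channels=16, out_channels=32)
#     x = torch.randn(4, 16)                      # 4 nodes, 16 features each
#     edge_index = torch.tensor([[0, 1, 2, 3],    # source nodes j
#                                [1, 0, 3, 2]])   # target nodes i
#     edge_weight = torch.rand(edge_index.size(1))
#     out = conv(x, edge_index, edge_weight)      # shape: [4, 32]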
class GravNetConv(MessagePassing): r"""The GravNet operator from the `"Learning Representations of Irregular Particle-detector Geometry with Distance-weighted Graph Networks" <https://arxiv.org/abs/1902.07987>`_ paper, where the graph is dynamically constructed using nearest neighbors. The neighbors are constructed in a learnable low-dimensional projection of the feature space. A second projection of the input feature space is then propagated from the neighbors to each vertex using distance weights that are derived by applying a Gaussian function to the distances. Args: in_channels (int): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. out_channels (int): The number of output channels. space_dimensions (int): The dimensionality of the space used to construct the neighbors; referred to as :math:`S` in the paper. propagate_dimensions (int): The number of features to be propagated between the vertices; referred to as :math:`F_{\textrm{LR}}` in the paper. k (int): The number of nearest neighbors. num_workers (int): Number of workers to use for k-NN computation. Has no effect in case :obj:`batch` is not :obj:`None`, or the input lies on the GPU. (default: :obj:`1`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. Shapes: - **input:** node features :math:`(|\mathcal{V}|, F_{in})` or :math:`((|\mathcal{V_s}|, F_{in}), (|\mathcal{V_t}|, F_{in}))` if bipartite, batch vector :math:`(|\mathcal{V}|)` or :math:`((|\mathcal{V}_s|), (|\mathcal{V}_t|))` if bipartite *(optional)* - **output:** node features :math:`(|\mathcal{V}|, F_{out})` or :math:`(|\mathcal{V}_t|, F_{out})` if bipartite """ def __init__(self, in_channels: int, out_channels: int, space_dimensions: int, propagate_dimensions: int, k: int, num_workers: int = 1, **kwargs): super().__init__(flow='source_to_target', **kwargs) if knn is None: raise ImportError('`GravNetConv` requires `torch-cluster`.') self.in_channels = in_channels self.out_channels = out_channels self.k = k self.num_workers = num_workers self.lin_s = Linear(in_channels, space_dimensions) self.lin_h = Linear(in_channels, propagate_dimensions) self.lin_out1 = Linear(in_channels, out_channels, bias=False) self.lin_out2 = Linear(2 * propagate_dimensions, out_channels) self.reset_parameters() def reset_parameters(self): self.lin_s.reset_parameters() self.lin_h.reset_parameters() self.lin_out1.reset_parameters() self.lin_out2.reset_parameters() def forward( self, x: Union[Tensor, PairTensor], batch: Union[OptTensor, Optional[PairTensor]] = None) -> Tensor: # type: (Tensor, OptTensor) -> Tensor # noqa # type: (PairTensor, Optional[PairTensor]) -> Tensor # noqa """""" is_bipartite: bool = True if isinstance(x, Tensor): x: PairTensor = (x, x) is_bipartite = False if x[0].dim() != 2: raise ValueError("Static graphs not supported in 'GravNetConv'") b: PairOptTensor = (None, None) if isinstance(batch, Tensor): b = (batch, batch) elif isinstance(batch, tuple): assert batch is not None b = (batch[0], batch[1]) h_l: Tensor = self.lin_h(x[0]) s_l: Tensor = self.lin_s(x[0]) s_r: Tensor = self.lin_s(x[1]) if is_bipartite else s_l edge_index = knn(s_l, s_r, self.k, b[0], b[1]).flip([0]) edge_weight = (s_l[edge_index[0]] - s_r[edge_index[1]]).pow(2).sum(-1) edge_weight = torch.exp(-10. 
* edge_weight) # 10 gives a better spread # propagate_type: (x: OptPairTensor, edge_weight: OptTensor) out = self.propagate(edge_index, x=(h_l, None), edge_weight=edge_weight, size=(s_l.size(0), s_r.size(0))) return self.lin_out1(x[1]) + self.lin_out2(out) def message(self, x_j: Tensor, edge_weight: Tensor) -> Tensor: return x_j * edge_weight.unsqueeze(1) def aggregate(self, inputs: Tensor, index: Tensor, dim_size: Optional[int] = None) -> Tensor: out_mean = scatter(inputs, index, dim=self.node_dim, dim_size=dim_size, reduce='mean') out_max = scatter(inputs, index, dim=self.node_dim, dim_size=dim_size, reduce='max') return torch.cat([out_mean, out_max], dim=-1) def __repr__(self) -> str: return (f'{self.__class__.__name__}({self.in_channels}, ' f'{self.out_channels}, k={self.k})')
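# A minimal usage sketch for GravNetConv (assumes `torch` is imported and the
# optional `torch-cluster` package is installed; placeholder data only). Note
# that no `edge_index` is passed: the graph is built on the fly via k-NN in the
# learned low-dimensional space:
#
#     conv = GravNetConv(in_channels=16, out_channels=32, space_dimensions=4,
#                        propagate_dimensions=8, k=3)
#     x = torch.randn(10, 16)                     # 10 nodes, 16 features each
#     out = conv(x)                               # shape: [10, 32]
#     # An optional `batch` vector separates disjoint graphs in a mini-batch:
#     batch = torch.zeros(10, dtype=torch.long)
#     out = conv(x, batch=batch)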
class EGConv(MessagePassing): r"""The Efficient Graph Convolution from the `"Adaptive Filters and Aggregator Fusion for Efficient Graph Convolutions" <https://arxiv.org/abs/2104.01481>`_ paper. Its node-wise formulation is given by: .. math:: \mathbf{x}_i^{\prime} = {\LARGE ||}_{h=1}^H \sum_{\oplus \in \mathcal{A}} \sum_{b = 1}^B w_{i, h, \oplus, b} \; \underset{j \in \mathcal{N}(i) \cup \{i\}}{\bigoplus} \mathbf{\Theta}_b \mathbf{x}_{j} with :math:`\mathbf{\Theta}_b` denoting a basis weight, :math:`\oplus` denoting an aggregator, and :math:`w` denoting per-vertex weighting coefficients across different heads, bases and aggregators. EGC retains :math:`\mathcal{O}(|\mathcal{V}|)` memory usage, making it a sensible alternative to :class:`~torch_geometric.nn.conv.GCNConv`, :class:`~torch_geometric.nn.conv.SAGEConv` or :class:`~torch_geometric.nn.conv.GINConv`. .. note:: For an example of using :obj:`EGConv`, see `examples/egc.py <https://github.com/pyg-team/pytorch_geometric/blob/master/ examples/egc.py>`_. Args: in_channels (int): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. out_channels (int): Size of each output sample. aggregators (List[str], optional): Aggregators to be used. Supported aggregators are :obj:`"sum"`, :obj:`"mean"`, :obj:`"symnorm"`, :obj:`"max"`, :obj:`"min"`, :obj:`"std"`, :obj:`"var"`. Multiple aggregators can be used to improve the performance. (default: :obj:`["symnorm"]`) num_heads (int, optional): Number of heads :math:`H` to use. Must have :obj:`out_channels % num_heads == 0`. It is recommended to set :obj:`num_heads >= num_bases`. (default: :obj:`8`) num_bases (int, optional): Number of basis weights :math:`B` to use. (default: :obj:`4`) cached (bool, optional): If set to :obj:`True`, the layer will cache the computation of the edge index with added self loops on first execution, along with caching the calculation of the symmetric normalized edge weights if the :obj:`"symnorm"` aggregator is being used. This parameter should only be set to :obj:`True` in transductive learning scenarios. (default: :obj:`False`) add_self_loops (bool, optional): If set to :obj:`False`, will not add self-loops to the input graph. (default: :obj:`True`) bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. 
Shapes: - **input:** node features :math:`(|\mathcal{V}|, F_{in})`, edge indices :math:`(2, |\mathcal{E}|)` - **output:** node features :math:`(|\mathcal{V}|, F_{out})` """ _cached_edge_index: Optional[Tuple[Tensor, OptTensor]] _cached_adj_t: Optional[SparseTensor] def __init__(self, in_channels: int, out_channels: int, aggregators: List[str] = ["symnorm"], num_heads: int = 8, num_bases: int = 4, cached: bool = False, add_self_loops: bool = True, bias: bool = True, **kwargs): super().__init__(node_dim=0, **kwargs) if out_channels % num_heads != 0: raise ValueError( 'out_channels must be divisible by the number of heads') for a in aggregators: if a not in ['sum', 'mean', 'symnorm', 'min', 'max', 'var', 'std']: raise ValueError(f"Unsupported aggregator: '{a}'") self.in_channels = in_channels self.out_channels = out_channels self.num_heads = num_heads self.num_bases = num_bases self.cached = cached self.add_self_loops = add_self_loops self.aggregators = aggregators self.bases_lin = Linear(in_channels, (out_channels // num_heads) * num_bases, bias=False, weight_initializer='glorot') self.comb_lin = Linear(in_channels, num_heads * num_bases * len(aggregators)) if bias: self.bias = Parameter(torch.Tensor(out_channels)) else: self.register_parameter('bias', None) self.reset_parameters() def reset_parameters(self): self.bases_lin.reset_parameters() self.comb_lin.reset_parameters() zeros(self.bias) self._cached_adj_t = None self._cached_edge_index = None def forward(self, x: Tensor, edge_index: Adj) -> Tensor: """""" symnorm_weight: OptTensor = None if "symnorm" in self.aggregators: if isinstance(edge_index, Tensor): cache = self._cached_edge_index if cache is None: edge_index, symnorm_weight = gcn_norm( # yapf: disable edge_index, None, num_nodes=x.size(self.node_dim), improved=False, add_self_loops=self.add_self_loops) if self.cached: self._cached_edge_index = (edge_index, symnorm_weight) else: edge_index, symnorm_weight = cache elif isinstance(edge_index, SparseTensor): cache = self._cached_adj_t if cache is None: edge_index = gcn_norm( # yapf: disable edge_index, None, num_nodes=x.size(self.node_dim), improved=False, add_self_loops=self.add_self_loops) if self.cached: self._cached_adj_t = edge_index else: edge_index = cache elif self.add_self_loops: if isinstance(edge_index, Tensor): cache = self._cached_edge_index if self.cached and cache is not None: edge_index = cache[0] else: edge_index, _ = add_remaining_self_loops(edge_index) if self.cached: self._cached_edge_index = (edge_index, None) elif isinstance(edge_index, SparseTensor): cache = self._cached_adj_t if self.cached and cache is not None: edge_index = cache else: edge_index = fill_diag(edge_index, 1.0) if self.cached: self._cached_adj_t = edge_index # [num_nodes, (out_channels // num_heads) * num_bases] bases = self.bases_lin(x) # [num_nodes, num_heads * num_bases * num_aggrs] weightings = self.comb_lin(x) # [num_nodes, num_aggregators, (out_channels // num_heads) * num_bases] # propagate_type: (x: Tensor, symnorm_weight: OptTensor) aggregated = self.propagate(edge_index, x=bases, symnorm_weight=symnorm_weight, size=None) weightings = weightings.view(-1, self.num_heads, self.num_bases * len(self.aggregators)) aggregated = aggregated.view( -1, len(self.aggregators) * self.num_bases, self.out_channels // self.num_heads, ) # [num_nodes, num_heads, out_channels // num_heads] out = torch.matmul(weightings, aggregated) out = out.view(-1, self.out_channels) if self.bias is not None: out += self.bias return out def message(self, x_j: Tensor) 
-> Tensor: return x_j def aggregate(self, inputs: Tensor, index: Tensor, dim_size: Optional[int] = None, symnorm_weight: OptTensor = None) -> Tensor: outs = [] for aggr in self.aggregators: if aggr == 'symnorm': assert symnorm_weight is not None out = scatter(inputs * symnorm_weight.view(-1, 1), index, 0, None, dim_size, reduce='sum') elif aggr == 'var' or aggr == 'std': mean = scatter(inputs, index, 0, None, dim_size, reduce='mean') mean_squares = scatter(inputs * inputs, index, 0, None, dim_size, reduce='mean') out = mean_squares - mean * mean if aggr == 'std': out = torch.sqrt(out.relu_() + 1e-5) else: out = scatter(inputs, index, 0, None, dim_size, reduce=aggr) outs.append(out) return torch.stack(outs, dim=1) if len(outs) > 1 else outs[0] def message_and_aggregate(self, adj_t: SparseTensor, x: Tensor) -> Tensor: adj_t_2 = adj_t if len(self.aggregators) > 1 and 'symnorm' in self.aggregators: adj_t_2 = adj_t.set_value(None) outs = [] for aggr in self.aggregators: if aggr == 'symnorm': out = matmul(adj_t, x, reduce='sum') elif aggr in ['var', 'std']: mean = matmul(adj_t_2, x, reduce='mean') mean_sq = matmul(adj_t_2, x * x, reduce='mean') out = mean_sq - mean * mean if aggr == 'std': out = torch.sqrt(out.relu_() + 1e-5) else: out = matmul(adj_t_2, x, reduce=aggr) outs.append(out) return torch.stack(outs, dim=1) if len(outs) > 1 else outs[0] def __repr__(self) -> str: return (f'{self.__class__.__name__}({self.in_channels}, ' f'{self.out_channels}, aggregators={self.aggregators})')
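# A minimal usage sketch for EGConv (assumes `torch` is imported; placeholder
# data only). `out_channels` must be divisible by `num_heads`:
#
#     conv = EGConv(16, 32, aggregators=['symnorm', 'max'],
#                   num_heads=4, num_bases=4)
#     x = torch.randn(4, 16)
#     edge_index = torch.tensor([[0, 1, 2, 3],
#                                [1, 0, 3, 2]])
#     out = conv(x, edge_index)                   # shape: [4, 32]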
class PNAConv(MessagePassing): r"""The Principal Neighbourhood Aggregation graph convolution operator from the `"Principal Neighbourhood Aggregation for Graph Nets" <https://arxiv.org/abs/2004.05718>`_ paper .. math:: \mathbf{x}_i^{\prime} = \gamma_{\mathbf{\Theta}} \left( \mathbf{x}_i, \underset{j \in \mathcal{N}(i)}{\bigoplus} h_{\mathbf{\Theta}} \left( \mathbf{x}_i, \mathbf{x}_j \right) \right) with .. math:: \bigoplus = \underbrace{\begin{bmatrix} 1 \\ S(\mathbf{D}, \alpha=1) \\ S(\mathbf{D}, \alpha=-1) \end{bmatrix} }_{\text{scalers}} \otimes \underbrace{\begin{bmatrix} \mu \\ \sigma \\ \max \\ \min \end{bmatrix}}_{\text{aggregators}}, where :math:`\gamma_{\mathbf{\Theta}}` and :math:`h_{\mathbf{\Theta}}` denote MLPs. .. note:: For an example of using :obj:`PNAConv`, see `examples/pna.py <https://github.com/pyg-team/pytorch_geometric/blob/master/ examples/pna.py>`_. Args: in_channels (int): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. out_channels (int): Size of each output sample. aggregators (list of str): Set of aggregation function identifiers, namely :obj:`"sum"`, :obj:`"mean"`, :obj:`"min"`, :obj:`"max"`, :obj:`"var"` and :obj:`"std"`. scalers (list of str): Set of scaling function identifiers, namely :obj:`"identity"`, :obj:`"amplification"`, :obj:`"attenuation"`, :obj:`"linear"` and :obj:`"inverse_linear"`. deg (Tensor): Histogram of in-degrees of nodes in the training set, used by scalers to normalize. edge_dim (int, optional): Edge feature dimensionality (in case there are any). (default :obj:`None`) towers (int, optional): Number of towers (default: :obj:`1`). pre_layers (int, optional): Number of transformation layers before aggregation (default: :obj:`1`). post_layers (int, optional): Number of transformation layers after aggregation (default: :obj:`1`). divide_input (bool, optional): Whether the input features should be split between towers or not (default: :obj:`False`). **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. 
Shapes: - **input:** node features :math:`(|\mathcal{V}|, F_{in})`, edge indices :math:`(2, |\mathcal{E}|)`, edge features :math:`(|\mathcal{E}|, D)` *(optional)* - **output:** node features :math:`(|\mathcal{V}|, F_{out})` """ def __init__(self, in_channels: int, out_channels: int, aggregators: List[str], scalers: List[str], deg: Tensor, edge_dim: Optional[int] = None, towers: int = 1, pre_layers: int = 1, post_layers: int = 1, divide_input: bool = False, **kwargs): kwargs.setdefault('aggr', None) super().__init__(node_dim=0, **kwargs) if divide_input: assert in_channels % towers == 0 assert out_channels % towers == 0 self.in_channels = in_channels self.out_channels = out_channels self.aggregators = aggregators self.scalers = scalers self.edge_dim = edge_dim self.towers = towers self.divide_input = divide_input self.F_in = in_channels // towers if divide_input else in_channels self.F_out = self.out_channels // towers deg = deg.to(torch.float) num_nodes = int(deg.sum()) bin_degrees = torch.arange(deg.numel()) self.avg_deg: Dict[str, float] = { 'lin': float((bin_degrees * deg).sum()) / num_nodes, 'log': float(((bin_degrees + 1).log() * deg).sum()) / num_nodes, 'exp': float((bin_degrees.exp() * deg).sum()) / num_nodes, } if self.edge_dim is not None: self.edge_encoder = Linear(edge_dim, self.F_in) self.pre_nns = ModuleList() self.post_nns = ModuleList() for _ in range(towers): modules = [Linear((3 if edge_dim else 2) * self.F_in, self.F_in)] for _ in range(pre_layers - 1): modules += [ReLU()] modules += [Linear(self.F_in, self.F_in)] self.pre_nns.append(Sequential(*modules)) in_channels = (len(aggregators) * len(scalers) + 1) * self.F_in modules = [Linear(in_channels, self.F_out)] for _ in range(post_layers - 1): modules += [ReLU()] modules += [Linear(self.F_out, self.F_out)] self.post_nns.append(Sequential(*modules)) self.lin = Linear(out_channels, out_channels) self.reset_parameters() def reset_parameters(self): if self.edge_dim is not None: self.edge_encoder.reset_parameters() for nn in self.pre_nns: reset(nn) for nn in self.post_nns: reset(nn) self.lin.reset_parameters() def forward(self, x: Tensor, edge_index: Adj, edge_attr: OptTensor = None) -> Tensor: """""" if self.divide_input: x = x.view(-1, self.towers, self.F_in) else: x = x.view(-1, 1, self.F_in).repeat(1, self.towers, 1) # propagate_type: (x: Tensor, edge_attr: OptTensor) out = self.propagate(edge_index, x=x, edge_attr=edge_attr, size=None) out = torch.cat([x, out], dim=-1) outs = [nn(out[:, i]) for i, nn in enumerate(self.post_nns)] out = torch.cat(outs, dim=1) return self.lin(out) def message(self, x_i: Tensor, x_j: Tensor, edge_attr: OptTensor) -> Tensor: h: Tensor = x_i # Dummy. 
if edge_attr is not None: edge_attr = self.edge_encoder(edge_attr) edge_attr = edge_attr.view(-1, 1, self.F_in) edge_attr = edge_attr.repeat(1, self.towers, 1) h = torch.cat([x_i, x_j, edge_attr], dim=-1) else: h = torch.cat([x_i, x_j], dim=-1) hs = [nn(h[:, i]) for i, nn in enumerate(self.pre_nns)] return torch.stack(hs, dim=1) def aggregate(self, inputs: Tensor, index: Tensor, dim_size: Optional[int] = None) -> Tensor: outs = [] for aggregator in self.aggregators: if aggregator == 'sum': out = scatter(inputs, index, 0, None, dim_size, reduce='sum') elif aggregator == 'mean': out = scatter(inputs, index, 0, None, dim_size, reduce='mean') elif aggregator == 'min': out = scatter(inputs, index, 0, None, dim_size, reduce='min') elif aggregator == 'max': out = scatter(inputs, index, 0, None, dim_size, reduce='max') elif aggregator == 'var' or aggregator == 'std': mean = scatter(inputs, index, 0, None, dim_size, reduce='mean') mean_squares = scatter(inputs * inputs, index, 0, None, dim_size, reduce='mean') out = mean_squares - mean * mean if aggregator == 'std': out = torch.sqrt(torch.relu(out) + 1e-5) else: raise ValueError(f'Unknown aggregator "{aggregator}".') outs.append(out) out = torch.cat(outs, dim=-1) deg = degree(index, dim_size, dtype=inputs.dtype) deg = deg.clamp_(1).view(-1, 1, 1) outs = [] for scaler in self.scalers: if scaler == 'identity': pass elif scaler == 'amplification': out = out * (torch.log(deg + 1) / self.avg_deg['log']) elif scaler == 'attenuation': out = out * (self.avg_deg['log'] / torch.log(deg + 1)) elif scaler == 'linear': out = out * (deg / self.avg_deg['lin']) elif scaler == 'inverse_linear': out = out * (self.avg_deg['lin'] / deg) else: raise ValueError(f'Unknown scaler "{scaler}".') outs.append(out) return torch.cat(outs, dim=-1) def __repr__(self): return (f'{self.__class__.__name__}({self.in_channels}, ' f'{self.out_channels}, towers={self.towers}, ' f'edge_dim={self.edge_dim})')
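# A minimal usage sketch for PNAConv (assumes `torch` is imported; placeholder
# data only). `deg` is a histogram of training-set in-degrees, i.e. `deg[d]`
# counts the nodes with in-degree `d`; here all 4 nodes have in-degree 1:
#
#     deg = torch.tensor([0, 4], dtype=torch.float)
#     conv = PNAConv(16, 32, aggregators=['mean', 'max'],
#                    scalers=['identity', 'amplification'], deg=deg)
#     x = torch.randn(4, 16)
#     edge_index = torch.tensor([[0, 1, 2, 3],
#                                [1, 0, 3, 2]])
#     out = conv(x, edge_index)                   # shape: [4, 32]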
class GCNConv(MessagePassing): r"""The graph convolutional operator from the `"Semi-supervised Classification with Graph Convolutional Networks" <https://arxiv.org/abs/1609.02907>`_ paper .. math:: \mathbf{X}^{\prime} = \mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}} \mathbf{\hat{D}}^{-1/2} \mathbf{X} \mathbf{\Theta}, where :math:`\mathbf{\hat{A}} = \mathbf{A} + \mathbf{I}` denotes the adjacency matrix with inserted self-loops and :math:`\hat{D}_{ii} = \sum_{j=0} \hat{A}_{ij}` its diagonal degree matrix. The adjacency matrix can include other values than :obj:`1` representing edge weights via the optional :obj:`edge_weight` tensor. Its node-wise formulation is given by: .. math:: \mathbf{x}^{\prime}_i = \mathbf{\Theta} \sum_{j \in \mathcal{N}(i) \cup \{ i \}} \frac{e_{j,i}}{\sqrt{\hat{d}_j \hat{d}_i}} \mathbf{x}_j with :math:`\hat{d}_i = 1 + \sum_{j \in \mathcal{N}(i)} e_{j,i}`, where :math:`e_{j,i}` denotes the edge weight from source node :obj:`j` to target node :obj:`i` (default: :obj:`1.0`) Args: in_channels (int): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. out_channels (int): Size of each output sample. improved (bool, optional): If set to :obj:`True`, the layer computes :math:`\mathbf{\hat{A}}` as :math:`\mathbf{A} + 2\mathbf{I}`. (default: :obj:`False`) cached (bool, optional): If set to :obj:`True`, the layer will cache the computation of :math:`\mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}} \mathbf{\hat{D}}^{-1/2}` on first execution, and will use the cached version for further executions. This parameter should only be set to :obj:`True` in transductive learning scenarios. (default: :obj:`False`) add_self_loops (bool, optional): If set to :obj:`False`, will not add self-loops to the input graph. (default: :obj:`True`) normalize (bool, optional): Whether to add self-loops and compute symmetric normalization coefficients on the fly. (default: :obj:`True`) bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`.
""" _cached_edge_index: Optional[Tuple[Tensor, Tensor]] _cached_adj_t: Optional[SparseTensor] def __init__(self, in_channels: int, out_channels: int, improved: bool = False, cached: bool = False, add_self_loops: bool = True, normalize: bool = True, bias: bool = True, **kwargs): kwargs.setdefault('aggr', 'add') super(GCNConv, self).__init__(**kwargs) self.in_channels = in_channels self.out_channels = out_channels self.improved = improved self.cached = cached self.add_self_loops = add_self_loops self.normalize = normalize self._cached_edge_index = None self._cached_adj_t = None self.lin = Linear(in_channels, out_channels, bias=False, weight_initializer='glorot') if bias: self.bias = Parameter(torch.Tensor(out_channels)) else: self.register_parameter('bias', None) self.reset_parameters() def reset_parameters(self): self.lin.reset_parameters() zeros(self.bias) self._cached_edge_index = None self._cached_adj_t = None def forward(self, x: Tensor, edge_index: Adj, edge_weight: OptTensor = None) -> Tensor: """""" if self.normalize: if isinstance(edge_index, Tensor): cache = self._cached_edge_index if cache is None: edge_index, edge_weight = gcn_norm( # yapf: disable edge_index, edge_weight, x.size(self.node_dim), self.improved, self.add_self_loops) if self.cached: self._cached_edge_index = (edge_index, edge_weight) else: edge_index, edge_weight = cache[0], cache[1] elif isinstance(edge_index, SparseTensor): cache = self._cached_adj_t if cache is None: edge_index = gcn_norm( # yapf: disable edge_index, edge_weight, x.size(self.node_dim), self.improved, self.add_self_loops) if self.cached: self._cached_adj_t = edge_index else: edge_index = cache x = self.lin(x) # propagate_type: (x: Tensor, edge_weight: OptTensor) out = self.propagate(edge_index, x=x, edge_weight=edge_weight, size=None) if self.bias is not None: out += self.bias return out def message(self, x_j: Tensor, edge_weight: OptTensor) -> Tensor: return x_j if edge_weight is None else edge_weight.view(-1, 1) * x_j def message_and_aggregate(self, adj_t: SparseTensor, x: Tensor) -> Tensor: return matmul(adj_t, x, reduce=self.aggr) def __repr__(self): return '{}({}, {})'.format(self.__class__.__name__, self.in_channels, self.out_channels)
class TransformerConv(MessagePassing): r"""The graph transformer operator from the `"Masked Label Prediction: Unified Message Passing Model for Semi-Supervised Classification" <https://arxiv.org/abs/2009.03509>`_ paper .. math:: \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \sum_{j \in \mathcal{N}(i)} \alpha_{i,j} \mathbf{W}_2 \mathbf{x}_{j}, where the attention coefficients :math:`\alpha_{i,j}` are computed via multi-head dot product attention: .. math:: \alpha_{i,j} = \textrm{softmax} \left( \frac{(\mathbf{W}_3\mathbf{x}_i)^{\top} (\mathbf{W}_4\mathbf{x}_j)} {\sqrt{d}} \right) Args: in_channels (int or tuple): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. A tuple corresponds to the sizes of source and target dimensionalities. out_channels (int): Size of each output sample. heads (int, optional): Number of multi-head-attentions. (default: :obj:`1`) concat (bool, optional): If set to :obj:`False`, the multi-head attentions are averaged instead of concatenated. (default: :obj:`True`) beta (bool, optional): If set, will combine aggregation and skip information via .. math:: \mathbf{x}^{\prime}_i = \beta_i \mathbf{W}_1 \mathbf{x}_i + (1 - \beta_i) \underbrace{\left(\sum_{j \in \mathcal{N}(i)} \alpha_{i,j} \mathbf{W}_2 \vec{x}_j \right)}_{=\mathbf{m}_i} with :math:`\beta_i = \textrm{sigmoid}(\mathbf{w}_5^{\top} [ \mathbf{W}_1 \mathbf{x}_i, \mathbf{m}_i, \mathbf{W}_1 \mathbf{x}_i - \mathbf{m}_i ])` (default: :obj:`False`) dropout (float, optional): Dropout probability of the normalized attention coefficients which exposes each node to a stochastically sampled neighborhood during training. (default: :obj:`0`) edge_dim (int, optional): Edge feature dimensionality (in case there are any). Edge features are added to the keys after linear transformation, that is, prior to computing the attention dot product. They are also added to final values after the same linear transformation. The model is: .. math:: \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \sum_{j \in \mathcal{N}(i)} \alpha_{i,j} \left( \mathbf{W}_2 \mathbf{x}_{j} + \mathbf{W}_6 \mathbf{e}_{ij} \right), where the attention coefficients :math:`\alpha_{i,j}` are now computed via: .. math:: \alpha_{i,j} = \textrm{softmax} \left( \frac{(\mathbf{W}_3\mathbf{x}_i)^{\top} (\mathbf{W}_4\mathbf{x}_j + \mathbf{W}_6 \mathbf{e}_{ij})} {\sqrt{d}} \right) (default :obj:`None`) bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`) root_weight (bool, optional): If set to :obj:`False`, the layer will not add the transformed root node features to the output and the option :attr:`beta` is set to :obj:`False`. (default: :obj:`True`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. 
""" _alpha: OptTensor def __init__( self, in_channels: Union[int, Tuple[int, int]], out_channels: int, heads: int = 1, concat: bool = True, beta: bool = False, dropout: float = 0., edge_dim: Optional[int] = None, bias: bool = True, root_weight: bool = True, **kwargs, ): kwargs.setdefault('aggr', 'add') super(TransformerConv, self).__init__(node_dim=0, **kwargs) self.in_channels = in_channels self.out_channels = out_channels self.heads = heads self.beta = beta and root_weight self.root_weight = root_weight self.concat = concat self.dropout = dropout self.edge_dim = edge_dim self._alpha = None if isinstance(in_channels, int): in_channels = (in_channels, in_channels) self.lin_key = Linear(in_channels[0], heads * out_channels) self.lin_query = Linear(in_channels[1], heads * out_channels) self.lin_value = Linear(in_channels[0], heads * out_channels) if edge_dim is not None: self.lin_edge = Linear(edge_dim, heads * out_channels, bias=False) else: self.lin_edge = self.register_parameter('lin_edge', None) if concat: self.lin_skip = Linear(in_channels[1], heads * out_channels, bias=bias) if self.beta: self.lin_beta = Linear(3 * heads * out_channels, 1, bias=False) else: self.lin_beta = self.register_parameter('lin_beta', None) else: self.lin_skip = Linear(in_channels[1], out_channels, bias=bias) if self.beta: self.lin_beta = Linear(3 * out_channels, 1, bias=False) else: self.lin_beta = self.register_parameter('lin_beta', None) self.reset_parameters() def reset_parameters(self): self.lin_key.reset_parameters() self.lin_query.reset_parameters() self.lin_value.reset_parameters() if self.edge_dim: self.lin_edge.reset_parameters() self.lin_skip.reset_parameters() if self.beta: self.lin_beta.reset_parameters() def forward(self, x: Union[Tensor, PairTensor], edge_index: Adj, edge_attr: OptTensor = None, return_attention_weights=None): # type: (Union[Tensor, PairTensor], Tensor, OptTensor, NoneType) -> Tensor # noqa # type: (Union[Tensor, PairTensor], SparseTensor, OptTensor, NoneType) -> Tensor # noqa # type: (Union[Tensor, PairTensor], Tensor, OptTensor, bool) -> Tuple[Tensor, Tuple[Tensor, Tensor]] # noqa # type: (Union[Tensor, PairTensor], SparseTensor, OptTensor, bool) -> Tuple[Tensor, SparseTensor] # noqa r""" Args: return_attention_weights (bool, optional): If set to :obj:`True`, will additionally return the tuple :obj:`(edge_index, attention_weights)`, holding the computed attention weights for each edge. 
(default: :obj:`None`) """ H, C = self.heads, self.out_channels if isinstance(x, Tensor): x: PairTensor = (x, x) query = self.lin_query(x[1]).view(-1, H, C) key = self.lin_key(x[0]).view(-1, H, C) value = self.lin_value(x[0]).view(-1, H, C) # propagate_type: (query: Tensor, key:Tensor, value: Tensor, edge_attr: OptTensor) # noqa out = self.propagate(edge_index, query=query, key=key, value=value, edge_attr=edge_attr, size=None) alpha = self._alpha self._alpha = None if self.concat: out = out.view(-1, self.heads * self.out_channels) else: out = out.mean(dim=1) if self.root_weight: x_r = self.lin_skip(x[1]) if self.lin_beta is not None: beta = self.lin_beta(torch.cat([out, x_r, out - x_r], dim=-1)) beta = beta.sigmoid() out = beta * x_r + (1 - beta) * out else: out += x_r if isinstance(return_attention_weights, bool): assert alpha is not None if isinstance(edge_index, Tensor): return out, (edge_index, alpha) elif isinstance(edge_index, SparseTensor): return out, edge_index.set_value(alpha, layout='coo') else: return out def message(self, query_i: Tensor, key_j: Tensor, value_j: Tensor, edge_attr: OptTensor, index: Tensor, ptr: OptTensor, size_i: Optional[int]) -> Tensor: if self.lin_edge is not None: assert edge_attr is not None edge_attr = self.lin_edge(edge_attr).view(-1, self.heads, self.out_channels) key_j += edge_attr alpha = (query_i * key_j).sum(dim=-1) / math.sqrt(self.out_channels) alpha = softmax(alpha, index, ptr, size_i) self._alpha = alpha alpha = F.dropout(alpha, p=self.dropout, training=self.training) out = value_j if edge_attr is not None: out += edge_attr out *= alpha.view(-1, self.heads, 1) return out def __repr__(self) -> str: return (f'{self.__class__.__name__}({self.in_channels}, ' f'{self.out_channels}, heads={self.heads})')
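# A minimal usage sketch for TransformerConv (assumes `torch` is imported;
# placeholder data only). With `concat=True` (the default), the output holds
# `heads * out_channels` features per node:
#
#     conv = TransformerConv(16, 32, heads=2)
#     x = torch.randn(4, 16)
#     edge_index = torch.tensor([[0, 1, 2, 3],
#                                [1, 0, 3, 2]])
#     out = conv(x, edge_index)                   # shape: [4, 2 * 32]
#     out, (ei, alpha) = conv(x, edge_index, return_attention_weights=True)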
class ResGatedGraphConv(MessagePassing): r"""The residual gated graph convolutional operator from the `"Residual Gated Graph ConvNets" <https://arxiv.org/abs/1711.07553>`_ paper .. math:: \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \sum_{j \in \mathcal{N}(i)} \eta_{i,j} \odot \mathbf{W}_2 \mathbf{x}_j where the gate :math:`\eta_{i,j}` is defined as .. math:: \eta_{i,j} = \sigma(\mathbf{W}_3 \mathbf{x}_i + \mathbf{W}_4 \mathbf{x}_j) with :math:`\sigma` denoting the sigmoid function. Args: in_channels (int or tuple): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. A tuple corresponds to the sizes of source and target dimensionalities. out_channels (int): Size of each output sample. act (callable, optional): Gating function :math:`\sigma`. (default: :meth:`torch.nn.Sigmoid()`) bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`) root_weight (bool, optional): If set to :obj:`False`, the layer will not add transformed root node features to the output. (default: :obj:`True`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. """ def __init__( self, in_channels: Union[int, Tuple[int, int]], out_channels: int, act: Optional[Callable] = Sigmoid(), root_weight: bool = True, bias: bool = True, **kwargs, ): kwargs.setdefault('aggr', 'add') super(ResGatedGraphConv, self).__init__(**kwargs) self.in_channels = in_channels self.out_channels = out_channels self.act = act self.root_weight = root_weight if isinstance(in_channels, int): in_channels = (in_channels, in_channels) self.lin_key = Linear(in_channels[1], out_channels) self.lin_query = Linear(in_channels[0], out_channels) self.lin_value = Linear(in_channels[0], out_channels) if root_weight: self.lin_skip = Linear(in_channels[1], out_channels, bias=False) else: self.register_parameter('lin_skip', None) if bias: self.bias = Parameter(Tensor(out_channels)) else: self.register_parameter('bias', None) self.reset_parameters() def reset_parameters(self): self.lin_key.reset_parameters() self.lin_query.reset_parameters() self.lin_value.reset_parameters() if self.lin_skip is not None: self.lin_skip.reset_parameters() if self.bias is not None: zeros(self.bias) def forward(self, x: Union[Tensor, PairTensor], edge_index: Adj) -> Tensor: """""" if isinstance(x, Tensor): x: PairTensor = (x, x) k = self.lin_key(x[1]) q = self.lin_query(x[0]) v = self.lin_value(x[0]) # propagate_type: (k: Tensor, q: Tensor, v: Tensor) out = self.propagate(edge_index, k=k, q=q, v=v, size=None) if self.root_weight: out += self.lin_skip(x[1]) if self.bias is not None: out += self.bias return out def message(self, k_i: Tensor, q_j: Tensor, v_j: Tensor) -> Tensor: return self.act(k_i + q_j) * v_j def __repr__(self): return '{}({}, {})'.format(self.__class__.__name__, self.in_channels, self.out_channels)
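# A minimal usage sketch for ResGatedGraphConv (assumes `torch` is imported;
# placeholder data only):
#
#     conv = ResGatedGraphConv(16, 32)
#     x = torch.randn(4, 16)
#     edge_index = torch.tensor([[0, 1, 2, 3],
#                                [1, 0, 3, 2]])
#     out = conv(x, edge_index)                   # shape: [4, 32]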
class FiLMConv(MessagePassing): r"""The FiLM graph convolutional operator from the `"GNN-FiLM: Graph Neural Networks with Feature-wise Linear Modulation" <https://arxiv.org/abs/1906.12192>`_ paper .. math:: \mathbf{x}^{\prime}_i = \sum_{r \in \mathcal{R}} \sum_{j \in \mathcal{N}(i)} \sigma \left( \boldsymbol{\gamma}_{r,i} \odot \mathbf{W}_r \mathbf{x}_j + \boldsymbol{\beta}_{r,i} \right) where :math:`\boldsymbol{\beta}_{r,i}, \boldsymbol{\gamma}_{r,i} = g(\mathbf{x}_i)` with :math:`g` being a single linear layer by default. Self-loops are automatically added to the input graph and represented as its own relation type. .. note:: For an example of using FiLM, see `examples/gcn.py <https://github.com/pyg-team/pytorch_geometric/blob/master/examples/ film.py>`_. Args: in_channels (int or tuple): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. A tuple corresponds to the sizes of source and target dimensionalities. out_channels (int): Size of each output sample. num_relations (int, optional): Number of relations. (default: :obj:`1`) nn (torch.nn.Module, optional): The neural network :math:`g` that maps node features :obj:`x_i` of shape :obj:`[-1, in_channels]` to shape :obj:`[-1, 2 * out_channels]`. If set to :obj:`None`, :math:`g` will be implemented as a single linear layer. (default: :obj:`None`) act (callable, optional): Activation function :math:`\sigma`. (default: :meth:`torch.nn.ReLU()`) aggr (string, optional): The aggregation scheme to use (:obj:`"add"`, :obj:`"mean"`, :obj:`"max"`). (default: :obj:`"mean"`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. Shapes: - **input:** node features :math:`(|\mathcal{V}|, F_{in})` or :math:`((|\mathcal{V_s}|, F_{s}), (|\mathcal{V_t}|, F_{t}))` if bipartite, edge indices :math:`(2, |\mathcal{E}|)`, edge types :math:`(|\mathcal{E}|)` - **output:** node features :math:`(|\mathcal{V}|, F_{out})` or :math:`(|\mathcal{V_t}|, F_{out})` if bipartite """ def __init__( self, in_channels: Union[int, Tuple[int, int]], out_channels: int, num_relations: int = 1, nn: Optional[Callable] = None, act: Optional[Callable] = ReLU(), aggr: str = 'mean', **kwargs, ): super().__init__(aggr=aggr, **kwargs) self.in_channels = in_channels self.out_channels = out_channels self.num_relations = max(num_relations, 1) self.act = act if isinstance(in_channels, int): in_channels = (in_channels, in_channels) self.lins = ModuleList() self.films = ModuleList() for _ in range(num_relations): self.lins.append(Linear(in_channels[0], out_channels, bias=False)) if nn is None: film = Linear(in_channels[1], 2 * out_channels) else: film = copy.deepcopy(nn) self.films.append(film) self.lin_skip = Linear(in_channels[1], self.out_channels, bias=False) if nn is None: self.film_skip = Linear(in_channels[1], 2 * self.out_channels, bias=False) else: self.film_skip = copy.deepcopy(nn) self.reset_parameters() def reset_parameters(self): for lin, film in zip(self.lins, self.films): lin.reset_parameters() reset(film) self.lin_skip.reset_parameters() reset(self.film_skip) def forward(self, x: Union[Tensor, PairTensor], edge_index: Adj, edge_type: OptTensor = None) -> Tensor: """""" if isinstance(x, Tensor): x: PairTensor = (x, x) beta, gamma = self.film_skip(x[1]).split(self.out_channels, dim=-1) out = gamma * self.lin_skip(x[1]) + beta if self.act is not None: out = self.act(out) # propagate_type: (x: Tensor, beta: Tensor, gamma: Tensor) if self.num_relations <= 1: beta, gamma = 
self.films[0](x[1]).split(self.out_channels, dim=-1) out = out + self.propagate(edge_index, x=self.lins[0](x[0]), beta=beta, gamma=gamma, size=None) else: for i, (lin, film) in enumerate(zip(self.lins, self.films)): beta, gamma = film(x[1]).split(self.out_channels, dim=-1) if isinstance(edge_index, SparseTensor): edge_type = edge_index.storage.value() assert edge_type is not None mask = edge_type == i out = out + self.propagate(masked_select_nnz( edge_index, mask, layout='coo'), x=lin(x[0]), beta=beta, gamma=gamma, size=None) else: assert edge_type is not None mask = edge_type == i out = out + self.propagate(edge_index[:, mask], x=lin(x[0]), beta=beta, gamma=gamma, size=None) return out def message(self, x_j: Tensor, beta_i: Tensor, gamma_i: Tensor) -> Tensor: out = gamma_i * x_j + beta_i if self.act is not None: out = self.act(out) return out def __repr__(self) -> str: return (f'{self.__class__.__name__}({self.in_channels}, ' f'{self.out_channels}, num_relations={self.num_relations})')
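# A minimal usage sketch for FiLMConv (assumes `torch` is imported; placeholder
# data only). With more than one relation, `edge_type` assigns a relation id in
# `[0, num_relations)` to every edge:
#
#     conv = FiLMConv(16, 32, num_relations=2)
#     x = torch.randn(4, 16)
#     edge_index = torch.tensor([[0, 1, 2, 3],
#                                [1, 0, 3, 2]])
#     edge_type = torch.tensor([0, 1, 0, 1])
#     out = conv(x, edge_index, edge_type)        # shape: [4, 32]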
class LEConv(MessagePassing): r"""The local extremum graph neural network operator from the `"ASAP: Adaptive Structure Aware Pooling for Learning Hierarchical Graph Representations" <https://arxiv.org/abs/1911.07979>`_ paper, which finds the importance of nodes with respect to their neighbors using the difference operator: .. math:: \mathbf{x}^{\prime}_i = \mathbf{x}_i \cdot \mathbf{\Theta}_1 + \sum_{j \in \mathcal{N}(i)} e_{j,i} \cdot (\mathbf{\Theta}_2 \mathbf{x}_i - \mathbf{\Theta}_3 \mathbf{x}_j) where :math:`e_{j,i}` denotes the edge weight from source node :obj:`j` to target node :obj:`i` (default: :obj:`1`) Args: in_channels (int or tuple): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. A tuple corresponds to the sizes of source and target dimensionalities. out_channels (int): Size of each output sample. bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`). **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. Shapes: - **input:** node features :math:`(|\mathcal{V}|, F_{in})` or :math:`((|\mathcal{V_s}|, F_{s}), (|\mathcal{V_t}|, F_{t}))` if bipartite, edge indices :math:`(2, |\mathcal{E}|)`, edge features :math:`(|\mathcal{E}|, D)` *(optional)* - **output:** node features :math:`(|\mathcal{V}|, F_{out})` or :math:`(|\mathcal{V}_t|, F_{out})` if bipartite """ def __init__(self, in_channels: Union[int, Tuple[int, int]], out_channels: int, bias: bool = True, **kwargs): kwargs.setdefault('aggr', 'add') super().__init__(**kwargs) self.in_channels = in_channels self.out_channels = out_channels if isinstance(in_channels, int): in_channels = (in_channels, in_channels) self.lin1 = Linear(in_channels[0], out_channels, bias=bias) self.lin2 = Linear(in_channels[1], out_channels, bias=False) self.lin3 = Linear(in_channels[1], out_channels, bias=bias) self.reset_parameters() def reset_parameters(self): self.lin1.reset_parameters() self.lin2.reset_parameters() self.lin3.reset_parameters() def forward(self, x: Union[Tensor, PairTensor], edge_index: Adj, edge_weight: OptTensor = None) -> Tensor: """""" if isinstance(x, Tensor): x = (x, x) a = self.lin1(x[0]) b = self.lin2(x[1]) # propagate_type: (a: Tensor, b: Tensor, edge_weight: OptTensor) out = self.propagate(edge_index, a=a, b=b, edge_weight=edge_weight, size=None) return out + self.lin3(x[1]) def message(self, a_j: Tensor, b_i: Tensor, edge_weight: OptTensor) -> Tensor: out = a_j - b_i return out if edge_weight is None else out * edge_weight.view(-1, 1) def __repr__(self) -> str: return (f'{self.__class__.__name__}({self.in_channels}, ' f'{self.out_channels})')
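# A minimal usage sketch for this typed LEConv variant (assumes `torch` is
# imported; placeholder data only). `edge_weight` is optional and defaults to
# 1 per edge:
#
#     conv = LEConv(16, 32)
#     x = torch.randn(4, 16)
#     edge_index = torch.tensor([[0, 1, 2, 3],
#                                [1, 0, 3, 2]])
#     out = conv(x, edge_index)                   # shape: [4, 32]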
class PNAConv(MessagePassing): r"""The Principal Neighbourhood Aggregation graph convolution operator from the `"Principal Neighbourhood Aggregation for Graph Nets" <https://arxiv.org/abs/2004.05718>`_ paper .. math:: \mathbf{x}_i^{\prime} = \gamma_{\mathbf{\Theta}} \left( \mathbf{x}_i, \underset{j \in \mathcal{N}(i)}{\bigoplus} h_{\mathbf{\Theta}} \left( \mathbf{x}_i, \mathbf{x}_j \right) \right) with .. math:: \bigoplus = \underbrace{\begin{bmatrix} 1 \\ S(\mathbf{D}, \alpha=1) \\ S(\mathbf{D}, \alpha=-1) \end{bmatrix} }_{\text{scalers}} \otimes \underbrace{\begin{bmatrix} \mu \\ \sigma \\ \max \\ \min \end{bmatrix}}_{\text{aggregators}}, where :math:`\gamma_{\mathbf{\Theta}}` and :math:`h_{\mathbf{\Theta}}` denote MLPs. .. note:: For an example of using :obj:`PNAConv`, see `examples/pna.py <https://github.com/pyg-team/pytorch_geometric/blob/master/ examples/pna.py>`_. Args: in_channels (int): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. out_channels (int): Size of each output sample. aggregators (list of str): Set of aggregation function identifiers, namely :obj:`"sum"`, :obj:`"mean"`, :obj:`"min"`, :obj:`"max"`, :obj:`"var"` and :obj:`"std"`. scalers (list of str): Set of scaling function identifiers, namely :obj:`"identity"`, :obj:`"amplification"`, :obj:`"attenuation"`, :obj:`"linear"` and :obj:`"inverse_linear"`. deg (Tensor): Histogram of in-degrees of nodes in the training set, used by scalers to normalize. edge_dim (int, optional): Edge feature dimensionality (in case there are any). (default :obj:`None`) towers (int, optional): Number of towers (default: :obj:`1`). pre_layers (int, optional): Number of transformation layers before aggregation (default: :obj:`1`). post_layers (int, optional): Number of transformation layers after aggregation (default: :obj:`1`). divide_input (bool, optional): Whether the input features should be split between towers or not (default: :obj:`False`). act (str or Callable, optional): Pre- and post-layer activation function to use. (default: :obj:`"relu"`) act_kwargs (Dict[str, Any], optional): Arguments passed to the respective activation function defined by :obj:`act`. (default: :obj:`None`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. 
Shapes: - **input:** node features :math:`(|\mathcal{V}|, F_{in})`, edge indices :math:`(2, |\mathcal{E}|)`, edge features :math:`(|\mathcal{E}|, D)` *(optional)* - **output:** node features :math:`(|\mathcal{V}|, F_{out})` """ def __init__( self, in_channels: int, out_channels: int, aggregators: List[str], scalers: List[str], deg: Tensor, edge_dim: Optional[int] = None, towers: int = 1, pre_layers: int = 1, post_layers: int = 1, divide_input: bool = False, act: Union[str, Callable, None] = "relu", act_kwargs: Optional[Dict[str, Any]] = None, **kwargs, ): aggr = DegreeScalerAggregation(aggregators, scalers, deg) super().__init__(aggr=aggr, node_dim=0, **kwargs) if divide_input: assert in_channels % towers == 0 assert out_channels % towers == 0 self.in_channels = in_channels self.out_channels = out_channels self.edge_dim = edge_dim self.towers = towers self.divide_input = divide_input self.F_in = in_channels // towers if divide_input else in_channels self.F_out = self.out_channels // towers if self.edge_dim is not None: self.edge_encoder = Linear(edge_dim, self.F_in) self.pre_nns = ModuleList() self.post_nns = ModuleList() for _ in range(towers): modules = [Linear((3 if edge_dim else 2) * self.F_in, self.F_in)] for _ in range(pre_layers - 1): modules += [activation_resolver(act, **(act_kwargs or {}))] modules += [Linear(self.F_in, self.F_in)] self.pre_nns.append(Sequential(*modules)) in_channels = (len(aggregators) * len(scalers) + 1) * self.F_in modules = [Linear(in_channels, self.F_out)] for _ in range(post_layers - 1): modules += [activation_resolver(act, **(act_kwargs or {}))] modules += [Linear(self.F_out, self.F_out)] self.post_nns.append(Sequential(*modules)) self.lin = Linear(out_channels, out_channels) self.reset_parameters() def reset_parameters(self): if self.edge_dim is not None: self.edge_encoder.reset_parameters() for nn in self.pre_nns: reset(nn) for nn in self.post_nns: reset(nn) self.lin.reset_parameters() def forward(self, x: Tensor, edge_index: Adj, edge_attr: OptTensor = None) -> Tensor: """""" if self.divide_input: x = x.view(-1, self.towers, self.F_in) else: x = x.view(-1, 1, self.F_in).repeat(1, self.towers, 1) # propagate_type: (x: Tensor, edge_attr: OptTensor) out = self.propagate(edge_index, x=x, edge_attr=edge_attr, size=None) out = torch.cat([x, out], dim=-1) outs = [nn(out[:, i]) for i, nn in enumerate(self.post_nns)] out = torch.cat(outs, dim=1) return self.lin(out) def message(self, x_i: Tensor, x_j: Tensor, edge_attr: OptTensor) -> Tensor: h: Tensor = x_i # Dummy. if edge_attr is not None: edge_attr = self.edge_encoder(edge_attr) edge_attr = edge_attr.view(-1, 1, self.F_in) edge_attr = edge_attr.repeat(1, self.towers, 1) h = torch.cat([x_i, x_j, edge_attr], dim=-1) else: h = torch.cat([x_i, x_j], dim=-1) hs = [nn(h[:, i]) for i, nn in enumerate(self.pre_nns)] return torch.stack(hs, dim=1) def __repr__(self): return (f'{self.__class__.__name__}({self.in_channels}, ' f'{self.out_channels}, towers={self.towers}, ' f'edge_dim={self.edge_dim})') @staticmethod def get_degree_histogram(loader) -> Tensor: max_degree = 0 for data in loader: d = degree(data.edge_index[1], num_nodes=data.num_nodes, dtype=torch.long) max_degree = max(max_degree, int(d.max())) # Compute the in-degree histogram tensor deg_histogram = torch.zeros(max_degree + 1, dtype=torch.long) for data in loader: d = degree(data.edge_index[1], num_nodes=data.num_nodes, dtype=torch.long) deg_histogram += torch.bincount(d, minlength=deg_histogram.numel()) return deg_histogram
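# A minimal usage sketch for this PNAConv variant (assumes `torch` is imported;
# placeholder data only). The in-degree histogram `deg` can be built manually
# as below or computed from a training loader via
# `PNAConv.get_degree_histogram(loader)`:
#
#     deg = torch.tensor([0, 4], dtype=torch.long)   # 4 nodes with in-degree 1
#     conv = PNAConv(16, 32, aggregators=['mean', 'max'],
#                    scalers=['identity', 'linear'], deg=deg)
#     x = torch.randn(4, 16)
#     edge_index = torch.tensor([[0, 1, 2, 3],
#                                [1, 0, 3, 2]])
#     out = conv(x, edge_index)                   # shape: [4, 32]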
class PANConv(MessagePassing): r"""The path integral based convolutional operator from the `"Path Integral Based Convolution and Pooling for Graph Neural Networks" <https://arxiv.org/abs/2006.16811>`_ paper .. math:: \mathbf{X}^{\prime} = \mathbf{M} \mathbf{X} \mathbf{W} where :math:`\mathbf{M}` denotes the normalized and learned maximal entropy transition (MET) matrix that includes neighbors up to :obj:`filter_size` hops: .. math:: \mathbf{M} = \mathbf{Z}^{-1/2} \sum_{n=0}^L e^{-\frac{E(n)}{T}} \mathbf{A}^n \mathbf{Z}^{-1/2} Args: in_channels (int): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. out_channels (int): Size of each output sample. filter_size (int): The filter size :math:`L`. **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. """ def __init__(self, in_channels: int, out_channels: int, filter_size: int, **kwargs): kwargs.setdefault('aggr', 'add') super(PANConv, self).__init__(**kwargs) self.in_channels = in_channels self.out_channels = out_channels self.filter_size = filter_size self.lin = Linear(in_channels, out_channels) self.weight = Parameter(torch.Tensor(filter_size + 1)) self.reset_parameters() def reset_parameters(self): self.lin.reset_parameters() self.weight.data.fill_(0.5) def forward(self, x: Tensor, edge_index: Adj) -> Tuple[Tensor, SparseTensor]: """""" adj_t: Optional[SparseTensor] = None if isinstance(edge_index, Tensor): adj_t = SparseTensor(row=edge_index[1], col=edge_index[0], sparse_sizes=(x.size(0), x.size(0))) elif isinstance(edge_index, SparseTensor): adj_t = edge_index.set_value(None) assert adj_t is not None adj_t = self.panentropy(adj_t, dtype=x.dtype) deg = adj_t.storage.rowcount().to(x.dtype) deg_inv_sqrt = deg.pow_(-0.5) deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0. M = deg_inv_sqrt.view(1, -1) * adj_t * deg_inv_sqrt.view(-1, 1) out = self.propagate(M, x=x, edge_weight=None, size=None) out = self.lin(out) return out, M def message(self, x_j: Tensor, edge_weight: Tensor) -> Tensor: return edge_weight.view(-1, 1) * x_j def message_and_aggregate(self, adj_t: SparseTensor, x: Tensor) -> Tensor: return matmul(adj_t, x, reduce=self.aggr) def panentropy(self, adj_t: SparseTensor, dtype: Optional[int] = None) -> SparseTensor: tmp = SparseTensor.eye(adj_t.size(0), adj_t.size(1), has_value=True, dtype=dtype, device=adj_t.device()) tmp = tmp.mul_nnz(self.weight[0], layout='coo') outs = [tmp] for i in range(1, self.filter_size + 1): tmp = tmp @ adj_t tmp = tmp.mul_nnz(self.weight[i], layout='coo') outs += [tmp] row = torch.cat([out.storage.row() for out in outs], dim=0) col = torch.cat([out.storage.col() for out in outs], dim=0) value = torch.cat([out.storage.value() for out in outs], dim=0) out = SparseTensor(row=row, col=col, value=value, sparse_sizes=adj_t.sparse_sizes()).coalesce() return out def __repr__(self): return '{}({}, {}, filter_size={})'.format(self.__class__.__name__, self.in_channels, self.out_channels, self.filter_size)
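# A minimal usage sketch for PANConv (assumes `torch` is imported and the
# optional `torch-sparse` package is installed; placeholder data only). The
# layer returns both the node features and the normalized MET matrix `M`:
#
#     conv = PANConv(16, 32, filter_size=2)
#     x = torch.randn(4, 16)
#     edge_index = torch.tensor([[0, 1, 2, 3],
#                                [1, 0, 3, 2]])
#     out, M = conv(x, edge_index)                # out: [4, 32], M: SparseTensor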
class FAConv(MessagePassing): r"""The Frequency Adaptive Graph Convolution operator from the `"Beyond Low-Frequency Information in Graph Convolutional Networks" <https://arxiv.org/abs/2101.00797>`_ paper .. math:: \mathbf{x}^{\prime}_i= \epsilon \cdot \mathbf{x}^{(0)}_i + \sum_{j \in \mathcal{N}(i)} \frac{\alpha_{i,j}}{\sqrt{d_i d_j}} \mathbf{x}_{j} where :math:`\mathbf{x}^{(0)}_i` and :math:`d_i` denote the initial feature representation and node degree of node :math:`i`, respectively. The attention coefficients :math:`\alpha_{i,j}` are computed as .. math:: \mathbf{\alpha}_{i,j} = \textrm{tanh}(\mathbf{a}^{\top}[\mathbf{x}_i, \mathbf{x}_j]) based on the trainable parameter vector :math:`\mathbf{a}`. Args: channels (int): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. eps (float, optional): :math:`\epsilon`-value. (default: :obj:`0.1`) dropout (float, optional): Dropout probability of the normalized coefficients which exposes each node to a stochastically sampled neighborhood during training. (default: :obj:`0`). cached (bool, optional): If set to :obj:`True`, the layer will cache the computation of :math:`\sqrt{d_i d_j}` on first execution, and will use the cached version for further executions. This parameter should only be set to :obj:`True` in transductive learning scenarios. (default: :obj:`False`) add_self_loops (bool, optional): If set to :obj:`False`, will not add self-loops to the input graph. (default: :obj:`True`) normalize (bool, optional): Whether to add self-loops (if :obj:`add_self_loops` is :obj:`True`) and compute symmetric normalization coefficients on the fly. If set to :obj:`False`, :obj:`edge_weight` needs to be provided in the layer's :meth:`forward` method. (default: :obj:`True`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. """ _cached_edge_index: Optional[Tuple[Tensor, Tensor]] _cached_adj_t: Optional[SparseTensor] _alpha: OptTensor def __init__(self, channels: int, eps: float = 0.1, dropout: float = 0.0, cached: bool = False, add_self_loops: bool = True, normalize: bool = True, **kwargs): kwargs.setdefault('aggr', 'add') super(FAConv, self).__init__(**kwargs) self.channels = channels self.eps = eps self.dropout = dropout self.cached = cached self.add_self_loops = add_self_loops self.normalize = normalize self._cached_edge_index = None self._cached_adj_t = None self._alpha = None self.att_l = Linear(channels, 1, bias=False) self.att_r = Linear(channels, 1, bias=False) self.reset_parameters() def reset_parameters(self): self.att_l.reset_parameters() self.att_r.reset_parameters() self._cached_edge_index = None self._cached_adj_t = None def forward(self, x: Tensor, x_0: Tensor, edge_index: Adj, edge_weight: OptTensor = None, return_attention_weights=None): # type: (Tensor, Tensor, Tensor, OptTensor, NoneType) -> Tensor # noqa # type: (Tensor, Tensor, SparseTensor, OptTensor, NoneType) -> Tensor # noqa # type: (Tensor, Tensor, Tensor, OptTensor, bool) -> Tuple[Tensor, Tuple[Tensor, Tensor]] # noqa # type: (Tensor, Tensor, SparseTensor, OptTensor, bool) -> Tuple[Tensor, SparseTensor] # noqa r""" Args: return_attention_weights (bool, optional): If set to :obj:`True`, will additionally return the tuple :obj:`(edge_index, attention_weights)`, holding the computed attention weights for each edge. 
(default: :obj:`None`) """ if self.normalize: if isinstance(edge_index, Tensor): assert edge_weight is None cache = self._cached_edge_index if cache is None: edge_index, edge_weight = gcn_norm( # yapf: disable edge_index, None, x.size(self.node_dim), False, self.add_self_loops, dtype=x.dtype) if self.cached: self._cached_edge_index = (edge_index, edge_weight) else: edge_index, edge_weight = cache[0], cache[1] elif isinstance(edge_index, SparseTensor): assert not edge_index.has_value() cache = self._cached_adj_t if cache is None: edge_index = gcn_norm( # yapf: disable edge_index, None, x.size(self.node_dim), False, self.add_self_loops, dtype=x.dtype) if self.cached: self._cached_adj_t = edge_index else: edge_index = cache else: if isinstance(edge_index, Tensor): assert edge_weight is not None elif isinstance(edge_index, SparseTensor): assert edge_index.has_value() alpha_l = self.att_l(x) alpha_r = self.att_r(x) # propagate_type: (x: Tensor, alpha: PairTensor, edge_weight: OptTensor) # noqa out = self.propagate(edge_index, x=x, alpha=(alpha_l, alpha_r), edge_weight=edge_weight, size=None) alpha = self._alpha self._alpha = None if self.eps != 0.0: out += self.eps * x_0 if isinstance(return_attention_weights, bool): assert alpha is not None if isinstance(edge_index, Tensor): return out, (edge_index, alpha) elif isinstance(edge_index, SparseTensor): return out, edge_index.set_value(alpha, layout='coo') else: return out def message(self, x_j: Tensor, alpha_j: Tensor, alpha_i: Tensor, edge_weight: OptTensor) -> Tensor: assert edge_weight is not None alpha = (alpha_j + alpha_i).tanh().squeeze(-1) self._alpha = alpha alpha = F.dropout(alpha, p=self.dropout, training=self.training) return x_j * (alpha * edge_weight).view(-1, 1) def __repr__(self): return '{}({}, eps={})'.format(self.__class__.__name__, self.channels, self.eps)
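# A minimal usage sketch for FAConv (assumes `torch` is imported; placeholder
# data only). `x_0` holds the initial node representations x^(0), e.g. the
# features before the first propagation layer:
#
#     conv = FAConv(channels=16, eps=0.1)
#     x = torch.randn(4, 16)
#     x_0 = x.clone()
#     edge_index = torch.tensor([[0, 1, 2, 3],
#                                [1, 0, 3, 2]])
#     out = conv(x, x_0, edge_index)              # shape: [4, 16]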
class GATConv(MessagePassing): r"""The graph attentional operator from the `"Graph Attention Networks" <https://arxiv.org/abs/1710.10903>`_ paper .. math:: \mathbf{x}^{\prime}_i = \alpha_{i,i}\mathbf{\Theta}\mathbf{x}_{i} + \sum_{j \in \mathcal{N}(i)} \alpha_{i,j}\mathbf{\Theta}\mathbf{x}_{j}, where the attention coefficients :math:`\alpha_{i,j}` are computed as .. math:: \alpha_{i,j} = \frac{ \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top} [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_j] \right)\right)} {\sum_{k \in \mathcal{N}(i) \cup \{ i \}} \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top} [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_k] \right)\right)}. If the graph has multi-dimensional edge features :math:`\mathbf{e}_{i,j}`, the attention coefficients :math:`\alpha_{i,j}` are computed as .. math:: \alpha_{i,j} = \frac{ \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top} [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_j \, \Vert \, \mathbf{\Theta}_{e} \mathbf{e}_{i,j}]\right)\right)} {\sum_{k \in \mathcal{N}(i) \cup \{ i \}} \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top} [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_k \, \Vert \, \mathbf{\Theta}_{e} \mathbf{e}_{i,k}]\right)\right)}. Args: in_channels (int or tuple): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. A tuple corresponds to the sizes of source and target dimensionalities. out_channels (int): Size of each output sample. heads (int, optional): Number of multi-head-attentions. (default: :obj:`1`) concat (bool, optional): If set to :obj:`False`, the multi-head attentions are averaged instead of concatenated. (default: :obj:`True`) negative_slope (float, optional): LeakyReLU angle of the negative slope. (default: :obj:`0.2`) dropout (float, optional): Dropout probability of the normalized attention coefficients which exposes each node to a stochastically sampled neighborhood during training. (default: :obj:`0`) add_self_loops (bool, optional): If set to :obj:`False`, will not add self-loops to the input graph. (default: :obj:`True`) edge_dim (int, optional): Edge feature dimensionality (in case there are any). (default: :obj:`None`) fill_value (float or Tensor or str, optional): The way to generate edge features of self-loops (in case :obj:`edge_dim != None`). If given as :obj:`float` or :class:`torch.Tensor`, edge features of self-loops will be directly given by :obj:`fill_value`. If given as :obj:`str`, edge features of self-loops are computed by aggregating all features of edges that point to the specific node, according to a reduce operation. (:obj:`"add"`, :obj:`"mean"`, :obj:`"min"`, :obj:`"max"`, :obj:`"mul"`). (default: :obj:`"mean"`) bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. Shapes: - **input:** node features :math:`(|\mathcal{V}|, F_{in})` or :math:`((|\mathcal{V_s}|, F_{s}), (|\mathcal{V_t}|, F_{t}))` if bipartite, edge indices :math:`(2, |\mathcal{E}|)`, edge features :math:`(|\mathcal{E}|, D)` *(optional)* - **output:** node features :math:`(|\mathcal{V}|, H * F_{out})` or :math:`((|\mathcal{V}_t|, H * F_{out})` if bipartite. 
If :obj:`return_attention_weights=True`, then :math:`((|\mathcal{V}|, H * F_{out}), ((2, |\mathcal{E}|), (|\mathcal{E}|, H)))` or :math:`((|\mathcal{V_t}|, H * F_{out}), ((2, |\mathcal{E}|), (|\mathcal{E}|, H)))` if bipartite """ _alpha: OptTensor def __init__( self, in_channels: Union[int, Tuple[int, int]], out_channels: int, heads: int = 1, concat: bool = True, negative_slope: float = 0.2, dropout: float = 0.0, add_self_loops: bool = True, edge_dim: Optional[int] = None, fill_value: Union[float, Tensor, str] = 'mean', bias: bool = True, **kwargs, ): kwargs.setdefault('aggr', 'add') super().__init__(node_dim=0, **kwargs) self.in_channels = in_channels self.out_channels = out_channels self.heads = heads self.concat = concat self.negative_slope = negative_slope self.dropout = dropout self.add_self_loops = add_self_loops self.edge_dim = edge_dim self.fill_value = fill_value # In case we are operating in bipartite graphs, we apply separate # transformations 'lin_src' and 'lin_dst' to source and target nodes: if isinstance(in_channels, int): self.lin_src = Linear(in_channels, heads * out_channels, bias=False, weight_initializer='glorot') self.lin_dst = self.lin_src else: self.lin_src = Linear(in_channels[0], heads * out_channels, False, weight_initializer='glorot') self.lin_dst = Linear(in_channels[1], heads * out_channels, False, weight_initializer='glorot') # The learnable parameters to compute attention coefficients: self.att_src = Parameter(torch.Tensor(1, heads, out_channels)) self.att_dst = Parameter(torch.Tensor(1, heads, out_channels)) if edge_dim is not None: self.lin_edge = Linear(edge_dim, heads * out_channels, bias=False, weight_initializer='glorot') self.att_edge = Parameter(torch.Tensor(1, heads, out_channels)) else: self.lin_edge = None self.register_parameter('att_edge', None) if bias and concat: self.bias = Parameter(torch.Tensor(heads * out_channels)) elif bias and not concat: self.bias = Parameter(torch.Tensor(out_channels)) else: self.register_parameter('bias', None) self._alpha = None self.reset_parameters() def reset_parameters(self): self.lin_src.reset_parameters() self.lin_dst.reset_parameters() if self.lin_edge is not None: self.lin_edge.reset_parameters() glorot(self.att_src) glorot(self.att_dst) glorot(self.att_edge) zeros(self.bias) def forward(self, x: Union[Tensor, OptPairTensor], edge_index: Adj, edge_attr: OptTensor = None, size: Size = None, return_attention_weights=None): # type: (Union[Tensor, OptPairTensor], Tensor, OptTensor, Size, NoneType) -> Tensor # noqa # type: (Union[Tensor, OptPairTensor], SparseTensor, OptTensor, Size, NoneType) -> Tensor # noqa # type: (Union[Tensor, OptPairTensor], Tensor, OptTensor, Size, bool) -> Tuple[Tensor, Tuple[Tensor, Tensor]] # noqa # type: (Union[Tensor, OptPairTensor], SparseTensor, OptTensor, Size, bool) -> Tuple[Tensor, SparseTensor] # noqa r""" Args: return_attention_weights (bool, optional): If set to :obj:`True`, will additionally return the tuple :obj:`(edge_index, attention_weights)`, holding the computed attention weights for each edge. (default: :obj:`None`) """ # NOTE: attention weights will be returned whenever # `return_attention_weights` is set to a value, regardless of its # actual value (might be `True` or `False`). This is a current somewhat # hacky workaround to allow for TorchScript support via the # `torch.jit._overload` decorator, as we can only change the output # arguments conditioned on type (`None` or `bool`), not based on its # actual value. 
H, C = self.heads, self.out_channels # We first transform the input node features. If a tuple is passed, we # transform source and target node features via separate weights: if isinstance(x, Tensor): assert x.dim() == 2, "Static graphs not supported in 'GATConv'" x_src = x_dst = self.lin_src(x).view(-1, H, C) else: # Tuple of source and target node features: x_src, x_dst = x assert x_src.dim() == 2, "Static graphs not supported in 'GATConv'" x_src = self.lin_src(x_src).view(-1, H, C) if x_dst is not None: x_dst = self.lin_dst(x_dst).view(-1, H, C) x = (x_src, x_dst) # Next, we compute node-level attention coefficients, both for source # and target nodes (if present): alpha_src = (x_src * self.att_src).sum(dim=-1) alpha_dst = None if x_dst is None else (x_dst * self.att_dst).sum(-1) alpha = (alpha_src, alpha_dst) if self.add_self_loops: if isinstance(edge_index, Tensor): # We only want to add self-loops for nodes that appear both as # source and target nodes: num_nodes = x_src.size(0) if x_dst is not None: num_nodes = min(num_nodes, x_dst.size(0)) num_nodes = min(size) if size is not None else num_nodes edge_index, edge_attr = remove_self_loops( edge_index, edge_attr) edge_index, edge_attr = add_self_loops( edge_index, edge_attr, fill_value=self.fill_value, num_nodes=num_nodes) elif isinstance(edge_index, SparseTensor): if self.edge_dim is None: edge_index = set_diag(edge_index) else: raise NotImplementedError( "The usage of 'edge_attr' and 'add_self_loops' " "simultaneously is currently not yet supported for " "'edge_index' in a 'SparseTensor' form") # propagate_type: (x: OptPairTensor, alpha: OptPairTensor, edge_attr: OptTensor) # noqa out = self.propagate(edge_index, x=x, alpha=alpha, edge_attr=edge_attr, size=size) alpha = self._alpha assert alpha is not None self._alpha = None if self.concat: out = out.view(-1, self.heads * self.out_channels) else: out = out.mean(dim=1) if self.bias is not None: out += self.bias if isinstance(return_attention_weights, bool): if isinstance(edge_index, Tensor): return out, (edge_index, alpha) elif isinstance(edge_index, SparseTensor): return out, edge_index.set_value(alpha, layout='coo') else: return out def message(self, x_j: Tensor, alpha_j: Tensor, alpha_i: OptTensor, edge_attr: OptTensor, index: Tensor, ptr: OptTensor, size_i: Optional[int]) -> Tensor: # Given edge-level attention coefficients for source and target nodes, # we simply need to sum them up to "emulate" concatenation: alpha = alpha_j if alpha_i is None else alpha_j + alpha_i if edge_attr is not None: if edge_attr.dim() == 1: edge_attr = edge_attr.view(-1, 1) assert self.lin_edge is not None edge_attr = self.lin_edge(edge_attr) edge_attr = edge_attr.view(-1, self.heads, self.out_channels) alpha_edge = (edge_attr * self.att_edge).sum(dim=-1) alpha = alpha + alpha_edge alpha = F.leaky_relu(alpha, self.negative_slope) alpha = softmax(alpha, index, ptr, size_i) self._alpha = alpha # Save for later use. alpha = F.dropout(alpha, p=self.dropout, training=self.training) return x_j * alpha.unsqueeze(-1) def __repr__(self) -> str: return (f'{self.__class__.__name__}({self.in_channels}, ' f'{self.out_channels}, heads={self.heads})')
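# A minimal usage sketch for the `GATConv` layer defined above, applied to a
# toy graph. The graph, feature sizes, and values are illustrative assumptions.
def _gat_conv_example():
    import torch

    x = torch.randn(4, 16)                     # 4 nodes with 16 features each.
    edge_index = torch.tensor([[0, 1, 2, 3],   # source nodes
                               [1, 0, 3, 2]])  # target nodes
    conv = GATConv(16, 8, heads=2)             # 2 heads -> concatenated output of size 2 * 8.
    out = conv(x, edge_index)                  # [4, 16]

    # Optionally recover the attention coefficients. Self-loops are added
    # internally, so `alpha` has one row per original edge plus one per node:
    out, (edge_index_full, alpha) = conv(x, edge_index,
                                         return_attention_weights=True)
    return out, alpha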
class FeaStConv(MessagePassing): r"""The (translation-invariant) feature-steered convolutional operator from the `"FeaStNet: Feature-Steered Graph Convolutions for 3D Shape Analysis" <https://arxiv.org/abs/1706.05206>`_ paper .. math:: \mathbf{x}^{\prime}_i = \frac{1}{|\mathcal{N}(i)|} \sum_{j \in \mathcal{N}(i)} \sum_{h=1}^H q_h(\mathbf{x}_i, \mathbf{x}_j) \mathbf{W}_h \mathbf{x}_j with :math:`q_h(\mathbf{x}_i, \mathbf{x}_j) = \mathrm{softmax}_j (\mathbf{u}_h^{\top} (\mathbf{x}_j - \mathbf{x}_i) + c_h)`, where :math:`H` denotes the number of attention heads, and :math:`\mathbf{W}_h`, :math:`\mathbf{u}_h` and :math:`c_h` are trainable parameters. Args: in_channels (int): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. out_channels (int): Size of each output sample. heads (int, optional): Number of attention heads :math:`H`. (default: :obj:`1`) add_self_loops (bool, optional): If set to :obj:`False`, will not add self-loops to the input graph. (default: :obj:`True`) bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. """ def __init__(self, in_channels: int, out_channels: int, heads: int = 1, add_self_loops: bool = True, bias: bool = True, **kwargs): kwargs.setdefault('aggr', 'mean') super(FeaStConv, self).__init__(**kwargs) self.in_channels = in_channels self.out_channels = out_channels self.heads = heads self.add_self_loops = add_self_loops self.lin = Linear(in_channels, heads * out_channels, bias=False, weight_initializer='uniform') self.u = Linear(in_channels, heads, bias=False, weight_initializer='uniform') self.c = Parameter(torch.Tensor(heads)) if bias: self.bias = Parameter(torch.Tensor(out_channels)) else: self.register_parameter('bias', None) self.reset_parameters() def reset_parameters(self): self.lin.reset_parameters() self.u.reset_parameters() normal(self.c, mean=0, std=0.1) normal(self.bias, mean=0, std=0.1) def forward(self, x: Union[Tensor, PairTensor], edge_index: Adj) -> Tensor: """""" if isinstance(x, Tensor): x: PairTensor = (x, x) if self.add_self_loops: if isinstance(edge_index, Tensor): edge_index, _ = remove_self_loops(edge_index) edge_index, _ = add_self_loops(edge_index, num_nodes=x[1].size(0)) elif isinstance(edge_index, SparseTensor): edge_index = set_diag(edge_index) # propagate_type: (x: PairTensor) out = self.propagate(edge_index, x=x, size=None) if self.bias is not None: out += self.bias return out def message(self, x_i: Tensor, x_j: Tensor) -> Tensor: q = self.u(x_j - x_i) + self.c # Translation invariance. q = F.softmax(q, dim=1) x_j = self.lin(x_j).view(x_j.size(0), self.heads, -1) return (x_j * q.view(-1, self.heads, 1)).sum(dim=1) def __repr__(self): return '{}({}, {}, heads={})'.format(self.__class__.__name__, self.in_channels, self.out_channels, self.heads)
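# A minimal usage sketch for the `FeaStConv` layer defined above, e.g. on a
# small mesh-like graph. All shapes and values are illustrative assumptions.
def _feast_conv_example():
    import torch

    x = torch.randn(4, 3)                        # e.g. 4 vertices with 3-dimensional features.
    edge_index = torch.tensor([[0, 1, 1, 2, 3],
                               [1, 0, 2, 1, 2]])
    conv = FeaStConv(3, 32, heads=4)             # Heads are summed, not concatenated.
    out = conv(x, edge_index)                    # [4, 32]
    return out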
class SplineConv(MessagePassing): r"""The spline-based convolutional operator from the `"SplineCNN: Fast Geometric Deep Learning with Continuous B-Spline Kernels" <https://arxiv.org/abs/1711.08920>`_ paper .. math:: \mathbf{x}^{\prime}_i = \frac{1}{|\mathcal{N}(i)|} \sum_{j \in \mathcal{N}(i)} \mathbf{x}_j \cdot h_{\mathbf{\Theta}}(\mathbf{e}_{i,j}), where :math:`h_{\mathbf{\Theta}}` denotes a kernel function defined over the weighted B-Spline tensor product basis. .. note:: Pseudo-coordinates must lay in the fixed interval :math:`[0, 1]` for this method to work as intended. Args: in_channels (int or tuple): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. A tuple corresponds to the sizes of source and target dimensionalities. out_channels (int): Size of each output sample. dim (int): Pseudo-coordinate dimensionality. kernel_size (int or [int]): Size of the convolving kernel. is_open_spline (bool or [bool], optional): If set to :obj:`False`, the operator will use a closed B-spline basis in this dimension. (default :obj:`True`) degree (int, optional): B-spline basis degrees. (default: :obj:`1`) aggr (string, optional): The aggregation operator to use (:obj:`"add"`, :obj:`"mean"`, :obj:`"max"`). (default: :obj:`"mean"`) root_weight (bool, optional): If set to :obj:`False`, the layer will not add transformed root node features to the output. (default: :obj:`True`) bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. """ def __init__( self, in_channels: Union[int, Tuple[int, int]], out_channels: int, dim: int, kernel_size: Union[int, List[int]], is_open_spline: bool = True, degree: int = 1, aggr: str = 'mean', root_weight: bool = True, bias: bool = True, **kwargs, ): super().__init__(aggr=aggr, **kwargs) if spline_basis is None: raise ImportError("'SplineConv' requires 'torch-spline-conv'") self.in_channels = in_channels self.out_channels = out_channels self.dim = dim self.degree = degree self.root_weight = root_weight kernel_size = torch.tensor(repeat(kernel_size, dim), dtype=torch.long) self.register_buffer('kernel_size', kernel_size) is_open_spline = repeat(is_open_spline, dim) is_open_spline = torch.tensor(is_open_spline, dtype=torch.uint8) self.register_buffer('is_open_spline', is_open_spline) if isinstance(in_channels, int): in_channels = (in_channels, in_channels) self.K = kernel_size.prod().item() if in_channels[0] > 0: self.weight = Parameter( torch.Tensor(self.K, in_channels[0], out_channels)) else: self.weight = torch.nn.parameter.UninitializedParameter() self._hook = self.register_forward_pre_hook( self.initialize_parameters) if root_weight: self.lin = Linear(in_channels[1], out_channels, bias=False, weight_initializer='uniform') if bias: self.bias = Parameter(torch.Tensor(out_channels)) else: self.register_parameter('bias', None) self.reset_parameters() def reset_parameters(self): if not isinstance(self.weight, nn.UninitializedParameter): size = self.weight.size(0) * self.weight.size(1) uniform(size, self.weight) if self.root_weight: self.lin.reset_parameters() zeros(self.bias) def forward(self, x: Union[Tensor, OptPairTensor], edge_index: Adj, edge_attr: OptTensor = None, size: Size = None) -> Tensor: """""" if isinstance(x, Tensor): x: OptPairTensor = (x, x) if not x[0].is_cuda: warnings.warn( 'We do not recommend using the non-optimized CPU version of ' '`SplineConv`. 
If possible, please move your data to GPU.') # propagate_type: (x: OptPairTensor, edge_attr: OptTensor) out = self.propagate(edge_index, x=x, edge_attr=edge_attr, size=size) x_r = x[1] if x_r is not None and self.root_weight: out += self.lin(x_r) if self.bias is not None: out += self.bias return out def message(self, x_j: Tensor, edge_attr: Tensor) -> Tensor: data = spline_basis(edge_attr, self.kernel_size, self.is_open_spline, self.degree) return spline_weighting(x_j, self.weight, *data) @torch.no_grad() def initialize_parameters(self, module, input): if isinstance(self.weight, torch.nn.parameter.UninitializedParameter): x = input[0][0] if isinstance(input, tuple) else input[0] in_channels = x.size(-1) self.weight.materialize((self.K, in_channels, self.out_channels)) size = self.weight.size(0) * self.weight.size(1) uniform(size, self.weight) module._hook.remove() delattr(module, '_hook') def __repr__(self) -> str: return (f'{self.__class__.__name__}({self.in_channels}, ' f'{self.out_channels}, dim={self.dim})')
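# A minimal usage sketch for the `SplineConv` layer defined above. It requires
# the optional `torch-spline-conv` package and pseudo-coordinates in [0, 1];
# all shapes and values below are illustrative assumptions.
def _spline_conv_example():
    import torch

    x = torch.randn(4, 16)
    edge_index = torch.tensor([[0, 1, 2, 3],
                               [1, 0, 3, 2]])
    edge_attr = torch.rand(edge_index.size(1), 2)   # 2D pseudo-coordinates in [0, 1].
    conv = SplineConv(16, 32, dim=2, kernel_size=5)
    out = conv(x, edge_index, edge_attr)            # [4, 32]
    return out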
class AGAEMD(nn.Module): def __init__(self, n_in_features: int, n_hid_layers: int, hid_features: list, n_heads: list, n_rna: int, n_dis: int, add_layer_attn: bool, residual: bool, dropout: float = 0.6): super(AGAEMD, self).__init__() assert n_hid_layers == len(hid_features) == len( n_heads), f'Enter valid arch params.' self.n_rna = n_rna self.n_dis = n_dis self.n_hid_layers = n_hid_layers self.dropout = nn.Dropout(dropout) # stack graph attention layers self.conv = nn.ModuleList() tmp = [n_in_features] + hid_features for i in range(n_hid_layers): self.conv.append( GraphAttentionLayer(tmp[i], tmp[i + 1], n_heads[i], residual=residual), ) if n_in_features != hid_features[0]: self.proj = Linear(n_in_features, hid_features[0], weight_initializer='glorot', bias=True) else: self.register_parameter('proj', None) if add_layer_attn: self.JK = JumpingKnowledge('lstm', tmp[-1], n_hid_layers + 1) else: self.register_parameter('JK', None) if self.proj is not None: self.proj.reset_parameters() def forward(self, x, edge_idx): # encoder embd_tmp = x embd_list = [ self.proj(embd_tmp) if self.proj is not None else embd_tmp ] for i in range(self.n_hid_layers): embd_tmp = self.conv[i](embd_tmp, edge_idx) embd_list.append(embd_tmp) if self.JK is not None: embd_tmp = self.JK(embd_list) final_embd = self.dropout(embd_tmp) # InnerProductDecoder rna_embd = final_embd[:self.n_rna, :] dis_embd = final_embd[self.n_rna:, :] ret = torch.mm(rna_embd, dis_embd.T) return ret
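# A minimal usage sketch for the `AGAEMD` model defined above, which scores
# RNA-disease associations via an inner-product decoder. It assumes that
# `GraphAttentionLayer` (defined elsewhere in this codebase) maps its input
# dimensionality to the given hidden dimensionality; all sizes and the toy
# edge list below are illustrative assumptions.
def _agaemd_example():
    import torch

    n_rna, n_dis, n_feat = 6, 4, 32
    model = AGAEMD(n_in_features=n_feat, n_hid_layers=2,
                   hid_features=[32, 32], n_heads=[4, 4],
                   n_rna=n_rna, n_dis=n_dis,
                   add_layer_attn=False, residual=True)

    x = torch.randn(n_rna + n_dis, n_feat)       # RNA nodes first, then disease nodes.
    edge_idx = torch.tensor([[0, 1, 6, 7],
                             [6, 7, 0, 1]])      # Toy RNA <-> disease edges.
    scores = model(x, edge_idx)                  # [n_rna, n_dis] association scores.
    return scores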
class GMMConv(MessagePassing): r"""The Gaussian mixture model convolutional operator from the `"Geometric Deep Learning on Graphs and Manifolds using Mixture Model CNNs" <https://arxiv.org/abs/1611.08402>`_ paper .. math:: \mathbf{x}^{\prime}_i = \frac{1}{|\mathcal{N}(i)|} \sum_{j \in \mathcal{N}(i)} \frac{1}{K} \sum_{k=1}^K \mathbf{w}_k(\mathbf{e}_{i,j}) \odot \mathbf{\Theta}_k \mathbf{x}_j, where .. math:: \mathbf{w}_k(\mathbf{e}) = \exp \left( -\frac{1}{2} {\left( \mathbf{e} - \mathbf{\mu}_k \right)}^{\top} \Sigma_k^{-1} \left( \mathbf{e} - \mathbf{\mu}_k \right) \right) denotes a weighting function based on trainable mean vector :math:`\mathbf{\mu}_k` and diagonal covariance matrix :math:`\mathbf{\Sigma}_k`. .. note:: The edge attribute :math:`\mathbf{e}_{ij}` is usually given by :math:`\mathbf{e}_{ij} = \mathbf{p}_j - \mathbf{p}_i`, where :math:`\mathbf{p}_i` denotes the position of node :math:`i` (see :class:`torch_geometric.transforms.Cartesian`). Args: in_channels (int or tuple): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. A tuple corresponds to the sizes of source and target dimensionalities. out_channels (int): Size of each output sample. dim (int): Pseudo-coordinate dimensionality. kernel_size (int): Number of kernels :math:`K`. separate_gaussians (bool, optional): If set to :obj:`True`, will learn separate GMMs for every pair of input and output channel, inspired by traditional CNNs. (default: :obj:`False`) aggr (string, optional): The aggregation operator to use (:obj:`"add"`, :obj:`"mean"`, :obj:`"max"`). (default: :obj:`"mean"`) root_weight (bool, optional): If set to :obj:`False`, the layer will not add transformed root node features to the output. (default: :obj:`True`) bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`.
""" def __init__(self, in_channels: Union[int, Tuple[int, int]], out_channels: int, dim: int, kernel_size: int, separate_gaussians: bool = False, aggr: str = 'mean', root_weight: bool = True, bias: bool = True, **kwargs): super(GMMConv, self).__init__(aggr=aggr, **kwargs) self.in_channels = in_channels self.out_channels = out_channels self.dim = dim self.kernel_size = kernel_size self.separate_gaussians = separate_gaussians self.root_weight = root_weight if isinstance(in_channels, int): in_channels = (in_channels, in_channels) self.rel_in_channels = in_channels[0] if in_channels[0] > 0: self.g = Parameter( Tensor(in_channels[0], out_channels * kernel_size)) if not self.separate_gaussians: self.mu = Parameter(Tensor(kernel_size, dim)) self.sigma = Parameter(Tensor(kernel_size, dim)) if self.separate_gaussians: self.mu = Parameter( Tensor(in_channels[0], out_channels, kernel_size, dim)) self.sigma = Parameter( Tensor(in_channels[0], out_channels, kernel_size, dim)) else: self.g = torch.nn.parameter.UninitializedParameter() self.mu = torch.nn.parameter.UninitializedParameter() self.sigma = torch.nn.parameter.UninitializedParameter() self._hook = self.register_forward_pre_hook( self.initialize_parameters) if root_weight: self.root = Linear(in_channels[1], out_channels, bias=False, weight_initializer='glorot') if bias: self.bias = Parameter(torch.Tensor(out_channels)) else: self.register_parameter('bias', None) self.reset_parameters() def reset_parameters(self): if not isinstance(self.g, torch.nn.UninitializedParameter): glorot(self.g) glorot(self.mu) glorot(self.sigma) if self.root_weight: self.root.reset_parameters() zeros(self.bias) def forward(self, x: Union[Tensor, OptPairTensor], edge_index: Adj, edge_attr: OptTensor = None, size: Size = None): """""" if isinstance(x, Tensor): x: OptPairTensor = (x, x) # propagate_type: (x: OptPairTensor, edge_attr: OptTensor) if not self.separate_gaussians: out: OptPairTensor = (torch.matmul(x[0], self.g), x[1]) out = self.propagate(edge_index, x=out, edge_attr=edge_attr, size=size) else: out = self.propagate(edge_index, x=x, edge_attr=edge_attr, size=size) x_r = x[1] if x_r is not None and self.root is not None: out += self.root(x_r) if self.bias is not None: out += self.bias return out def message(self, x_j: Tensor, edge_attr: Tensor): EPS = 1e-15 F, M = self.rel_in_channels, self.out_channels (E, D), K = edge_attr.size(), self.kernel_size if not self.separate_gaussians: gaussian = -0.5 * (edge_attr.view(E, 1, D) - self.mu.view(1, K, D)).pow(2) gaussian = gaussian / (EPS + self.sigma.view(1, K, D).pow(2)) gaussian = torch.exp(gaussian.sum(dim=-1)) # [E, K] return (x_j.view(E, K, M) * gaussian.view(E, K, 1)).sum(dim=-2) else: gaussian = -0.5 * (edge_attr.view(E, 1, 1, 1, D) - self.mu.view(1, F, M, K, D)).pow(2) gaussian = gaussian / (EPS + self.sigma.view(1, F, M, K, D).pow(2)) gaussian = torch.exp(gaussian.sum(dim=-1)) # [E, F, M, K] gaussian = gaussian * self.g.view(1, F, M, K) gaussian = gaussian.sum(dim=-1) # [E, F, M] return (x_j.view(E, F, 1) * gaussian).sum(dim=-2) # [E, M] @torch.no_grad() def initialize_parameters(self, module, input): if isinstance(self.g, torch.nn.parameter.UninitializedParameter): x = input[0][0] if isinstance(input, tuple) else input[0] in_channels = x.size(-1) out_channels, kernel_size = self.out_channels, self.kernel_size self.g.materialize((in_channels, out_channels * kernel_size)) if not self.separate_gaussians: self.mu.materialize((kernel_size, self.dim)) self.sigma.materialize((kernel_size, self.dim)) else: 
self.mu.materialize( (in_channels, out_channels, kernel_size, self.dim)) self.sigma.materialize( (in_channels, out_channels, kernel_size, self.dim)) glorot(self.g) glorot(self.mu) glorot(self.sigma) module._hook.remove() delattr(module, '_hook') def __repr__(self) -> str: return '{}({}, {}, dim={})'.format(self.__class__.__name__, self.in_channels, self.out_channels, self.dim)
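# A minimal usage sketch for the `GMMConv` layer defined above, using relative
# Cartesian pseudo-coordinates as edge attributes. All shapes and values are
# illustrative assumptions.
def _gmm_conv_example():
    import torch

    x = torch.randn(4, 16)
    edge_index = torch.tensor([[0, 1, 2, 3],
                               [1, 0, 3, 2]])
    edge_attr = torch.rand(edge_index.size(1), 2)   # 2D pseudo-coordinates.
    conv = GMMConv(16, 32, dim=2, kernel_size=3)    # K = 3 Gaussian kernels.
    out = conv(x, edge_index, edge_attr)            # [4, 32]
    return out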
class SuperGATConv(MessagePassing): r"""The self-supervised graph attentional operator from the `"How to Find Your Friendly Neighborhood: Graph Attention Design with Self-Supervision" <https://openreview.net/forum?id=Wi5KUNlqWty>`_ paper .. math:: \mathbf{x}^{\prime}_i = \alpha_{i,i}\mathbf{\Theta}\mathbf{x}_{i} + \sum_{j \in \mathcal{N}(i)} \alpha_{i,j}\mathbf{\Theta}\mathbf{x}_{j}, where the two types of attention :math:`\alpha_{i,j}^{\mathrm{MX\ or\ SD}}` are computed as: .. math:: \alpha_{i,j}^{\mathrm{MX\ or\ SD}} &= \frac{ \exp\left(\mathrm{LeakyReLU}\left( e_{i,j}^{\mathrm{MX\ or\ SD}} \right)\right)} {\sum_{k \in \mathcal{N}(i) \cup \{ i \}} \exp\left(\mathrm{LeakyReLU}\left( e_{i,k}^{\mathrm{MX\ or\ SD}} \right)\right)} e_{i,j}^{\mathrm{MX}} &= \mathbf{a}^{\top} [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_j] \cdot \sigma \left( \left( \mathbf{\Theta}\mathbf{x}_i \right)^{\top} \mathbf{\Theta}\mathbf{x}_j \right) e_{i,j}^{\mathrm{SD}} &= \frac{ \left( \mathbf{\Theta}\mathbf{x}_i \right)^{\top} \mathbf{\Theta}\mathbf{x}_j }{ \sqrt{d} } The self-supervised task is a link prediction using the attention values as input to predict the likelihood :math:`\phi_{i,j}^{\mathrm{MX\ or\ SD}}` that an edge exists between nodes: .. math:: \phi_{i,j}^{\mathrm{MX}} &= \sigma \left( \left( \mathbf{\Theta}\mathbf{x}_i \right)^{\top} \mathbf{\Theta}\mathbf{x}_j \right) \phi_{i,j}^{\mathrm{SD}} &= \sigma \left( \frac{ \left( \mathbf{\Theta}\mathbf{x}_i \right)^{\top} \mathbf{\Theta}\mathbf{x}_j }{ \sqrt{d} } \right) .. note:: For an example of using SuperGAT, see `examples/super_gat.py <https://github.com/pyg-team/pytorch_geometric/blob/master/examples/ super_gat.py>`_. Args: in_channels (int): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. out_channels (int): Size of each output sample. heads (int, optional): Number of multi-head-attentions. (default: :obj:`1`) concat (bool, optional): If set to :obj:`False`, the multi-head attentions are averaged instead of concatenated. (default: :obj:`True`) negative_slope (float, optional): LeakyReLU angle of the negative slope. (default: :obj:`0.2`) dropout (float, optional): Dropout probability of the normalized attention coefficients which exposes each node to a stochastically sampled neighborhood during training. (default: :obj:`0`) add_self_loops (bool, optional): If set to :obj:`False`, will not add self-loops to the input graph. (default: :obj:`True`) bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`) attention_type (string, optional): Type of attention to use. (:obj:`'MX'`, :obj:`'SD'`). (default: :obj:`'MX'`) neg_sample_ratio (float, optional): The ratio of the number of sampled negative edges to the number of positive edges. (default: :obj:`0.5`) edge_sample_ratio (float, optional): The ratio of samples to use for training among the number of training edges. (default: :obj:`1.0`) is_undirected (bool, optional): Whether the input graph is undirected. If not given, will be automatically computed with the input graph when negative sampling is performed. (default: :obj:`False`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. 
Shapes: - **input:** node features :math:`(|\mathcal{V}|, F_{in})`, edge indices :math:`(2, |\mathcal{E}|)`, negative edge indices :math:`(2, |\mathcal{E}^{(-)}|)` *(optional)* - **output:** node features :math:`(|\mathcal{V}|, H * F_{out})` """ att_x: OptTensor att_y: OptTensor def __init__(self, in_channels: int, out_channels: int, heads: int = 1, concat: bool = True, negative_slope: float = 0.2, dropout: float = 0.0, add_self_loops: bool = True, bias: bool = True, attention_type: str = 'MX', neg_sample_ratio: float = 0.5, edge_sample_ratio: float = 1.0, is_undirected: bool = False, **kwargs): kwargs.setdefault('aggr', 'add') super().__init__(node_dim=0, **kwargs) self.in_channels = in_channels self.out_channels = out_channels self.heads = heads self.concat = concat self.negative_slope = negative_slope self.dropout = dropout self.add_self_loops = add_self_loops self.attention_type = attention_type self.neg_sample_ratio = neg_sample_ratio self.edge_sample_ratio = edge_sample_ratio self.is_undirected = is_undirected assert attention_type in ['MX', 'SD'] assert 0.0 < neg_sample_ratio and 0.0 < edge_sample_ratio <= 1.0 self.lin = Linear(in_channels, heads * out_channels, bias=False, weight_initializer='glorot') if self.attention_type == 'MX': self.att_l = Parameter(torch.Tensor(1, heads, out_channels)) self.att_r = Parameter(torch.Tensor(1, heads, out_channels)) else: # self.attention_type == 'SD' self.register_parameter('att_l', None) self.register_parameter('att_r', None) self.att_x = self.att_y = None # x/y for self-supervision if bias and concat: self.bias = Parameter(torch.Tensor(heads * out_channels)) elif bias and not concat: self.bias = Parameter(torch.Tensor(out_channels)) else: self.register_parameter('bias', None) self.reset_parameters() def reset_parameters(self): self.lin.reset_parameters() glorot(self.att_l) glorot(self.att_r) zeros(self.bias) def forward(self, x: Tensor, edge_index: Tensor, neg_edge_index: OptTensor = None, batch: OptTensor = None) -> Tensor: r""" Args: neg_edge_index (Tensor, optional): The negative edges to train against. If not given, uses negative sampling to calculate negative edges. (default: :obj:`None`) """ N, H, C = x.size(0), self.heads, self.out_channels if self.add_self_loops: edge_index, _ = remove_self_loops(edge_index) edge_index, _ = add_self_loops(edge_index, num_nodes=N) x = self.lin(x).view(-1, H, C) # propagate_type: (x: Tensor) out = self.propagate(edge_index, x=x, size=None) if self.training: pos_edge_index = self.positive_sampling(edge_index) pos_att = self.get_attention( edge_index_i=pos_edge_index[1], x_i=x[pos_edge_index[1]], x_j=x[pos_edge_index[0]], num_nodes=x.size(0), return_logits=True, ) if neg_edge_index is None: neg_edge_index = self.negative_sampling(edge_index, N, batch) neg_att = self.get_attention( edge_index_i=neg_edge_index[1], x_i=x[neg_edge_index[1]], x_j=x[neg_edge_index[0]], num_nodes=x.size(0), return_logits=True, ) self.att_x = torch.cat([pos_att, neg_att], dim=0) self.att_y = self.att_x.new_zeros(self.att_x.size(0)) self.att_y[:pos_edge_index.size(1)] = 1. 
if self.concat is True: out = out.view(-1, self.heads * self.out_channels) else: out = out.mean(dim=1) if self.bias is not None: out += self.bias return out def message(self, edge_index_i: Tensor, x_i: Tensor, x_j: Tensor, size_i: Optional[int]) -> Tensor: alpha = self.get_attention(edge_index_i, x_i, x_j, num_nodes=size_i) alpha = F.dropout(alpha, p=self.dropout, training=self.training) return x_j * alpha.view(-1, self.heads, 1) def negative_sampling(self, edge_index: Tensor, num_nodes: int, batch: OptTensor = None) -> Tensor: num_neg_samples = int(self.neg_sample_ratio * self.edge_sample_ratio * edge_index.size(1)) if not self.is_undirected and not is_undirected(edge_index, num_nodes=num_nodes): edge_index = to_undirected(edge_index, num_nodes=num_nodes) if batch is None: neg_edge_index = negative_sampling(edge_index, num_nodes, num_neg_samples=num_neg_samples) else: neg_edge_index = batched_negative_sampling( edge_index, batch, num_neg_samples=num_neg_samples) return neg_edge_index def positive_sampling(self, edge_index: Tensor) -> Tensor: pos_edge_index, _ = dropout_adj(edge_index, p=1. - self.edge_sample_ratio, training=self.training) return pos_edge_index def get_attention(self, edge_index_i: Tensor, x_i: Tensor, x_j: Tensor, num_nodes: Optional[int], return_logits: bool = False) -> Tensor: if self.attention_type == 'MX': logits = (x_i * x_j).sum(dim=-1) if return_logits: return logits alpha = (x_j * self.att_l).sum(-1) + (x_i * self.att_r).sum(-1) alpha = alpha * logits.sigmoid() else: # self.attention_type == 'SD' alpha = (x_i * x_j).sum(dim=-1) / math.sqrt(self.out_channels) if return_logits: return alpha alpha = F.leaky_relu(alpha, self.negative_slope) alpha = softmax(alpha, edge_index_i, num_nodes=num_nodes) return alpha def get_attention_loss(self) -> Tensor: r"""Compute the self-supervised graph attention loss.""" if not self.training: return torch.tensor([0], device=self.lin.weight.device) return F.binary_cross_entropy_with_logits( self.att_x.mean(dim=-1), self.att_y, ) def __repr__(self) -> str: return (f'{self.__class__.__name__}({self.in_channels}, ' f'{self.out_channels}, heads={self.heads}, ' f'type={self.attention_type})')
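# A minimal usage sketch for the `SuperGATConv` layer defined above, including
# the auxiliary self-supervised attention loss, which is only populated in
# training mode. All shapes and values are illustrative assumptions.
def _super_gat_conv_example():
    import torch

    x = torch.randn(4, 16)
    edge_index = torch.tensor([[0, 1, 2, 3],
                               [1, 0, 3, 2]])
    conv = SuperGATConv(16, 8, heads=2, attention_type='MX')
    conv.train()                                  # Enables positive/negative edge sampling.
    out = conv(x, edge_index)                     # [4, 2 * 8]
    att_loss = conv.get_attention_loss()          # Add this term to the task loss.
    return out, att_loss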
class HypergraphConv(MessagePassing): r"""The hypergraph convolutional operator from the `"Hypergraph Convolution and Hypergraph Attention" <https://arxiv.org/abs/1901.08150>`_ paper .. math:: \mathbf{X}^{\prime} = \mathbf{D}^{-1} \mathbf{H} \mathbf{W} \mathbf{B}^{-1} \mathbf{H}^{\top} \mathbf{X} \mathbf{\Theta} where :math:`\mathbf{H} \in {\{ 0, 1 \}}^{N \times M}` is the incidence matrix, :math:`\mathbf{W} \in \mathbb{R}^M` is the diagonal hyperedge weight matrix, and :math:`\mathbf{D}` and :math:`\mathbf{B}` are the corresponding degree matrices. For example, in the hypergraph scenario :math:`\mathcal{G} = (\mathcal{V}, \mathcal{E})` with :math:`\mathcal{V} = \{ 0, 1, 2, 3 \}` and :math:`\mathcal{E} = \{ \{ 0, 1, 2 \}, \{ 1, 2, 3 \} \}`, the :obj:`hyperedge_index` is represented as: .. code-block:: python hyperedge_index = torch.tensor([ [0, 1, 2, 1, 2, 3], [0, 0, 0, 1, 1, 1], ]) Args: in_channels (int): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. out_channels (int): Size of each output sample. use_attention (bool, optional): If set to :obj:`True`, attention will be added to this layer. (default: :obj:`False`) heads (int, optional): Number of multi-head-attentions. (default: :obj:`1`) concat (bool, optional): If set to :obj:`False`, the multi-head attentions are averaged instead of concatenated. (default: :obj:`True`) negative_slope (float, optional): LeakyReLU angle of the negative slope. (default: :obj:`0.2`) dropout (float, optional): Dropout probability of the normalized attention coefficients which exposes each node to a stochastically sampled neighborhood during training. (default: :obj:`0`) bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. Shapes: - **input:** node features :math:`(|\mathcal{V}|, F_{in})`, hyperedge indices :math:`(|\mathcal{V}|, |\mathcal{E}|)`, hyperedge weights :math:`(|\mathcal{E}|)` *(optional)* hyperedge features :math:`(|\mathcal{E}|, D)` *(optional)* - **output:** node features :math:`(|\mathcal{V}|, F_{out})` """ def __init__(self, in_channels, out_channels, use_attention=False, heads=1, concat=True, negative_slope=0.2, dropout=0, bias=True, **kwargs): kwargs.setdefault('aggr', 'add') super().__init__(flow='source_to_target', node_dim=0, **kwargs) self.in_channels = in_channels self.out_channels = out_channels self.use_attention = use_attention if self.use_attention: self.heads = heads self.concat = concat self.negative_slope = negative_slope self.dropout = dropout self.lin = Linear(in_channels, heads * out_channels, bias=False, weight_initializer='glorot') self.att = Parameter(torch.Tensor(1, heads, 2 * out_channels)) else: self.heads = 1 self.concat = True self.lin = Linear(in_channels, out_channels, bias=False, weight_initializer='glorot') if bias and concat: self.bias = Parameter(torch.Tensor(heads * out_channels)) elif bias and not concat: self.bias = Parameter(torch.Tensor(out_channels)) else: self.register_parameter('bias', None) self.reset_parameters() def reset_parameters(self): self.lin.reset_parameters() if self.use_attention: glorot(self.att) zeros(self.bias) def forward(self, x: Tensor, hyperedge_index: Tensor, hyperedge_weight: Optional[Tensor] = None, hyperedge_attr: Optional[Tensor] = None) -> Tensor: r""" Args: x (Tensor): Node feature matrix :math:`\mathbf{X} \in \mathbb{R}^{N \times F}`. 
hyperedge_index (LongTensor): The hyperedge indices, *i.e.* the sparse incidence matrix :math:`\mathbf{H} \in {\{ 0, 1 \}}^{N \times M}` mapping from nodes to edges. hyperedge_weight (Tensor, optional): Hyperedge weights :math:`\mathbf{W} \in \mathbb{R}^M`. (default: :obj:`None`) hyperedge_attr (Tensor, optional): Hyperedge feature matrix in :math:`\mathbb{R}^{M \times F}`. These features only need to get passed in case :obj:`use_attention=True`. (default: :obj:`None`) """ num_nodes, num_edges = x.size(0), 0 if hyperedge_index.numel() > 0: num_edges = int(hyperedge_index[1].max()) + 1 if hyperedge_weight is None: hyperedge_weight = x.new_ones(num_edges) x = self.lin(x) alpha = None if self.use_attention: assert hyperedge_attr is not None x = x.view(-1, self.heads, self.out_channels) hyperedge_attr = self.lin(hyperedge_attr) hyperedge_attr = hyperedge_attr.view(-1, self.heads, self.out_channels) x_i = x[hyperedge_index[0]] x_j = hyperedge_attr[hyperedge_index[1]] alpha = (torch.cat([x_i, x_j], dim=-1) * self.att).sum(dim=-1) alpha = F.leaky_relu(alpha, self.negative_slope) alpha = softmax(alpha, hyperedge_index[0], num_nodes=x.size(0)) alpha = F.dropout(alpha, p=self.dropout, training=self.training) D = scatter_add(hyperedge_weight[hyperedge_index[1]], hyperedge_index[0], dim=0, dim_size=num_nodes) D = 1.0 / D D[D == float("inf")] = 0 B = scatter_add(x.new_ones(hyperedge_index.size(1)), hyperedge_index[1], dim=0, dim_size=num_edges) B = 1.0 / B B[B == float("inf")] = 0 out = self.propagate(hyperedge_index, x=x, norm=B, alpha=alpha, size=(num_nodes, num_edges)) out = self.propagate(hyperedge_index.flip([0]), x=out, norm=D, alpha=alpha, size=(num_edges, num_nodes)) if self.concat is True: out = out.view(-1, self.heads * self.out_channels) else: out = out.mean(dim=1) if self.bias is not None: out = out + self.bias return out def message(self, x_j: Tensor, norm_i: Tensor, alpha: Tensor) -> Tensor: H, F = self.heads, self.out_channels out = norm_i.view(-1, 1, 1) * x_j.view(-1, H, F) if alpha is not None: out = alpha.view(-1, self.heads, 1) * out return out
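# A minimal usage sketch for the `HypergraphConv` layer defined above, reusing
# the two-hyperedge incidence structure from the docstring. Feature shapes and
# values are illustrative assumptions.
def _hypergraph_conv_example():
    import torch

    x = torch.randn(4, 16)                          # 4 nodes.
    hyperedge_index = torch.tensor([
        [0, 1, 2, 1, 2, 3],                         # node indices
        [0, 0, 0, 1, 1, 1],                         # hyperedge indices
    ])
    conv = HypergraphConv(16, 32)
    out = conv(x, hyperedge_index)                  # [4, 32]

    # With attention enabled, hyperedge features must be supplied as well:
    conv_att = HypergraphConv(16, 32, use_attention=True, heads=2)
    hyperedge_attr = torch.randn(2, 16)             # One feature row per hyperedge.
    out_att = conv_att(x, hyperedge_index, hyperedge_attr=hyperedge_attr)  # [4, 2 * 32]
    return out, out_att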
class SignedConv(MessagePassing): r"""The signed graph convolutional operator from the `"Signed Graph Convolutional Network" <https://arxiv.org/abs/1808.06354>`_ paper .. math:: \mathbf{x}_v^{(\textrm{pos})} &= \mathbf{\Theta}^{(\textrm{pos})} \left[ \frac{1}{|\mathcal{N}^{+}(v)|} \sum_{w \in \mathcal{N}^{+}(v)} \mathbf{x}_w , \mathbf{x}_v \right] \mathbf{x}_v^{(\textrm{neg})} &= \mathbf{\Theta}^{(\textrm{neg})} \left[ \frac{1}{|\mathcal{N}^{-}(v)|} \sum_{w \in \mathcal{N}^{-}(v)} \mathbf{x}_w , \mathbf{x}_v \right] if :obj:`first_aggr` is set to :obj:`True`, and .. math:: \mathbf{x}_v^{(\textrm{pos})} &= \mathbf{\Theta}^{(\textrm{pos})} \left[ \frac{1}{|\mathcal{N}^{+}(v)|} \sum_{w \in \mathcal{N}^{+}(v)} \mathbf{x}_w^{(\textrm{pos})}, \frac{1}{|\mathcal{N}^{-}(v)|} \sum_{w \in \mathcal{N}^{-}(v)} \mathbf{x}_w^{(\textrm{neg})}, \mathbf{x}_v^{(\textrm{pos})} \right] \mathbf{x}_v^{(\textrm{neg})} &= \mathbf{\Theta}^{(\textrm{neg})} \left[ \frac{1}{|\mathcal{N}^{+}(v)|} \sum_{w \in \mathcal{N}^{+}(v)} \mathbf{x}_w^{(\textrm{neg})}, \frac{1}{|\mathcal{N}^{-}(v)|} \sum_{w \in \mathcal{N}^{-}(v)} \mathbf{x}_w^{(\textrm{pos})}, \mathbf{x}_v^{(\textrm{neg})} \right] otherwise. In case :obj:`first_aggr` is :obj:`False`, the layer expects :obj:`x` to be a tensor where :obj:`x[:, :in_channels]` denotes the positive node features :math:`\mathbf{X}^{(\textrm{pos})}` and :obj:`x[:, in_channels:]` denotes the negative node features :math:`\mathbf{X}^{(\textrm{neg})}`. Args: in_channels (int or tuple): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. A tuple corresponds to the sizes of source and target dimensionalities. out_channels (int): Size of each output sample. first_aggr (bool): Denotes which aggregation formula to use. bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`.
""" def __init__(self, in_channels: int, out_channels: int, first_aggr: bool, bias: bool = True, **kwargs): kwargs.setdefault('aggr', 'mean') super(SignedConv, self).__init__(**kwargs) self.in_channels = in_channels self.out_channels = out_channels self.first_aggr = first_aggr if first_aggr: self.lin_pos_l = Linear(in_channels, out_channels, False) self.lin_pos_r = Linear(in_channels, out_channels, bias) self.lin_neg_l = Linear(in_channels, out_channels, False) self.lin_neg_r = Linear(in_channels, out_channels, bias) else: self.lin_pos_l = Linear(2 * in_channels, out_channels, False) self.lin_pos_r = Linear(in_channels, out_channels, bias) self.lin_neg_l = Linear(2 * in_channels, out_channels, False) self.lin_neg_r = Linear(in_channels, out_channels, bias) self.reset_parameters() def reset_parameters(self): self.lin_pos_l.reset_parameters() self.lin_pos_r.reset_parameters() self.lin_neg_l.reset_parameters() self.lin_neg_r.reset_parameters() def forward(self, x: Union[Tensor, PairTensor], pos_edge_index: Adj, neg_edge_index: Adj): """""" if isinstance(x, Tensor): x: PairTensor = (x, x) # propagate_type: (x: PairTensor) if self.first_aggr: out_pos = self.propagate(pos_edge_index, x=x, size=None) out_pos = self.lin_pos_l(out_pos) out_pos += self.lin_pos_r(x[1]) out_neg = self.propagate(neg_edge_index, x=x, size=None) out_neg = self.lin_neg_l(out_neg) out_neg += self.lin_neg_r(x[1]) return torch.cat([out_pos, out_neg], dim=-1) else: F_in = self.in_channels out_pos1 = self.propagate(pos_edge_index, size=None, x=(x[0][..., :F_in], x[1][..., :F_in])) out_pos2 = self.propagate(neg_edge_index, size=None, x=(x[0][..., F_in:], x[1][..., F_in:])) out_pos = torch.cat([out_pos1, out_pos2], dim=-1) out_pos = self.lin_pos_l(out_pos) out_pos += self.lin_pos_r(x[1][..., :F_in]) out_neg1 = self.propagate(pos_edge_index, size=None, x=(x[0][..., F_in:], x[1][..., F_in:])) out_neg2 = self.propagate(neg_edge_index, size=None, x=(x[0][..., :F_in], x[1][..., :F_in])) out_neg = torch.cat([out_neg1, out_neg2], dim=-1) out_neg = self.lin_neg_l(out_neg) out_neg += self.lin_neg_r(x[1][..., F_in:]) return torch.cat([out_pos, out_neg], dim=-1) def message(self, x_j: Tensor) -> Tensor: return x_j def message_and_aggregate(self, adj_t: SparseTensor, x: PairTensor) -> Tensor: adj_t = adj_t.set_value(None, layout=None) return matmul(adj_t, x[0], reduce=self.aggr) def __repr__(self): return '{}({}, {}, first_aggr={})'.format(self.__class__.__name__, self.in_channels, self.out_channels, self.first_aggr)
class ClusterGCNConv(MessagePassing): r"""The ClusterGCN graph convolutional operator from the `"Cluster-GCN: An Efficient Algorithm for Training Deep and Large Graph Convolutional Networks" <https://arxiv.org/abs/1905.07953>`_ paper .. math:: \mathbf{X}^{\prime} = \left( \mathbf{\hat{A}} + \lambda \cdot \textrm{diag}(\mathbf{\hat{A}}) \right) \mathbf{X} \mathbf{W}_1 + \mathbf{X} \mathbf{W}_2 where :math:`\mathbf{\hat{A}} = {(\mathbf{D} + \mathbf{I})}^{-1}(\mathbf{A} + \mathbf{I})`. Args: in_channels (int): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. out_channels (int): Size of each output sample. diag_lambda (float, optional): Diagonal enhancement value :math:`\lambda`. (default: :obj:`0.`) add_self_loops (bool, optional): If set to :obj:`False`, will not add self-loops to the input graph. (default: :obj:`True`) bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. Shapes: - **input:** node features :math:`(|\mathcal{V}|, F_{in})`, edge indices :math:`(2, |\mathcal{E}|)` - **output:** node features :math:`(|\mathcal{V}|, F_{out})` """ def __init__(self, in_channels: int, out_channels: int, diag_lambda: float = 0., add_self_loops: bool = True, bias: bool = True, **kwargs): kwargs.setdefault('aggr', 'add') super().__init__(**kwargs) self.in_channels = in_channels self.out_channels = out_channels self.diag_lambda = diag_lambda self.add_self_loops = add_self_loops self.lin_out = Linear(in_channels, out_channels, bias=bias, weight_initializer='glorot') self.lin_root = Linear(in_channels, out_channels, bias=False, weight_initializer='glorot') self.reset_parameters() def reset_parameters(self): self.lin_out.reset_parameters() self.lin_root.reset_parameters() def forward(self, x: Tensor, edge_index: Adj) -> Tensor: """""" edge_weight: OptTensor = None if isinstance(edge_index, Tensor): num_nodes = x.size(self.node_dim) if self.add_self_loops: edge_index, _ = remove_self_loops(edge_index) edge_index, _ = add_self_loops(edge_index, num_nodes=num_nodes) row, col = edge_index[0], edge_index[1] deg_inv = 1. / degree(col, num_nodes=num_nodes).clamp_(1.) edge_weight = deg_inv[col] edge_weight[row == col] += self.diag_lambda * deg_inv elif isinstance(edge_index, SparseTensor): if self.add_self_loops: edge_index = set_diag(edge_index) col, row, _ = edge_index.coo() # Transposed. deg_inv = 1. / sparsesum(edge_index, dim=1).clamp_(1.) edge_weight = deg_inv[col] edge_weight[row == col] += self.diag_lambda * deg_inv edge_index = edge_index.set_value(edge_weight, layout='coo') # propagate_type: (x: Tensor, edge_weight: OptTensor) out = self.propagate(edge_index, x=x, edge_weight=edge_weight, size=None) out = self.lin_out(out) + self.lin_root(x) return out def message(self, x_j: Tensor, edge_weight: Tensor) -> Tensor: return edge_weight.view(-1, 1) * x_j def message_and_aggregate(self, adj_t: SparseTensor, x: Tensor) -> Tensor: return matmul(adj_t, x, reduce=self.aggr) def __repr__(self) -> str: return (f'{self.__class__.__name__}({self.in_channels}, ' f'{self.out_channels}, diag_lambda={self.diag_lambda})')
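# A minimal usage sketch for the `ClusterGCNConv` layer defined above,
# typically applied to the subgraph induced by one cluster of nodes. Shapes
# and values are illustrative assumptions.
def _cluster_gcn_conv_example():
    import torch

    x = torch.randn(4, 16)
    edge_index = torch.tensor([[0, 1, 2, 3],
                               [1, 0, 3, 2]])
    conv = ClusterGCNConv(16, 32, diag_lambda=1.0)
    out = conv(x, edge_index)                     # [4, 32]
    return out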
class GATv2Conv(MessagePassing): r"""The GATv2 operator from the `"How Attentive are Graph Attention Networks?" <https://arxiv.org/abs/2105.14491>`_ paper, which fixes the static attention problem of the standard :class:`~torch_geometric.conv.GATConv` layer: since the linear layers in the standard GAT are applied right after each other, the ranking of attended nodes is unconditioned on the query node. In contrast, in GATv2, every node can attend to any other node. .. math:: \mathbf{x}^{\prime}_i = \alpha_{i,i}\mathbf{\Theta}\mathbf{x}_{i} + \sum_{j \in \mathcal{N}(i)} \alpha_{i,j}\mathbf{\Theta}\mathbf{x}_{j}, where the attention coefficients :math:`\alpha_{i,j}` are computed as .. math:: \alpha_{i,j} = \frac{ \exp\left(\mathbf{a}^{\top}\mathrm{LeakyReLU}\left(\mathbf{\Theta} [\mathbf{x}_i \, \Vert \, \mathbf{x}_j] \right)\right)} {\sum_{k \in \mathcal{N}(i) \cup \{ i \}} \exp\left(\mathbf{a}^{\top}\mathrm{LeakyReLU}\left(\mathbf{\Theta} [\mathbf{x}_i \, \Vert \, \mathbf{x}_k] \right)\right)}. If the graph has multi-dimensional edge features :math:`\mathbf{e}_{i,j}`, the attention coefficients :math:`\alpha_{i,j}` are computed as .. math:: \alpha_{i,j} = \frac{ \exp\left(\mathbf{a}^{\top}\mathrm{LeakyReLU}\left(\mathbf{\Theta} [\mathbf{x}_i \, \Vert \, \mathbf{x}_j \, \Vert \, \mathbf{e}_{i,j}] \right)\right)} {\sum_{k \in \mathcal{N}(i) \cup \{ i \}} \exp\left(\mathbf{a}^{\top}\mathrm{LeakyReLU}\left(\mathbf{\Theta} [\mathbf{x}_i \, \Vert \, \mathbf{x}_k \, \Vert \, \mathbf{e}_{i,k}] \right)\right)}. Args: in_channels (int or tuple): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. A tuple corresponds to the sizes of source and target dimensionalities. out_channels (int): Size of each output sample. heads (int, optional): Number of multi-head-attentions. (default: :obj:`1`) concat (bool, optional): If set to :obj:`False`, the multi-head attentions are averaged instead of concatenated. (default: :obj:`True`) negative_slope (float, optional): LeakyReLU angle of the negative slope. (default: :obj:`0.2`) dropout (float, optional): Dropout probability of the normalized attention coefficients which exposes each node to a stochastically sampled neighborhood during training. (default: :obj:`0`) add_self_loops (bool, optional): If set to :obj:`False`, will not add self-loops to the input graph. (default: :obj:`True`) edge_dim (int, optional): Edge feature dimensionality (in case there are any). (default: :obj:`None`) fill_value (float or Tensor or str, optional): The way to generate edge features of self-loops (in case :obj:`edge_dim != None`). If given as :obj:`float` or :class:`torch.Tensor`, edge features of self-loops will be directly given by :obj:`fill_value`. If given as :obj:`str`, edge features of self-loops are computed by aggregating all features of edges that point to the specific node, according to a reduce operation. (:obj:`"add"`, :obj:`"mean"`, :obj:`"min"`, :obj:`"max"`, :obj:`"mul"`). (default: :obj:`"mean"`) bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`) share_weights (bool, optional): If set to :obj:`True`, the same matrix will be applied to the source and the target node of every edge. (default: :obj:`False`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. 
""" _alpha: OptTensor def __init__( self, in_channels: Union[int, Tuple[int, int]], out_channels: int, heads: int = 1, concat: bool = True, negative_slope: float = 0.2, dropout: float = 0.0, add_self_loops: bool = True, edge_dim: Optional[int] = None, fill_value: Union[float, Tensor, str] = 'mean', bias: bool = True, share_weights: bool = False, **kwargs, ): super().__init__(node_dim=0, **kwargs) self.in_channels = in_channels self.out_channels = out_channels self.heads = heads self.concat = concat self.negative_slope = negative_slope self.dropout = dropout self.add_self_loops = add_self_loops self.edge_dim = edge_dim self.fill_value = fill_value self.share_weights = share_weights if isinstance(in_channels, int): self.lin_l = Linear(in_channels, heads * out_channels, bias=bias, weight_initializer='glorot') if share_weights: self.lin_r = self.lin_l else: self.lin_r = Linear(in_channels, heads * out_channels, bias=bias, weight_initializer='glorot') else: self.lin_l = Linear(in_channels[0], heads * out_channels, bias=bias, weight_initializer='glorot') if share_weights: self.lin_r = self.lin_l else: self.lin_r = Linear(in_channels[1], heads * out_channels, bias=bias, weight_initializer='glorot') self.att = Parameter(torch.Tensor(1, heads, out_channels)) if edge_dim is not None: self.lin_edge = Linear(edge_dim, heads * out_channels, bias=False, weight_initializer='glorot') else: self.lin_edge = None if bias and concat: self.bias = Parameter(torch.Tensor(heads * out_channels)) elif bias and not concat: self.bias = Parameter(torch.Tensor(out_channels)) else: self.register_parameter('bias', None) self._alpha = None self.reset_parameters() def reset_parameters(self): self.lin_l.reset_parameters() self.lin_r.reset_parameters() if self.lin_edge is not None: self.lin_edge.reset_parameters() glorot(self.att) zeros(self.bias) def forward(self, x: Union[Tensor, PairTensor], edge_index: Adj, edge_attr: OptTensor = None, size: Size = None, return_attention_weights: bool = None): # type: (Union[Tensor, PairTensor], Tensor, OptTensor, Size, NoneType) -> Tensor # noqa # type: (Union[Tensor, PairTensor], SparseTensor, OptTensor, Size, NoneType) -> Tensor # noqa # type: (Union[Tensor, PairTensor], Tensor, OptTensor, Size, bool) -> Tuple[Tensor, Tuple[Tensor, Tensor]] # noqa # type: (Union[Tensor, PairTensor], SparseTensor, OptTensor, Size, bool) -> Tuple[Tensor, SparseTensor] # noqa r""" Args: return_attention_weights (bool, optional): If set to :obj:`True`, will additionally return the tuple :obj:`(edge_index, attention_weights)`, holding the computed attention weights for each edge. 
(default: :obj:`None`) """ H, C = self.heads, self.out_channels x_l: OptTensor = None x_r: OptTensor = None if isinstance(x, Tensor): assert x.dim() == 2 x_l = self.lin_l(x).view(-1, H, C) if self.share_weights: x_r = x_l else: x_r = self.lin_r(x).view(-1, H, C) else: x_l, x_r = x[0], x[1] assert x[0].dim() == 2 x_l = self.lin_l(x_l).view(-1, H, C) if x_r is not None: x_r = self.lin_r(x_r).view(-1, H, C) assert x_l is not None assert x_r is not None if self.add_self_loops: if isinstance(edge_index, Tensor): num_nodes = x_l.size(0) if x_r is not None: num_nodes = min(num_nodes, x_r.size(0)) if size is not None: num_nodes = min(size[0], size[1]) edge_index, edge_attr = remove_self_loops( edge_index, edge_attr) edge_index, edge_attr = add_self_loops( edge_index, edge_attr, fill_value=self.fill_value, num_nodes=num_nodes) elif isinstance(edge_index, SparseTensor): if self.edge_dim is None: edge_index = set_diag(edge_index) else: raise NotImplementedError( "The usage of 'edge_attr' and 'add_self_loops' " "simultaneously is currently not yet supported for " "'edge_index' in a 'SparseTensor' form") # propagate_type: (x: PairTensor, edge_attr: OptTensor) out = self.propagate(edge_index, x=(x_l, x_r), edge_attr=edge_attr, size=size) alpha = self._alpha self._alpha = None if self.concat: out = out.view(-1, self.heads * self.out_channels) else: out = out.mean(dim=1) if self.bias is not None: out += self.bias if isinstance(return_attention_weights, bool): assert alpha is not None if isinstance(edge_index, Tensor): return out, (edge_index, alpha) elif isinstance(edge_index, SparseTensor): return out, edge_index.set_value(alpha, layout='coo') else: return out def message(self, x_j: Tensor, x_i: Tensor, edge_attr: OptTensor, index: Tensor, ptr: OptTensor, size_i: Optional[int]) -> Tensor: x = x_i + x_j if edge_attr is not None: if edge_attr.dim() == 1: edge_attr = edge_attr.view(-1, 1) assert self.lin_edge is not None edge_attr = self.lin_edge(edge_attr) edge_attr = edge_attr.view(-1, self.heads, self.out_channels) x += edge_attr x = F.leaky_relu(x, self.negative_slope) alpha = (x * self.att).sum(dim=-1) alpha = softmax(alpha, index, ptr, size_i) self._alpha = alpha alpha = F.dropout(alpha, p=self.dropout, training=self.training) return x_j * alpha.unsqueeze(-1) def __repr__(self) -> str: return (f'{self.__class__.__name__}({self.in_channels}, ' f'{self.out_channels}, heads={self.heads})')
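# A minimal usage sketch for the `GATv2Conv` layer defined above, mirroring
# the `GATConv` example and additionally passing edge features. All shapes and
# values are illustrative assumptions.
def _gatv2_conv_example():
    import torch

    x = torch.randn(4, 16)
    edge_index = torch.tensor([[0, 1, 2, 3],
                               [1, 0, 3, 2]])
    edge_attr = torch.randn(edge_index.size(1), 5)   # 5-dimensional edge features.
    conv = GATv2Conv(16, 8, heads=2, edge_dim=5)
    out = conv(x, edge_index, edge_attr)             # [4, 2 * 8]
    return out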
class GeneralConv(MessagePassing): r"""A general GNN layer adapted from the `"Design Space for Graph Neural Networks" <https://arxiv.org/abs/2011.08843>`_ paper. Args: in_channels (int or tuple): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. A tuple corresponds to the sizes of source and target dimensionalities. out_channels (int): Size of each output sample. in_edge_channels (int, optional): Size of each input edge. (default: :obj:`None`) aggr (string, optional): The aggregation scheme to use (:obj:`"add"`, :obj:`"mean"`, :obj:`"max"`). (default: :obj:`"mean"`) skip_linear (bool, optional): Whether to apply a linear transformation in the skip connection. (default: :obj:`False`) directed_msg (bool, optional): If set to :obj:`True`, message passing is directed; otherwise, messages are passed in both directions. (default: :obj:`True`) heads (int, optional): Number of message passing ensembles. If :obj:`heads > 1`, the GNN layer will output an ensemble of multiple messages. If attention is used (:obj:`attention=True`), this corresponds to multi-head attention. (default: :obj:`1`) attention (bool, optional): Whether to add attention to message computation. (default: :obj:`False`) attention_type (str, optional): Type of attention: :obj:`"additive"`, :obj:`"dot_product"`. (default: :obj:`"additive"`) l2_normalize (bool, optional): If set to :obj:`True`, output features will be :math:`\ell_2`-normalized, *i.e.*, :math:`\frac{\mathbf{x}^{\prime}_i} {\| \mathbf{x}^{\prime}_i \|_2}`. (default: :obj:`False`) bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. """ def __init__(self, in_channels: Union[int, Tuple[int, int]], out_channels: Optional[int], in_edge_channels: Optional[int] = None, aggr: str = 'add', skip_linear: bool = False, directed_msg: bool = True, heads: int = 1, attention: bool = False, attention_type: str = 'additive', l2_normalize: bool = False, bias: bool = True, **kwargs): # yapf: disable kwargs.setdefault('aggr', aggr) super(GeneralConv, self).__init__(node_dim=0, **kwargs) # todo: a better way to connect different layers together # in a GNN layer without implementing a new GNN layer # https://github.com/vrtex-team/pytorch_geometric/pull/30#discussion_r649692299 self.in_channels = in_channels self.out_channels = out_channels self.in_edge_channels = in_edge_channels self.aggr = aggr self.skip_linear = skip_linear self.directed_msg = directed_msg self.heads = heads self.attention = attention self.attention_type = attention_type self.normalize_l2 = l2_normalize if isinstance(in_channels, int): in_channels = (in_channels, in_channels) if self.directed_msg: self.lin_msg = Linear(in_channels[0], out_channels * self.heads, bias=bias) else: self.lin_msg = Linear(in_channels[0], out_channels * self.heads, bias=bias) self.lin_msg_i = Linear(in_channels[0], out_channels * self.heads, bias=bias) if self.skip_linear or self.in_channels != self.out_channels: self.lin_self = Linear(in_channels[1], out_channels, bias=bias) else: self.lin_self = torch.nn.Identity() if self.in_edge_channels is not None: self.lin_edge = Linear(in_edge_channels, out_channels * self.heads, bias=bias) # todo: A general torch_geometric.nn.AttentionLayer if self.attention: if self.attention_type == 'additive': self.att_msg = Parameter( torch.Tensor(1, self.heads, self.out_channels)) elif self.attention_type == 'dot_product': self.scaler = torch.sqrt(
torch.tensor(out_channels, dtype=torch.float)) else: raise ValueError('attention_type: {} not supported'.format( self.attention_type)) self.reset_parameters() def reset_parameters(self): self.lin_msg.reset_parameters() self.lin_self.reset_parameters() if self.in_edge_channels is not None: self.lin_edge.reset_parameters() if self.attention and self.attention_type == 'additive': glorot(self.att_msg) def forward(self, x: Union[Tensor, OptPairTensor], edge_index: Adj, size: Size = None, edge_feature: Tensor = None) -> Tensor: """""" if isinstance(x, Tensor): x: OptPairTensor = (x, x) x_self = x[1] # propagate_type: (x: OptPairTensor) out = self.propagate(edge_index, x=x, size=size, edge_feature=edge_feature) out = out.mean(dim=1) # todo: other approach to aggregate heads out += self.lin_self(x_self) if self.normalize_l2: out = F.normalize(out, p=2, dim=-1) return out def message_basic(self, x_i: Tensor, x_j: Tensor, edge_feature: Tensor): if self.directed_msg: x_j = self.lin_msg(x_j) else: x_j = self.lin_msg(x_j) + self.lin_msg_i(x_i) if edge_feature is not None: x_j = x_j + self.lin_edge(edge_feature) return x_j def message(self, x_i: Tensor, x_j: Tensor, edge_index_i: Tensor, size_i: Tensor, edge_feature: Tensor) -> Tensor: x_j_out = self.message_basic(x_i, x_j, edge_feature) x_j_out = x_j_out.view(-1, self.heads, self.out_channels) if self.attention: if self.attention_type == 'dot_product': x_i_out = self.message_basic(x_j, x_i, edge_feature) x_i_out = x_i_out.view(-1, self.heads, self.out_channels) alpha = (x_i_out * x_j_out).sum(dim=-1) / self.scaler else: alpha = (x_j_out * self.att_msg).sum(dim=-1) alpha = F.leaky_relu(alpha, negative_slope=0.2) alpha = softmax(alpha, edge_index_i, num_nodes=size_i) alpha = alpha.view(-1, self.heads, 1) return x_j_out * alpha else: return x_j_out def __repr__(self): return '{}({}, {})'.format(self.__class__.__name__, self.in_channels, self.out_channels)
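# A minimal usage sketch for the `GeneralConv` layer defined above with
# additive attention and edge features enabled. All shapes and values are
# illustrative assumptions.
def _general_conv_example():
    import torch

    x = torch.randn(4, 16)
    edge_index = torch.tensor([[0, 1, 2, 3],
                               [1, 0, 3, 2]])
    edge_feature = torch.randn(edge_index.size(1), 8)
    conv = GeneralConv(16, 32, in_edge_channels=8, attention=True, heads=2)
    out = conv(x, edge_index, edge_feature=edge_feature)   # [4, 32] (heads are averaged).
    return out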
class HEATConv(MessagePassing): r"""The heterogeneous edge-enhanced graph attentional operator from the `"Heterogeneous Edge-Enhanced Graph Attention Network For Multi-Agent Trajectory Prediction" <https://arxiv.org/abs/2106.07161>`_ paper, which enhances :class:`~torch_geometric.nn.conv.GATConv` by: 1. type-specific transformations of nodes of different types 2. edge type and edge feature incorporation, in which edges are assumed to have different types but contain the same kind of attributes Args: in_channels (int): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. out_channels (int): Size of each output sample. num_node_types (int): The number of node types. num_edge_types (int): The number of edge types. edge_type_emb_dim (int): The embedding size of edge types. edge_dim (int): Edge feature dimensionality. edge_attr_emb_dim (int): The embedding size of edge features. heads (int, optional): Number of multi-head-attentions. (default: :obj:`1`) concat (bool, optional): If set to :obj:`False`, the multi-head attentions are averaged instead of concatenated. (default: :obj:`True`) negative_slope (float, optional): LeakyReLU angle of the negative slope. (default: :obj:`0.2`) dropout (float, optional): Dropout probability of the normalized attention coefficients which exposes each node to a stochastically sampled neighborhood during training. (default: :obj:`0`) root_weight (bool, optional): If set to :obj:`False`, the layer will not add transformed root node features to the output. (default: :obj:`True`) bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. 
""" def __init__(self, in_channels: int, out_channels: int, num_node_types: int, num_edge_types: int, edge_type_emb_dim: int, edge_dim: int, edge_attr_emb_dim: int, heads: int = 1, concat: bool = True, negative_slope: float = 0.2, dropout: float = 0.0, root_weight: bool = True, bias: bool = True, **kwargs): kwargs.setdefault('aggr', 'add') super().__init__(node_dim=0, **kwargs) self.in_channels = in_channels self.out_channels = out_channels self.heads = heads self.concat = concat self.negative_slope = negative_slope self.dropout = dropout self.root_weight = root_weight self.hetero_lin = HeteroLinear(in_channels, out_channels, num_node_types, bias=bias) self.edge_type_emb = Embedding(num_edge_types, edge_type_emb_dim) self.edge_attr_emb = Linear(edge_dim, edge_attr_emb_dim, bias=False) self.att = Linear(2 * out_channels + edge_type_emb_dim + edge_attr_emb_dim, self.heads, bias=False) self.lin = Linear(out_channels + edge_attr_emb_dim, out_channels, bias=bias) self.reset_parameters() def reset_parameters(self): self.hetero_lin.reset_parameters() self.edge_type_emb.reset_parameters() self.edge_attr_emb.reset_parameters() self.att.reset_parameters() self.lin.reset_parameters() def forward(self, x: Tensor, edge_index: Adj, node_type: Tensor, edge_type: Tensor, edge_attr: OptTensor = None, size: Size = None) -> Tensor: """""" x = self.hetero_lin(x, node_type) edge_type_emb = F.leaky_relu(self.edge_type_emb(edge_type), self.negative_slope) # propagate_type: (x: Tensor, edge_type_emb: Tensor, edge_attr: OptTensor) # noqa out = self.propagate(edge_index, x=x, edge_type_emb=edge_type_emb, edge_attr=edge_attr, size=size) if self.concat: if self.root_weight: out += x.view(-1, 1, self.out_channels) out = out.view(-1, self.heads * self.out_channels) else: out = out.mean(dim=1) if self.root_weight: out += x return out def message(self, x_i: Tensor, x_j: Tensor, edge_type_emb: Tensor, edge_attr: Tensor, index: Tensor, ptr: OptTensor, size_i: Optional[int]) -> Tensor: edge_attr = F.leaky_relu(self.edge_attr_emb(edge_attr), self.negative_slope) alpha = torch.cat([x_i, x_j, edge_type_emb, edge_attr], dim=-1) alpha = F.leaky_relu(self.att(alpha), self.negative_slope) alpha = softmax(alpha, index, ptr, size_i) alpha = F.dropout(alpha, p=self.dropout, training=self.training) out = self.lin(torch.cat([x_j, edge_attr], dim=-1)).unsqueeze(-2) return out * alpha.unsqueeze(-1) def __repr__(self) -> str: return (f'{self.__class__.__name__}({self.in_channels}, ' f'{self.out_channels}, heads={self.heads})')
class SAGEConv(MessagePassing): r"""The GraphSAGE operator from the `"Inductive Representation Learning on Large Graphs" <https://arxiv.org/abs/1706.02216>`_ paper .. math:: \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W}_2 \cdot \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j Args: in_channels (int or tuple): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. A tuple corresponds to the sizes of source and target dimensionalities. out_channels (int): Size of each output sample. normalize (bool, optional): If set to :obj:`True`, output features will be :math:`\ell_2`-normalized, *i.e.*, :math:`\frac{\mathbf{x}^{\prime}_i} {\| \mathbf{x}^{\prime}_i \|_2}`. (default: :obj:`False`) root_weight (bool, optional): If set to :obj:`False`, the layer will not add transformed root node features to the output. (default: :obj:`True`) bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. """ def __init__(self, in_channels: Union[int, Tuple[int, int]], out_channels: int, normalize: bool = False, root_weight: bool = True, bias: bool = True, **kwargs): kwargs.setdefault('aggr', 'mean') super().__init__(**kwargs) self.in_channels = in_channels self.out_channels = out_channels self.normalize = normalize self.root_weight = root_weight if isinstance(in_channels, int): in_channels = (in_channels, in_channels) self.lin_l = Linear(in_channels[0], out_channels, bias=bias) if self.root_weight: self.lin_r = Linear(in_channels[1], out_channels, bias=False) self.reset_parameters() def reset_parameters(self): self.lin_l.reset_parameters() if self.root_weight: self.lin_r.reset_parameters() def forward(self, x: Union[Tensor, OptPairTensor], edge_index: Adj, size: Size = None) -> Tensor: """""" if isinstance(x, Tensor): x: OptPairTensor = (x, x) # propagate_type: (x: OptPairTensor) out = self.propagate(edge_index, x=x, size=size) out = self.lin_l(out) x_r = x[1] if self.root_weight and x_r is not None: out += self.lin_r(x_r) if self.normalize: out = F.normalize(out, p=2., dim=-1) return out def message(self, x_j: Tensor) -> Tensor: return x_j def message_and_aggregate(self, adj_t: SparseTensor, x: OptPairTensor) -> Tensor: adj_t = adj_t.set_value(None, layout=None) return matmul(adj_t, x[0], reduce=self.aggr)
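# Illustrative usage sketch for `SAGEConv` (not part of the original module):
# the shapes below are assumptions; the second call shows the bipartite case,
# where a `(src, dst)` feature tuple is passed.
def _sage_conv_bipartite_example():
    import torch

    x = torch.randn(10, 16)
    edge_index = torch.randint(0, 10, (2, 40))
    conv = SAGEConv(16, 32)
    out = conv(x, edge_index)                         # shape [10, 32]

    # Bipartite message passing: 10 source nodes, 6 target nodes.
    x_src, x_dst = torch.randn(10, 16), torch.randn(6, 24)
    edge_index = torch.stack([torch.randint(0, 10, (40, )),
                              torch.randint(0, 6, (40, ))], dim=0)
    bipartite_conv = SAGEConv((16, 24), 32)
    out = bipartite_conv((x_src, x_dst), edge_index)  # shape [6, 32]
    return out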
class NNConv(MessagePassing): r"""The continuous kernel-based convolutional operator from the `"Neural Message Passing for Quantum Chemistry" <https://arxiv.org/abs/1704.01212>`_ paper. This convolution is also known as the edge-conditioned convolution from the `"Dynamic Edge-Conditioned Filters in Convolutional Neural Networks on Graphs" <https://arxiv.org/abs/1704.02901>`_ paper (see :class:`torch_geometric.nn.conv.ECConv` for an alias): .. math:: \mathbf{x}^{\prime}_i = \mathbf{\Theta} \mathbf{x}_i + \sum_{j \in \mathcal{N}(i)} \mathbf{x}_j \cdot h_{\mathbf{\Theta}}(\mathbf{e}_{i,j}), where :math:`h_{\mathbf{\Theta}}` denotes a neural network, *i.e.*, an MLP. Args: in_channels (int or tuple): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. A tuple corresponds to the sizes of source and target dimensionalities. out_channels (int): Size of each output sample. nn (torch.nn.Module): A neural network :math:`h_{\mathbf{\Theta}}` that maps edge features :obj:`edge_attr` of shape :obj:`[-1, num_edge_features]` to shape :obj:`[-1, in_channels * out_channels]`, *e.g.*, defined by :class:`torch.nn.Sequential`. aggr (string, optional): The aggregation scheme to use (:obj:`"add"`, :obj:`"mean"`, :obj:`"max"`). (default: :obj:`"add"`) root_weight (bool, optional): If set to :obj:`False`, the layer will not add the transformed root node features to the output. (default: :obj:`True`) bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. """ def __init__(self, in_channels: Union[int, Tuple[int, int]], out_channels: int, nn: Callable, aggr: str = 'add', root_weight: bool = True, bias: bool = True, **kwargs): super().__init__(aggr=aggr, **kwargs) self.in_channels = in_channels self.out_channels = out_channels self.nn = nn self.root_weight = root_weight if isinstance(in_channels, int): in_channels = (in_channels, in_channels) self.in_channels_l = in_channels[0] if root_weight: self.lin = Linear(in_channels[1], out_channels, bias=False, weight_initializer='uniform') if bias: self.bias = Parameter(torch.Tensor(out_channels)) else: self.register_parameter('bias', None) self.reset_parameters() def reset_parameters(self): reset(self.nn) if self.root_weight: self.lin.reset_parameters() zeros(self.bias) def forward(self, x: Union[Tensor, OptPairTensor], edge_index: Adj, edge_attr: OptTensor = None, size: Size = None) -> Tensor: """""" if isinstance(x, Tensor): x: OptPairTensor = (x, x) # propagate_type: (x: OptPairTensor, edge_attr: OptTensor) out = self.propagate(edge_index, x=x, edge_attr=edge_attr, size=size) x_r = x[1] if x_r is not None and self.root_weight: out += self.lin(x_r) if self.bias is not None: out += self.bias return out def message(self, x_j: Tensor, edge_attr: Tensor) -> Tensor: weight = self.nn(edge_attr) weight = weight.view(-1, self.in_channels_l, self.out_channels) return torch.matmul(x_j.unsqueeze(1), weight).squeeze(1) def __repr__(self) -> str: return (f'{self.__class__.__name__}({self.in_channels}, ' f'{self.out_channels}, aggr={self.aggr}, nn={self.nn})')
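# Illustrative usage sketch for `NNConv` (not part of the original module):
# the edge-feature MLP below is an assumption; the only requirement is that it
# maps `[-1, num_edge_features]` to `[-1, in_channels * out_channels]`.
def _nn_conv_example():
    import torch
    from torch.nn import Linear as Lin, ReLU, Sequential

    in_channels, out_channels, edge_dim = 16, 32, 4
    x = torch.randn(10, in_channels)
    edge_index = torch.randint(0, 10, (2, 30))
    edge_attr = torch.randn(30, edge_dim)

    # h_Theta: maps each edge feature vector to a flattened (16 x 32) kernel.
    edge_mlp = Sequential(Lin(edge_dim, 64), ReLU(),
                          Lin(64, in_channels * out_channels))
    conv = NNConv(in_channels, out_channels, nn=edge_mlp, aggr='mean')
    out = conv(x, edge_index, edge_attr)              # shape [10, 32]
    return out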
class PointTransformerConv(MessagePassing): r"""The Point Transformer layer from the `"Point Transformer" <https://arxiv.org/abs/2012.09164>`_ paper .. math:: \mathbf{x}^{\prime}_i = \sum_{j \in \mathcal{N}(i) \cup \{ i \}} \alpha_{i,j} \left(\mathbf{W}_3 \mathbf{x}_j + \delta_{i,j} \right), where the attention coefficients :math:`\alpha_{i,j}` and positional embedding :math:`\delta_{i,j}` are computed as .. math:: \alpha_{i,j}= \textrm{softmax} \left( \gamma_\mathbf{\Theta} (\mathbf{W}_1 \mathbf{x}_i - \mathbf{W}_2 \mathbf{x}_j + \delta_{i,j}) \right) and .. math:: \delta_{i,j}= h_{\mathbf{\Theta}}(\mathbf{p}_i - \mathbf{p}_j), with :math:`\gamma_\mathbf{\Theta}` and :math:`h_\mathbf{\Theta}` denoting neural networks, *i.e.* MLPs, and :math:`\mathbf{P} \in \mathbb{R}^{N \times D}` defines the position of each point. Args: in_channels (int or tuple): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. A tuple corresponds to the sizes of source and target dimensionalities. out_channels (int): Size of each output sample. pos_nn (torch.nn.Module, optional): A neural network :math:`h_\mathbf{\Theta}` which maps relative spatial coordinates :obj:`pos_j - pos_i` of shape :obj:`[-1, 3]` to shape :obj:`[-1, out_channels]`. Will default to a :class:`torch.nn.Linear` transformation if not further specified. (default: :obj:`None`) attn_nn (torch.nn.Module, optional): A neural network :math:`\gamma_\mathbf{\Theta}` which maps transformed node features of shape :obj:`[-1, out_channels]` to shape :obj:`[-1, out_channels]`. (default: :obj:`None`) add_self_loops (bool, optional): If set to :obj:`False`, will not add self-loops to the input graph. (default: :obj:`True`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`.
Shapes: - **input:** node features :math:`(|\mathcal{V}|, F_{in})` or :math:`((|\mathcal{V_s}|, F_{s}), (|\mathcal{V_t}|, F_{t}))` if bipartite, positions :math:`(|\mathcal{V}|, 3)` or :math:`((|\mathcal{V_s}|, 3), (|\mathcal{V_t}|, 3))` if bipartite, edge indices :math:`(2, |\mathcal{E}|)` - **output:** node features :math:`(|\mathcal{V}|, F_{out})` or :math:`(|\mathcal{V}_t|, F_{out})` if bipartite """ def __init__(self, in_channels: Union[int, Tuple[int, int]], out_channels: int, pos_nn: Optional[Callable] = None, attn_nn: Optional[Callable] = None, add_self_loops: bool = True, **kwargs): kwargs.setdefault('aggr', 'mean') super().__init__(**kwargs) self.in_channels = in_channels self.out_channels = out_channels self.add_self_loops = add_self_loops if isinstance(in_channels, int): in_channels = (in_channels, in_channels) self.pos_nn = pos_nn if self.pos_nn is None: self.pos_nn = Linear(3, out_channels) self.attn_nn = attn_nn self.lin = Linear(in_channels[0], out_channels, bias=False) self.lin_src = Linear(in_channels[0], out_channels, bias=False) self.lin_dst = Linear(in_channels[1], out_channels, bias=False) self.reset_parameters() def reset_parameters(self): reset(self.pos_nn) if self.attn_nn is not None: reset(self.attn_nn) self.lin.reset_parameters() self.lin_src.reset_parameters() self.lin_dst.reset_parameters() def forward( self, x: Union[Tensor, PairTensor], pos: Union[Tensor, PairTensor], edge_index: Adj, ) -> Tensor: """""" if isinstance(x, Tensor): alpha = (self.lin_src(x), self.lin_dst(x)) x: PairTensor = (self.lin(x), x) else: alpha = (self.lin_src(x[0]), self.lin_dst(x[1])) x = (self.lin(x[0]), x[1]) if isinstance(pos, Tensor): pos: PairTensor = (pos, pos) if self.add_self_loops: if isinstance(edge_index, Tensor): edge_index, _ = remove_self_loops(edge_index) edge_index, _ = add_self_loops(edge_index, num_nodes=min( pos[0].size(0), pos[1].size(0))) elif isinstance(edge_index, SparseTensor): edge_index = set_diag(edge_index) # propagate_type: (x: PairTensor, pos: PairTensor, alpha: PairTensor) out = self.propagate(edge_index, x=x, pos=pos, alpha=alpha, size=None) return out def message(self, x_j: Tensor, pos_i: Tensor, pos_j: Tensor, alpha_i: Tensor, alpha_j: Tensor, index: Tensor, ptr: OptTensor, size_i: Optional[int]) -> Tensor: delta = self.pos_nn(pos_i - pos_j) alpha = alpha_i - alpha_j + delta if self.attn_nn is not None: alpha = self.attn_nn(alpha) alpha = softmax(alpha, index, ptr, size_i) return alpha * (x_j + delta) def __repr__(self) -> str: return (f'{self.__class__.__name__}({self.in_channels}, ' f'{self.out_channels})')
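# Illustrative usage sketch for `PointTransformerConv` (not part of the
# original module): the point cloud is random and the k-NN graph is built with
# `torch_geometric.nn.knn_graph` (requires `torch-cluster`) purely for
# demonstration purposes.
def _point_transformer_conv_example():
    import torch
    from torch_geometric.nn import knn_graph

    num_points = 50
    pos = torch.rand(num_points, 3)                   # 3D coordinates
    x = torch.randn(num_points, 16)                   # per-point features
    edge_index = knn_graph(pos, k=8)

    conv = PointTransformerConv(16, 32)
    out = conv(x, pos, edge_index)                    # shape [50, 32]
    return out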
class GraphAttentionLayer(MessagePassing): def __init__(self, in_features: int, out_features: int, n_heads: int, residual: bool, dropout: float = 0.6, slope: float = 0.2, activation: nn.Module = nn.ELU()): super(GraphAttentionLayer, self).__init__(aggr='add', node_dim=0) self.in_features = in_features self.out_features = out_features self.heads = n_heads self.residual = residual self.attn_dropout = nn.Dropout(dropout) self.feat_dropout = nn.Dropout(dropout) self.leakyrelu = nn.LeakyReLU(negative_slope=slope) self.activation = activation self.feat_lin = Linear(in_features, out_features * n_heads, bias=True, weight_initializer='glorot') self.attn_vec = nn.Parameter(torch.Tensor(1, n_heads, out_features)) # instantiate the residual projection only if 'residual' is requested if residual: self.proj_r = Linear(in_features, out_features, bias=False, weight_initializer='glorot') else: self.register_parameter('proj_r', None) self.reset_parameters() def reset_parameters(self): glorot(self.attn_vec) self.feat_lin.reset_parameters() if self.proj_r is not None: self.proj_r.reset_parameters() def forward(self, x, edge_idx, size=None): # apply feature dropout to the input node features x = self.feat_dropout(x) x_r = x_l = self.feat_lin(x).view(-1, self.heads, self.out_features) # compute attention-weighted messages from neighbors and aggregate them output = self.propagate(edge_index=edge_idx, x=(x_l, x_r), size=size) if self.proj_r is not None: output = (output.transpose(0, 1) + self.proj_r(x)).transpose(1, 0) # output = self.activation(output) output = output.mean(dim=1) return output def message(self, x_i, x_j, index, ptr, size_i): x = x_i + x_j x = self.leakyrelu(x) alpha = (x * self.attn_vec).sum(dim=-1) alpha = softmax(alpha, index, ptr, size_i) alpha = self.attn_dropout(alpha) return x_j * alpha.unsqueeze(-1)
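# Illustrative usage sketch for `GraphAttentionLayer` (not part of the
# original module): feature sizes and the number of heads are arbitrary
# assumptions; since the layer averages its heads, the output dimensionality
# equals `out_features`.
def _graph_attention_layer_example():
    import torch

    x = torch.randn(12, 16)
    edge_index = torch.randint(0, 12, (2, 48))

    layer = GraphAttentionLayer(in_features=16, out_features=32, n_heads=4,
                                residual=True, dropout=0.6)
    out = layer(x, edge_index)                        # shape [12, 32]
    return out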
class SGConv(MessagePassing): r"""The simple graph convolutional operator from the `"Simplifying Graph Convolutional Networks" <https://arxiv.org/abs/1902.07153>`_ paper .. math:: \mathbf{X}^{\prime} = {\left(\mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}} \mathbf{\hat{D}}^{-1/2} \right)}^K \mathbf{X} \mathbf{\Theta}, where :math:`\mathbf{\hat{A}} = \mathbf{A} + \mathbf{I}` denotes the adjacency matrix with inserted self-loops and :math:`\hat{D}_{ii} = \sum_{j=0} \hat{A}_{ij}` its diagonal degree matrix. The adjacency matrix can include other values than :obj:`1` representing edge weights via the optional :obj:`edge_weight` tensor. Args: in_channels (int): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. out_channels (int): Size of each output sample. K (int, optional): Number of hops :math:`K`. (default: :obj:`1`) cached (bool, optional): If set to :obj:`True`, the layer will cache the computation of :math:`{\left(\mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}} \mathbf{\hat{D}}^{-1/2} \right)}^K \mathbf{X}` on first execution, and will use the cached version for further executions. This parameter should only be set to :obj:`True` in transductive learning scenarios. (default: :obj:`False`) add_self_loops (bool, optional): If set to :obj:`False`, will not add self-loops to the input graph. (default: :obj:`True`) bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. Shapes: - **input:** node features :math:`(|\mathcal{V}|, F_{in})`, edge indices :math:`(2, |\mathcal{E}|)`, edge weights :math:`(|\mathcal{E}|)` *(optional)* - **output:** node features :math:`(|\mathcal{V}|, F_{out})` """ _cached_x: Optional[Tensor] def __init__(self, in_channels: int, out_channels: int, K: int = 1, cached: bool = False, add_self_loops: bool = True, bias: bool = True, **kwargs): kwargs.setdefault('aggr', 'add') super().__init__(**kwargs) self.in_channels = in_channels self.out_channels = out_channels self.K = K self.cached = cached self.add_self_loops = add_self_loops self._cached_x = None self.lin = Linear(in_channels, out_channels, bias=bias) self.reset_parameters() def reset_parameters(self): self.lin.reset_parameters() self._cached_x = None def forward(self, x: Tensor, edge_index: Adj, edge_weight: OptTensor = None) -> Tensor: """""" cache = self._cached_x if cache is None: if isinstance(edge_index, Tensor): edge_index, edge_weight = gcn_norm( # yapf: disable edge_index, edge_weight, x.size(self.node_dim), False, self.add_self_loops, dtype=x.dtype) elif isinstance(edge_index, SparseTensor): edge_index = gcn_norm( # yapf: disable edge_index, edge_weight, x.size(self.node_dim), False, self.add_self_loops, dtype=x.dtype) for k in range(self.K): # propagate_type: (x: Tensor, edge_weight: OptTensor) x = self.propagate(edge_index, x=x, edge_weight=edge_weight, size=None) if self.cached: self._cached_x = x else: x = cache.detach() return self.lin(x) def message(self, x_j: Tensor, edge_weight: Tensor) -> Tensor: return edge_weight.view(-1, 1) * x_j def message_and_aggregate(self, adj_t: SparseTensor, x: Tensor) -> Tensor: return matmul(adj_t, x, reduce=self.aggr) def __repr__(self) -> str: return (f'{self.__class__.__name__}({self.in_channels}, ' f'{self.out_channels}, K={self.K})')
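# Illustrative usage sketch for `SGConv` (not part of the original module):
# shapes are assumptions. With `cached=True`, the K-hop propagated features
# are stored after the first call, which is only safe in transductive
# settings where the graph does not change between calls.
def _sg_conv_example():
    import torch

    x = torch.randn(10, 16)
    edge_index = torch.randint(0, 10, (2, 30))
    edge_weight = torch.rand(30)

    conv = SGConv(16, 7, K=2, cached=True)
    out = conv(x, edge_index, edge_weight)   # propagation runs and is cached
    out = conv(x, edge_index, edge_weight)   # second call reuses the cache
    assert out.size() == (10, 7)
    return out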
class SAGEConv(MessagePassing): r"""The GraphSAGE operator from the `"Inductive Representation Learning on Large Graphs" <https://arxiv.org/abs/1706.02216>`_ paper .. math:: \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W}_2 \cdot \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j If :obj:`project = True`, then :math:`\mathbf{x}_j` will first get projected via .. math:: \mathbf{x}_j \leftarrow \sigma ( \mathbf{W}_3 \mathbf{x}_j + \mathbf{b}) as described in Eq. (3) of the paper. Args: in_channels (int or tuple): Size of each input sample, or :obj:`-1` to derive the size from the first input(s) to the forward method. A tuple corresponds to the sizes of source and target dimensionalities. out_channels (int): Size of each output sample. aggr (string or Aggregation, optional): The aggregation scheme to use. Any aggregation of :obj:`torch_geometric.nn.aggr` can be used, *e.g.*, :obj:`"mean"`, :obj:`"max"`, or :obj:`"lstm"`. (default: :obj:`"mean"`) normalize (bool, optional): If set to :obj:`True`, output features will be :math:`\ell_2`-normalized, *i.e.*, :math:`\frac{\mathbf{x}^{\prime}_i} {\| \mathbf{x}^{\prime}_i \|_2}`. (default: :obj:`False`) root_weight (bool, optional): If set to :obj:`False`, the layer will not add transformed root node features to the output. (default: :obj:`True`) project (bool, optional): If set to :obj:`True`, the layer will apply a linear transformation followed by an activation function before aggregation (as described in Eq. (3) of the paper). (default: :obj:`False`) bias (bool, optional): If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`) **kwargs (optional): Additional arguments of :class:`torch_geometric.nn.conv.MessagePassing`. Shapes: - **inputs:** node features :math:`(|\mathcal{V}|, F_{in})` or :math:`((|\mathcal{V_s}|, F_{s}), (|\mathcal{V_t}|, F_{t}))` if bipartite, edge indices :math:`(2, |\mathcal{E}|)` - **outputs:** node features :math:`(|\mathcal{V}|, F_{out})` or :math:`(|\mathcal{V_t}|, F_{out})` if bipartite """ def __init__( self, in_channels: Union[int, Tuple[int, int]], out_channels: int, aggr: Optional[Union[str, List[str], Aggregation]] = "mean", normalize: bool = False, root_weight: bool = True, project: bool = False, bias: bool = True, **kwargs, ): self.in_channels = in_channels self.out_channels = out_channels self.normalize = normalize self.root_weight = root_weight self.project = project if isinstance(in_channels, int): in_channels = (in_channels, in_channels) if aggr == 'lstm': kwargs.setdefault('aggr_kwargs', {}) kwargs['aggr_kwargs'].setdefault('in_channels', in_channels[0]) kwargs['aggr_kwargs'].setdefault('out_channels', in_channels[0]) super().__init__(aggr, **kwargs) if self.project: self.lin = Linear(in_channels[0], in_channels[0], bias=True) if self.aggr is None: self.fuse = False # No "fused" message_and_aggregate. 
self.lstm = LSTM(in_channels[0], in_channels[0], batch_first=True) if isinstance(self.aggr_module, MultiAggregation): aggr_out_channels = self.aggr_module.get_out_channels( in_channels[0]) else: aggr_out_channels = in_channels[0] self.lin_l = Linear(aggr_out_channels, out_channels, bias=bias) if self.root_weight: self.lin_r = Linear(in_channels[1], out_channels, bias=False) self.reset_parameters() def reset_parameters(self): if self.project: self.lin.reset_parameters() self.aggr_module.reset_parameters() self.lin_l.reset_parameters() if self.root_weight: self.lin_r.reset_parameters() def forward(self, x: Union[Tensor, OptPairTensor], edge_index: Adj, size: Size = None) -> Tensor: """""" if isinstance(x, Tensor): x: OptPairTensor = (x, x) if self.project and hasattr(self, 'lin'): x = (self.lin(x[0]).relu(), x[1]) # propagate_type: (x: OptPairTensor) out = self.propagate(edge_index, x=x, size=size) out = self.lin_l(out) x_r = x[1] if self.root_weight and x_r is not None: out += self.lin_r(x_r) if self.normalize: out = F.normalize(out, p=2., dim=-1) return out def message(self, x_j: Tensor) -> Tensor: return x_j def message_and_aggregate(self, adj_t: SparseTensor, x: OptPairTensor) -> Tensor: adj_t = adj_t.set_value(None, layout=None) return matmul(adj_t, x[0], reduce=self.aggr) def __repr__(self) -> str: return (f'{self.__class__.__name__}({self.in_channels}, ' f'{self.out_channels}, aggr={self.aggr})')
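# Illustrative usage sketch for the aggregation-aware `SAGEConv` (not part of
# the original module): shapes are assumptions. The second layer uses the
# string-based LSTM aggregation, which requires `edge_index` to be sorted by
# destination node, so the example sorts the edges first.
def _sage_conv_aggregation_example():
    import torch

    x = torch.randn(10, 16)
    edge_index = torch.randint(0, 10, (2, 40))

    conv = SAGEConv(16, 32, aggr='mean', project=True)
    out = conv(x, edge_index)                         # shape [10, 32]

    # Sort edges by target node so LSTM aggregation sees contiguous groups.
    perm = edge_index[1].argsort()
    edge_index = edge_index[:, perm]
    lstm_conv = SAGEConv(16, 32, aggr='lstm')
    out = lstm_conv(x, edge_index)                    # shape [10, 32]
    return out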