def __init__(self, in_feats, out_feats, norm='none', weight=True, bias=True, activation=None):
    """Set up the layer's sizes, normalisation mode and learnable tensors.

    Parameters
    ----------
    in_feats : int
        First dimension of the weight matrix.
    out_feats : int
        Second dimension of the weight matrix and length of the bias vector.
    norm : str, optional
        Normalisation mode; one of ``'none'``, ``'both'``, ``'right'`` or ``'left'``.
    weight : bool, optional
        If true, an ``(in_feats, out_feats)`` parameter is created; otherwise the
        ``weight`` parameter is registered as ``None``.
    bias : bool, optional
        If true, an ``(out_feats,)`` parameter is created; otherwise the ``bias``
        parameter is registered as ``None``.
    activation : callable, optional
        Stored on the module as ``_activation``.

    Raises
    ------
    DGLError
        If ``norm`` is not one of the accepted values.
    """
    super(MedianConv, self).__init__()

    accepted_norms = ('none', 'both', 'right', 'left')
    if norm not in accepted_norms:
        raise DGLError('Invalid norm value. Must be either "none", "both", "right" or "left".'
                       ' But got "{}".'.format(norm))

    self._in_feats = in_feats
    self._out_feats = out_feats
    self._norm = norm

    # The tensors are allocated uninitialised; reset_parameters() below is
    # expected to fill them in.
    weight_param = nn.Parameter(th.Tensor(in_feats, out_feats)) if weight else None
    self.register_parameter('weight', weight_param)

    bias_param = nn.Parameter(th.Tensor(out_feats)) if bias else None
    self.register_parameter('bias', bias_param)

    self.reset_parameters()

    self._activation = activation
def forward(self, graph, feat, weight=None):
    """Compute graph convolution.

    The normalizer constant :math:`c_{ij}` is read from two node fields:
    ``"cj"`` on the source nodes and ``"ci"`` on the destination nodes.
    Source features are scaled by ``dropout(cj)``, summed over incoming
    edges and the result is scaled by ``ci``.

    Parameters
    ----------
    graph : DGLGraph
        The graph. ``graph.srcdata['cj']`` and ``graph.dstdata['ci']`` must exist.
    feat : torch.Tensor or tuple
        The input feature. When a tuple is given only its first element
        (the source feature) is used.
    weight : torch.Tensor, optional
        Optional external weight tensor. Only allowed when the module does
        not own a weight parameter.

    Returns
    -------
    torch.Tensor
        The output feature

    Raises
    ------
    DGLError
        If an external weight is provided while the module has its own
        weight parameter.
    """
    with graph.local_scope():
        if isinstance(feat, tuple):
            feat, _ = feat      # dst feature not used
        cj = graph.srcdata['cj']
        ci = graph.dstdata['ci']
        if self.device is not None:
            cj = cj.to(self.device)
            ci = ci.to(self.device)

        if weight is not None:
            if self.weight is not None:
                raise DGLError(
                    'External weight is provided while at the same time the'
                    ' module has defined its own weight parameter. Please'
                    ' create the module with flag weight=False.')
        else:
            weight = self.weight

        if weight is not None:
            feat = dot_or_identity(feat, weight, self.device)

        # Dropout is applied to the normalizer, not to the feature itself.
        feat = feat * self.dropout(cj)
        graph.srcdata['h'] = feat
        # copy_u is the current name of the deprecated copy_src builtin, which
        # newer DGL releases no longer provide.
        graph.update_all(fn.copy_u('h', 'm'),
                         fn.sum(msg='m', out='h'))
        rst = graph.dstdata['h']
        rst = rst * ci

    return rst
def __init__(self, h_dim, embedding_range=None, norm='none', activation=None,
             allow_zero_in_degree=False, attention_mechanism=False, self_loop=True,
             ent_drop=0, rel_drop=0):
    """Store the layer configuration and create its parameters and dropouts.

    Parameters
    ----------
    h_dim : int
        Side length of the square self-loop weight matrix.
    embedding_range : optional
        Handed unchanged to ``register_parameter('embedding_range', ...)``;
        ``None`` registers an empty slot.
    norm : str, optional
        Normalisation mode; one of ``'none'``, ``'both'`` or ``'right'``.
    activation : callable, optional
        Stored on the module as ``activation``.
    allow_zero_in_degree : bool, optional
        Stored on the module as ``_allow_zero_in_degree``.
    attention_mechanism : bool, optional
        Stored on the module; attention aggregation is not implemented yet.
    self_loop : bool, optional
        If true, an ``(h_dim, h_dim)`` ``loop_weight`` parameter is created
        and Xavier-uniform initialised with the ReLU gain.
    ent_drop : float, optional
        Probability used for the ``ent_dropout`` layer.
    rel_drop : float, optional
        Probability used for the ``rel_dropout`` layer.

    Raises
    ------
    DGLError
        If ``norm`` is not one of the accepted values.
    """
    super().__init__()

    accepted_norms = ('none', 'both', 'right')
    if norm not in accepted_norms:
        raise DGLError('Invalid norm value. Must be either "none", "both" or "right".'
                       ' But got "{}".'.format(norm))

    # Plain configuration attributes.
    self._norm = norm
    self.self_loop = self_loop
    self._allow_zero_in_degree = allow_zero_in_degree
    self.pi = 3.14159265358979323846

    if self.self_loop:
        relu_gain = nn.init.calculate_gain('relu')
        self.loop_weight = nn.Parameter(torch.Tensor(h_dim, h_dim))
        nn.init.xavier_uniform_(self.loop_weight, gain=relu_gain)

    # register_parameter accepts None, so one call covers both the "given"
    # and the "not given" case.
    self.register_parameter('embedding_range', embedding_range)

    self.activation = activation
    self.ent_dropout = nn.Dropout(p=ent_drop)
    self.rel_dropout = nn.Dropout(p=rel_drop)

    # TODO add attention aggregation
    self.attention_mechanism = attention_mechanism
def forward(self, graph, feat, weight=None, edge_weight=None):
    r"""

    Description
    -----------
    Compute graph convolution, reducing the incoming messages of every
    destination node with ``median_reduce`` instead of a sum.

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    feat : torch.Tensor or pair of torch.Tensor
        If a torch.Tensor is given, it represents the input feature of shape
        :math:`(N, D_{in})`
        where :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes.
        If a pair of torch.Tensor is given, which is the case for bipartite graph, the pair
        must contain two tensors of shape :math:`(N_{in}, D_{in_{src}})` and
        :math:`(N_{out}, D_{in_{dst}})`.
    weight : torch.Tensor, optional
        Optional external weight tensor.
    edge_weight : torch.Tensor, optional
        Optional tensor on the edge; its first dimension must equal the number
        of edges. If given, every message is the source feature multiplied by
        the weight of its edge.

    Returns
    -------
    torch.Tensor
        The output feature

    Raises
    ------
    DGLError
        External weight is provided while at the same time the module
        has defined its own weight parameter.
    AssertionError
        ``edge_weight`` does not have one entry per edge.

    Note
    ----
    * Input shape: :math:`(N, *, \text{in_feats})` where * means any number of additional
      dimensions, :math:`N` is the number of nodes.
    * Output shape: :math:`(N, *, \text{out_feats})` where all but the last dimension are
      the same shape as the input.
    * Weight shape: :math:`(\text{in_feats}, \text{out_feats})`.
    * This method performs no zero-in-degree check.
    """
    with graph.local_scope():
        # copy_u is the current name of the deprecated copy_src builtin, which
        # newer DGL releases no longer provide.
        aggregate_fn = fn.copy_u('h', 'm')
        if edge_weight is not None:
            assert edge_weight.shape[0] == graph.number_of_edges()
            graph.edata['_edge_weight'] = edge_weight
            aggregate_fn = fn.u_mul_e('h', '_edge_weight', 'm')

        # (BarclayII) For RGCN on heterogeneous graphs we need to support GCN on bipartite.
        feat_src, feat_dst = expand_as_pair(feat, graph)

        # Source-side normalisation by out-degree ('left': 1/d, 'both': d^-0.5).
        if self._norm in ['left', 'both']:
            degs = graph.out_degrees().float().clamp(min=1)
            if self._norm == 'both':
                norm = th.pow(degs, -0.5)
            else:
                norm = 1.0 / degs
            # Append singleton axes so the per-node factor broadcasts over features.
            shp = norm.shape + (1,) * (feat_src.dim() - 1)
            norm = th.reshape(norm, shp)
            feat_src = feat_src * norm

        if weight is not None:
            if self.weight is not None:
                raise DGLError('External weight is provided while at the same time the'
                               ' module has defined its own weight parameter. Please'
                               ' create the module with flag weight=False.')
        else:
            weight = self.weight

        # Multiply by W before aggregating when that shrinks the feature size,
        # otherwise after.
        # NOTE(review): assuming median_reduce is an element-wise median, the
        # two orders are not numerically equivalent (a median does not commute
        # with a linear map) -- confirm this shortcut is intended.
        mult_first = self._in_feats > self._out_feats
        if mult_first and weight is not None:
            feat_src = th.matmul(feat_src, weight)
        graph.srcdata['h'] = feat_src
        graph.update_all(aggregate_fn, median_reduce)
        rst = graph.dstdata['h']
        if not mult_first and weight is not None:
            rst = th.matmul(rst, weight)

        # Destination-side normalisation by in-degree ('right': 1/d, 'both': d^-0.5).
        if self._norm in ['right', 'both']:
            degs = graph.in_degrees().float().clamp(min=1)
            if self._norm == 'both':
                norm = th.pow(degs, -0.5)
            else:
                norm = 1.0 / degs
            shp = norm.shape + (1,) * (feat_dst.dim() - 1)
            norm = th.reshape(norm, shp)
            rst = rst * norm

        if self.bias is not None:
            rst = rst + self.bias

        if self._activation is not None:
            rst = self._activation(rst)

        return rst
def forward(self, graph, feat, weight=None):
    r"""

    Description
    -----------
    Compute an edge-weighted graph convolution: every message is the source
    feature multiplied by the ``'weight'`` field of its edge, and messages
    are summed per destination node.

    Parameters
    ----------
    graph : DGLGraph
        The graph. Its edges must already carry a ``'weight'`` field; this
        method does not set it.
    feat : torch.Tensor or pair of torch.Tensor
        If a torch.Tensor is given, it represents the input feature of shape
        :math:`(N, D_{in})`
        where :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes.
        If a pair of torch.Tensor is given, which is the case for bipartite graph, the pair
        must contain two tensors of shape :math:`(N_{in}, D_{in_{src}})` and
        :math:`(N_{out}, D_{in_{dst}})`.
    weight : torch.Tensor, optional
        Optional external weight tensor.

    Returns
    -------
    torch.Tensor
        The output feature

    Raises
    ------
    DGLError
        Case 1:
        If there are 0-in-degree nodes in the input graph, it will raise DGLError
        since no message will be passed to those nodes. This will cause invalid output.
        The error can be ignored by setting ``allow_zero_in_degree`` parameter to ``True``.

        Case 2:
        External weight is provided while at the same time the module
        has defined its own weight parameter.

    Note
    ----
    * Input shape: :math:`(N, *, \text{in_feats})` where * means any number of additional
      dimensions, :math:`N` is the number of nodes.
    * Output shape: :math:`(N, *, \text{out_feats})` where all but the last dimension are
      the same shape as the input.
    * Weight shape: :math:`(\text{in_feats}, \text{out_feats})`.
    """
    with graph.local_scope():
        check_zero_in_degree = not self._allow_zero_in_degree
        if check_zero_in_degree and (graph.in_degrees() == 0).any():
            raise DGLError(
                'There are 0-in-degree nodes in the graph, '
                'output for those nodes will be invalid. '
                'This is harmful for some applications, '
                'causing silent performance regression. '
                'Adding self-loop on the input graph by '
                'calling `g = dgl.add_self_loop(g)` will resolve '
                'the issue. Setting ``allow_zero_in_degree`` '
                'to be `True` when constructing this module will '
                'suppress the check and let the code run.')

        # (BarclayII) For RGCN on heterogeneous graphs we need to support GCN on bipartite.
        feat_src, feat_dst = expand_as_pair(feat, graph)

        # Symmetric normalisation: scale sources by out-degree^-0.5.
        if self._norm == 'both':
            out_degs = graph.out_degrees().float().clamp(min=1)
            src_scale = torch.pow(out_degs, -0.5)
            # Trailing singleton axes let the per-node factor broadcast.
            src_scale = torch.reshape(
                src_scale, src_scale.shape + (1,) * (feat_src.dim() - 1))
            feat_src = feat_src * src_scale

        if weight is None:
            weight = self.weight
        elif self.weight is not None:
            raise DGLError(
                'External weight is provided while at the same time the'
                ' module has defined its own weight parameter. Please'
                ' create the module with flag weight=False.')

        # Apply W on whichever side of the aggregation has the smaller
        # feature size: before it when in_feats > out_feats, after otherwise.
        project_first = self._in_feats > self._out_feats
        if project_first and weight is not None:
            feat_src = torch.matmul(feat_src, weight)

        graph.srcdata['h'] = feat_src
        message_fn = fn.u_mul_e(lhs_field='h', rhs_field='weight', out='m')
        reduce_fn = fn.sum(msg='m', out='h')
        graph.update_all(message_fn, reduce_fn)
        rst = graph.dstdata['h']

        if not project_first and weight is not None:
            rst = torch.matmul(rst, weight)

        # Destination-side scaling by in-degree ('both': d^-0.5, otherwise 1/d).
        if self._norm != 'none':
            in_degs = graph.in_degrees().float().clamp(min=1)
            if self._norm == 'both':
                dst_scale = torch.pow(in_degs, -0.5)
            else:
                dst_scale = 1.0 / in_degs
            dst_scale = torch.reshape(
                dst_scale, dst_scale.shape + (1,) * (feat_dst.dim() - 1))
            rst = rst * dst_scale

        if self.bias is not None:
            rst = rst + self.bias

        if self._activation is not None:
            rst = self._activation(rst)

        return rst
def _forward(self, graph, feat, get_attention=False):
    r"""

    Description
    -----------
    Compute graph attention network layer. The projection and attention
    parameters are built as a softmax-weighted sum of shared bases.

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    feat : torch.Tensor or pair of torch.Tensor
        If a torch.Tensor is given, the input feature of shape :math:`(N, D_{in})` where
        :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes.
        If a pair of torch.Tensor is given, the pair must contain two tensors of shape
        :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`.
    get_attention : bool, optional
        Whether to return the attention values. Default to False.

    Returns
    -------
    torch.Tensor
        The output feature of shape :math:`(N, H, D_{out})` where :math:`H`
        is the number of heads, and :math:`D_{out}` is size of output feature.
    torch.Tensor, optional
        The attention values of shape :math:`(E, H, 1)`, where :math:`E` is the number of
        edges. This is returned only when :attr:`get_attention` is ``True``.

    Raises
    ------
    DGLError
        If there are 0-in-degree nodes in the input graph, it will raise DGLError
        since no message will be passed to those nodes. This will cause invalid output.
        The error can be ignored by setting ``allow_zero_in_degree`` parameter to ``True``.
    """
    with graph.local_scope():
        if not self._allow_zero_in_degree:
            if (graph.in_degrees() == 0).any():
                raise DGLError('There are 0-in-degree nodes in the graph, '
                               'output for those nodes will be invalid. '
                               'This is harmful for some applications, '
                               'causing silent performance regression. '
                               'Adding self-loop on the input graph by '
                               'calling `g = dgl.add_self_loop(g)` will resolve '
                               'the issue. Setting ``allow_zero_in_degree`` '
                               'to be `True` when constructing this module will '
                               'suppress the check and let the code run.')

        # Mixing coefficients over the bases, reshaped to (num_bases, 1, 1) so
        # they broadcast against each basis tensor.
        basis_coef = softmax(self._basis_coef, dim=-1).reshape(-1, 1, 1)

        # The combined parameter plays the role of a bias-free linear layer's
        # weight, so the projection is ``h @ W.T``. Computing ``W @ h.T`` and
        # then calling ``view`` would reinterpret an (H * D_out, N) matrix as
        # (N, H, D_out) and mix features of different nodes.
        # assumes each basis is (num_bases, H * D_out, D_in) -- TODO confirm
        if isinstance(feat, tuple):
            h_src = self.feat_drop(feat[0])
            h_dst = self.feat_drop(feat[1])
            params_src = (self._basis[0] * basis_coef).sum(dim=0)
            params_dst = (self._basis[1] * basis_coef).sum(dim=0)
            feat_src = (h_src @ params_src.t()).view(
                -1, self._num_heads, self._out_feats)
            feat_dst = (h_dst @ params_dst.t()).view(
                -1, self._num_heads, self._out_feats)
        else:
            h_src = h_dst = self.feat_drop(feat)
            params = (self._basis * basis_coef).sum(dim=0)
            feat_src = feat_dst = (h_src @ params.t()).view(
                -1, self._num_heads, self._out_feats)
            if graph.is_block:
                # In a block the destination nodes are the leading source
                # nodes. Truncate the raw features as well so the residual
                # term below has one row per destination node.
                num_dst = graph.number_of_dst_nodes()
                feat_dst = feat_src[:num_dst]
                h_dst = h_dst[:num_dst]

        # NOTE: GAT paper uses "first concatenation then linear projection"
        # to compute attention scores, while ours is "first projection then
        # addition", the two approaches are mathematically equivalent:
        # We decompose the weight vector a mentioned in the paper into
        # [a_l || a_r], then
        # a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j
        # Our implementation is much efficient because we do not need to
        # save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus,
        # addition could be optimized with DGL's built-in function u_add_v,
        # which further speeds up computation and saves memory footprint.
        attn_l_param = (self._attn_basis[0] * basis_coef).sum(dim=0)
        attn_r_param = (self._attn_basis[1] * basis_coef).sum(dim=0)
        el = (feat_src * attn_l_param).sum(dim=-1).unsqueeze(-1)
        er = (feat_dst * attn_r_param).sum(dim=-1).unsqueeze(-1)
        graph.srcdata.update({'ft': feat_src, 'el': el})
        graph.dstdata.update({'er': er})
        # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
        graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
        e = self.leaky_relu(graph.edata.pop('e'))
        # compute softmax
        graph.edata['a'] = self.attn_drop(edge_softmax(graph, e))
        # message passing
        graph.update_all(fn.u_mul_e('ft', 'a', 'm'),
                         fn.sum('m', 'ft'))
        rst = graph.dstdata['ft']
        # residual
        if self.res_fc is not None:
            resval = self.res_fc(h_dst).view(h_dst.shape[0], self._num_heads, self._out_feats)
            rst = rst + resval
        # bias
        if self.bias is not None:
            rst = rst + self.bias.view(1, self._num_heads, self._out_feats)
        # activation
        if self.activation:
            rst = self.activation(rst)

        if get_attention:
            return rst, graph.edata['a']
        else:
            return rst