def rep_per_node(prefix, num_community):
    """
    Used on Libra partitioned data.
    This function reports the number of split-copies per node (replication)
    of a partitioned graph.

    Parameters
    ----------
    prefix: Partition folder location (contains replicationlist.csv)
    num_community: number of partitions or communities
    """
    ifile = os.path.join(prefix, 'replicationlist.csv')
    fhandle = open(ifile, "r")
    r_dt = {}

    fline = fhandle.readline()   ## read the first line, which contains a comment
    print(fline)
    for line in fhandle:
        if line[0] == '#':
            raise DGLError("[Bug] Read Hash char in rep_per_node func.")

        node = line.strip('\n')
        if r_dt.get(node, -100) == -100:
            r_dt[node] = 1
        else:
            r_dt[node] += 1

    fhandle.close()
    ## sanity checks
    for v in r_dt.values():
        if v >= num_community:
            raise DGLError("[Bug] Unexpected event in rep_per_node() in tools.py.")

    return r_dt
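# Usage sketch for rep_per_node (illustrative only: the tiny replicationlist.csv and the
# partition count below are made up; rep_per_node and DGLError come from the module above).
import os, tempfile

prefix = tempfile.mkdtemp()
with open(os.path.join(prefix, 'replicationlist.csv'), 'w') as f:
    f.write("# replicated nodes\n")     # comment line consumed by readline()
    f.write("12\n12\n7\n")              # node 12 is split across 2 partitions, node 7 across 1

print(rep_per_node(prefix, num_community=4))   # expected: {'12': 2, '7': 1}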
def __init__(self,
             in_feats,
             out_feats,
             rank_dim,
             norm='both',
             weight=True,
             bias=True,
             activation=None,
             allow_zero_in_degree=True):
    super(DGLGraphConv, self).__init__()
    if norm not in ('none', 'both', 'right'):
        raise DGLError('Invalid norm value. Must be either "none", "both" or "right".'
                       ' But got "{}".'.format(norm))
    self._in_feats = in_feats
    self._out_feats = out_feats
    self._rank_dim = rank_dim
    self._norm = norm
    self._allow_zero_in_degree = allow_zero_in_degree
    self.batch_norm = nn.BatchNorm1d(rank_dim)

    if weight:
        self.w1 = nn.Parameter(th.Tensor(in_feats, out_feats))
        self.w2 = nn.Parameter(th.Tensor(in_feats + 1, rank_dim))
        self.v = nn.Parameter(th.Tensor(rank_dim, out_feats))
        #self.weight_sum = nn.Parameter(th.Tensor(in_feats, out_feats))
        #self.weight2 = nn.Parameter(th.Tensor(rank_dim, out_feats))
        #self.bias = nn.Parameter(th.Tensor(rank_dim))
    else:
        self.register_parameter('weight', None)

    self.reset_parameters()
    self._activation = activation
def forward(self, graph, feat):
    with graph.local_scope():
        if not self._allow_zero_in_degree:
            if (graph.in_degrees() == 0).any():
                raise DGLError('There are 0-in-degree nodes in the graph, '
                               'output for those nodes will be invalid. '
                               'This is harmful for some applications, '
                               'causing silent performance regression. '
                               'Adding self-loop on the input graph by '
                               'calling `g = dgl.add_self_loop(g)` will resolve '
                               'the issue. Setting ``allow_zero_in_degree`` '
                               'to be `True` when constructing this module will '
                               'suppress the check and let the code run.')

        if self._cached_h is not None:
            feat_list = self._cached_h
            result = torch.zeros(feat_list[0].shape[0], self.out_feats).to(feat_list[0].device)
            feat_km1 = feat_list[0]
            for i, k_feat in enumerate(feat_list):
                _dLambda = self._lambada_act(self._lambada_fun(torch.cat([feat_km1, result], dim=1)))
                result += self.fc(k_feat * _dLambda)
                feat_km1 = k_feat
        else:
            feat_list = []
            # compute normalization
            degs = graph.in_degrees().float().clamp(min=1)
            norm = th.pow(degs, -0.5)
            norm = norm.to(feat.device).unsqueeze(1)
            feat_list.append(feat.float())

            # compute (D^-1/2 A D^-1/2)^k X
            for i in range(self._k):
                # left normalization: norm = D^-1/2
                feat = feat * norm
                feat = feat.float()
                graph.ndata['h'] = feat
                graph.update_all(fn.copy_u('h', 'm'),
                                 fn.sum('m', 'h'))
                feat = graph.ndata.pop('h')
                # right normalization
                feat = feat * norm
                feat_list.append(feat)

            result = torch.zeros(feat_list[0].shape[0], self.out_feats).to(feat_list[0].device)
            feat_km1 = feat_list[0]
            for i, k_feat in enumerate(feat_list):
                _dLambda = self._lambada_act(self._lambada_fun(torch.cat([feat_km1, result], dim=1)))
                result += self.fc(k_feat * _dLambda)
                feat_km1 = k_feat

        if self.norm is not None:
            result = self.norm(result)

        # cache feature
        if self._cached:
            self._cached_h = feat_list

        return result
def __init__(self,
             in_feats,
             out_feats,
             rank_dim,
             norm='both',
             weight=True,
             bias=True,
             activation=None,
             allow_zero_in_degree=False):
    super(DGLGraphConv, self).__init__()
    if norm not in ('none', 'both', 'right'):
        raise DGLError('Invalid norm value. Must be either "none", "both" or "right".'
                       ' But got "{}".'.format(norm))
    self._in_feats = in_feats
    self._out_feats = out_feats
    self._rank_dim = rank_dim
    self._allow_zero_in_degree = allow_zero_in_degree
    self.att1 = nn.Linear(out_feats, 1, bias=False)
    self.att2 = nn.Linear(out_feats, 1, bias=False)
    self.att_vec = nn.Linear(2, 2, bias=False)
    self.weight_sum = nn.Parameter(th.Tensor(in_feats, out_feats))
    self.weight_prod = nn.Parameter(th.Tensor(in_feats + 1, rank_dim))
    self.v = nn.Parameter(th.Tensor(rank_dim, out_feats))
    self.reset_parameters()
    self._activation = activation
def drpa_create_buckets(self):
    inner_nodex = self.ndata['inner_node'].tolist()
    n = len(inner_nodex)
    idx = inner_nodex.count(1)   ## number of native (non-split) nodes
    self.selected_nodes = [[] for i in range(self.nrounds)]

    # distribute the split nodes across nrounds rounds of communication
    total_alien_nodes = inner_nodex.count(0)   ## count split nodes
    alien_nodes_per_round = int((total_alien_nodes + self.nrounds - 1) / self.nrounds)

    counter = 0
    pos = 0
    r = 0
    while counter < n:
        if inner_nodex[counter] == 0:   ## split node
            self.selected_nodes[r].append(counter)
            pos += 1
            if pos % alien_nodes_per_round == 0:
                r = r + 1
        counter += 1

    if counter != len(inner_nodex):
        print("counter: ", counter, " ", len(inner_nodex))
        raise DGLError("Error: Issue in selected nodes.")
def forward(self, graph, memory, ts):
    graph = graph.local_var()   # use a local scope for the graph
    if not self._allow_zero_in_degree:
        if (graph.in_degrees() == 0).any():
            raise DGLError('There are 0-in-degree nodes in the graph, '
                           'output for those nodes will be invalid. '
                           'This is harmful for some applications, '
                           'causing silent performance regression. '
                           'Adding self-loop on the input graph by '
                           'calling `g = dgl.add_self_loop(g)` will resolve '
                           'the issue. Setting ``allow_zero_in_degree`` '
                           'to be `True` when constructing this module will '
                           'suppress the check and let the code run.')

    #print("Shape: ", memory.shape, ts.shape)
    graph.srcdata.update({'s': memory, 'timestamp': ts})
    graph.dstdata.update({'s': memory, 'timestamp': ts})

    # Dot product: calculate the attention weight
    graph.apply_edges(self.weight_fn)

    # Edge softmax
    graph.edata['sa'] = edge_softmax(graph, graph.edata['a']) / (self._out_feats**0.5)

    # Update dst nodes; here msg_fn includes the edge feature
    graph.update_all(self.msg_fn, fn.sum('attn', 'agg_u'))
    rst = graph.dstdata['agg_u']

    # Implement skip connection
    rst = self.merge(rst.view(-1, self._num_heads * self._out_feats), graph.dstdata['s'])
    return rst
def __init__(self, norm='both'):
    super(SmoothFilter, self).__init__()
    if norm not in ('none', 'both', 'right'):
        raise DGLError('Invalid norm value. Must be either "none", "both" or "right".'
                       ' But got "{}".'.format(norm))
    self._norm = norm
def __init__(self,
             in_feats,
             out_feats,
             norm='both',
             weight=True,
             bias=True,
             activation=None):
    super(GraphConv, self).__init__()
    if norm not in ('none', 'both', 'right'):
        raise DGLError('Invalid norm value. Must be either "none", "both" or "right".'
                       ' But got "{}".'.format(norm))
    self._in_feats = in_feats
    self._out_feats = out_feats
    self._norm = norm

    if weight:
        self.weight = nn.Parameter(th.Tensor(in_feats, out_feats))
    else:
        self.register_parameter('weight', None)

    if bias:
        self.bias = nn.Parameter(th.Tensor(out_feats))
    else:
        self.register_parameter('bias', None)

    self.reset_parameters()
    self._activation = activation
def forward(self, graph, feat, weight=None, edge_weight=None):
    with graph.local_scope():
        if not self._allow_zero_in_degree:
            if (graph.in_degrees() == 0).any():
                raise DGLError('There are 0-in-degree nodes in the graph, '
                               'output for those nodes will be invalid. '
                               'This is harmful for some applications, '
                               'causing silent performance regression. '
                               'Adding self-loop on the input graph by '
                               'calling `g = dgl.add_self_loop(g)` will resolve '
                               'the issue. Setting ``allow_zero_in_degree`` '
                               'to be `True` when constructing this module will '
                               'suppress the check and let the code run.')

        #aggregate_fn = fn.copy_src('h', 'm')
        if edge_weight is not None:
            assert edge_weight.shape[0] == graph.number_of_edges()
            graph.edata['_edge_weight'] = edge_weight
            aggregate_fn = fn.u_mul_e('h', '_edge_weight', 'm')

        # (BarclayII) For RGCN on heterogeneous graphs we need to support GCN on bipartite.
        feat_src, feat_dst = expand_as_pair(feat, graph)
        if self._norm == 'both':
            degs = graph.out_degrees().float().clamp(min=1)
            norm = th.pow(degs, -0.5)
            shp = norm.shape + (1, ) * (feat_src.dim() - 1)
            norm = th.reshape(norm, shp)
            feat_src = feat_src * norm

        feat_sumsrc = th.matmul(feat_src, self.w1)
        feat_prodsrc = th.matmul(
            th.cat((feat_src, th.ones([feat_src.shape[0], 1]).to('cuda:0')), 1),
            self.w2)
        graph.srcdata['h_sum'] = feat_sumsrc
        graph.srcdata['h_prod'] = feat_prodsrc
        graph.update_all(fn.copy_src('h_sum', 'm_sum'), self._elementwise_sum)
        graph.update_all(fn.copy_src('h_prod', 'm_prod'), self._elementwise_product)
        rst = graph.dstdata['h_sum'] + th.matmul(graph.dstdata['h_prod'], self.v)

        if self._norm != 'none':
            degs = graph.in_degrees().float().clamp(min=1)
            if self._norm == 'both':
                norm = th.pow(degs, -0.5)
            else:
                norm = 1.0 / degs
            shp = norm.shape + (1, ) * (feat_dst.dim() - 1)
            norm = th.reshape(norm, shp)
            rst = rst * norm

        if self._activation is not None:
            rst = self._activation(rst)

        return rst
def update_all(self, message_func, reduce_func, apply_node_func=None, etype=None):
    #assert self.rank != -1, "drpa not initialized !!!"
    if self.rank == -1:
        raise DGLError("Error: drpa not initialized!")

    mean = 0
    if reduce_func.name == "mean":
        reduce_func = fn.sum('m', 'neigh')
        mean = 1

    ## Local aggregate
    tic = time.time()
    DGLHeteroGraph.update_all(self, message_func, reduce_func)
    toc = time.time()
    if self.rank == 0 and display:
        print("Time for local aggregate: {:0.4f}, nrounds {}".format(toc - tic, self.nrounds))

    if self.nrounds == -1:
        if mean == 1:
            feat_dst = self.dstdata['h']
            self.r_in_degs = DGLHeteroGraph.in_degrees(self).to(feat_dst)
            self.dstdata['neigh'] = self.dstdata['neigh'] / self.r_in_degs.unsqueeze(-1)
        return

    neigh = self.dstdata['neigh']
    adj = self.dstdata['adj']
    inner_node = self.dstdata['inner_node']
    lftensor = self.dstdata['lf']
    feat_dst = self.dstdata['h']
    epoch = self.epochs_ar[self.epochi]

    ## Remote aggregate
    tic = time.time()
    self.r_in_degs = DGLHeteroGraph.in_degrees(self).to(feat_dst)
    self.dstdata['neigh'] = call_drpa_core(neigh, adj, inner_node, lftensor,
                                           self.selected_nodes, self.node_map,
                                           self.num_parts, self.rank, epoch,
                                           self.dist, self.r_in_degs, self.nrounds)
    self.epochs_ar[self.epochi] += 1
    self.epochi = (self.epochi + 1) % (self.nlayers)
    toc = time.time()
    if self.rank == 0 and display:
        print("Time for remote aggregate: {:0.4f}".format(toc - tic))

    if mean == 1:
        self.dstdata['neigh'] = self.dstdata['neigh'] / self.r_in_degs.unsqueeze(-1)
def find_partition(nid, node_map):
    if nid == -1:
        return 1000

    pos = 0
    for nnodes in node_map:
        if nid < nnodes:
            return pos
        pos = pos + 1

    raise DGLError("Error: Unexpected event in find_partition() func.")
def __init__(self, sampler_type='topk', k=10):
    super(TemporalSampler, self).__init__(1, False)
    if sampler_type == 'topk':
        self.sampler = partial(dgl.sampling.select_topk, k=k, weight='timestamp')
    elif sampler_type == 'uniform':
        self.sampler = partial(dgl.sampling.sample_neighbors, fanout=k)
    else:
        raise DGLError("Invalid sampler string; please use 'topk' or 'uniform'")
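# Illustrative sketch of what the 'topk' branch configures: dgl.sampling.select_topk keeps,
# for every node, the k in-edges with the largest 'timestamp'. The toy graph below is invented.
import dgl
import torch
from functools import partial

g = dgl.graph(([1, 2, 3, 4], [0, 0, 0, 0]), num_nodes=5)
g.edata['timestamp'] = torch.tensor([1.0, 3.0, 2.0, 4.0])

sampler = partial(dgl.sampling.select_topk, k=2, weight='timestamp')
sg = sampler(g)
print(sg.edges())   # only the two most recent in-edges of node 0 survive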
def __init__(self, in_feats, out_feats, num_layers, net):
    super(StackedEncoder, self).__init__()
    self.in_feats = in_feats
    self.out_feats = out_feats
    self.num_layers = num_layers
    self.net = net
    self.layers = nn.ModuleList()
    if self.num_layers <= 0:
        raise DGLError("Layer Number must be greater than 0!")
    self.layers.append(GraphGRUCell(self.in_feats, self.out_feats, self.net))
    for _ in range(self.num_layers - 1):
        self.layers.append(GraphGRUCell(self.out_feats, self.out_feats, self.net))
def download_proteins():
    print("Downloading dataset...")
    print("This might take a while...")
    url = "https://portal.nersc.gov/project/m1982/GNN/"
    file_name = "subgraph3_iso_vs_iso_30_70length_ALL.m100.propermm.mtx"
    url = url + file_name
    try:
        r = requests.get(url)
    except:
        raise DGLError("Error: Failed to download Proteins dataset!! Aborting..")

    with open("proteins.mtx", "wb") as handle:
        handle.write(r.content)
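# Hedged follow-up sketch (not part of the original code): the downloaded Matrix Market file
# can be read back with SciPy and wrapped into a DGLGraph, assuming the matrix is a square
# adjacency matrix and DGL >= 0.5 is available.
import scipy.io
import dgl

spmat = scipy.io.mmread("proteins.mtx")   # SciPy COO sparse matrix
g = dgl.from_scipy(spmat.tocsr())
print(g.num_nodes(), g.num_edges())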
def rep_per_node(prefix, nc):
    ifile = os.path.join(prefix, 'replicationlist.csv')
    f = open(ifile, "r")
    r_dt = {}

    fline = f.readline()
    for line in f:
        if line[0] == '#':
            raise DGLError("Error: Read hash in rep_per_node func.")

        node = line.strip('\n')
        if r_dt.get(node, -100) == -100:
            r_dt[node] = 1
        else:
            r_dt[node] += 1

    f.close()
    ## checks
    for v in r_dt.values():
        if v >= nc:
            raise DGLError("Error: Unexpected event in rep_per_node func.")

    return r_dt
def forward(self, graph, feat, get_attention=False):
    # Check in-degree and generate error
    if (graph.in_degrees() == 0).any():
        raise DGLError('There are 0-in-degree nodes in the graph, '
                       'output for those nodes will be invalid. '
                       'This is harmful for some applications, '
                       'causing silent performance regression. '
                       'Adding self-loop on the input graph by '
                       'calling `g = dgl.add_self_loop(g)` will resolve '
                       'the issue. Setting ``allow_zero_in_degree`` '
                       'to be `True` when constructing this module will '
                       'suppress the check and let the code run.')

    # projection process to get importance vector y
    graph.ndata['y'] = torch.abs(torch.matmul(self.p, feat.T).view(-1)) / torch.norm(self.p, p=2)
    # Use edge message passing function to get the weight from src node
    graph.apply_edges(fn.copy_u('y', 'y'))
    # Select top-k neighbors
    subgraph = select_topk(graph, self.k, 'y')
    # Sigmoid as information threshold
    subgraph.ndata['y'] = torch.sigmoid(subgraph.ndata['y'])
    # Use vector-matrix elementwise mul for acceleration
    feat = subgraph.ndata['y'].view(-1, 1) * feat
    feat = self.feat_drop(feat)
    h = self.fc(feat).view(-1, self.num_heads, self.out_feats)
    el = (h * self.attn_l).sum(dim=-1).unsqueeze(-1)
    er = (h * self.attn_r).sum(dim=-1).unsqueeze(-1)
    # Assign the values on the subgraph
    subgraph.srcdata.update({'ft': h, 'el': el})
    subgraph.dstdata.update({'er': er})
    # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
    subgraph.apply_edges(fn.u_add_v('el', 'er', 'e'))
    e = self.leaky_relu(subgraph.edata.pop('e'))
    # compute softmax
    subgraph.edata['a'] = self.attn_drop(edge_softmax(subgraph, e))
    # message passing
    subgraph.update_all(fn.u_mul_e('ft', 'a', 'm'),
                        fn.sum('m', 'ft'))
    rst = subgraph.dstdata['ft']
    # activation
    if self.activation:
        rst = self.activation(rst)
    # Residual
    if self.residual:
        rst = rst + self.residual_module(feat).view(feat.shape[0], -1, self.out_feats)

    if get_attention:
        return rst, subgraph.edata['a']
    else:
        return rst
def forward(self, graph, feat, weight=None):
    graph = graph.local_var()

    if self._norm == 'both':
        degs = graph.out_degrees().to(feat.device).float().clamp(min=1)
        norm = th.pow(degs, -0.5)
        shp = norm.shape + (1,) * (feat.dim() - 1)
        norm = th.reshape(norm, shp)
        # feat = feat * norm

    if weight is not None:
        if self.weight is not None:
            raise DGLError('External weight is provided while at the same time the'
                           ' module has defined its own weight parameter. Please'
                           ' create the module with flag weight=False.')
    else:
        weight = self.weight

    if self._in_feats > self._out_feats:
        # mult W first to reduce the feature size for aggregation.
        if weight is not None:
            feat = th.matmul(feat, weight)
        feat = feat * norm
        graph.srcdata['h'] = feat
        graph.update_all(fn.copy_src(src='h', out='m'),
                         fn.sum(msg='m', out='h'))
        rst = graph.dstdata['h']
    else:
        # aggregate first then mult W
        graph.srcdata['h'] = feat
        graph.update_all(fn.copy_src(src='h', out='m'),
                         fn.sum(msg='m', out='h'))
        rst = graph.dstdata['h']
        if weight is not None:
            rst = th.matmul(rst, weight)

    if self._norm != 'none':
        degs = graph.in_degrees().to(feat.device).float().clamp(min=1)
        if self._norm == 'both':
            norm = th.pow(degs, -0.5)
        else:
            norm = 1.0 / degs
        shp = norm.shape + (1,) * (feat.dim() - 1)
        norm = th.reshape(norm, shp)
        rst = rst * norm

    if self._activation is not None:
        rst = self._activation(rst)

    return rst
def forward(self, graph, feat, soft_label):
    with graph.local_scope():
        if not self._allow_zero_in_degree:
            if (graph.in_degrees() == 0).any():
                raise DGLError('There are 0-in-degree nodes in the graph, '
                               'output for those nodes will be invalid. '
                               'This is harmful for some applications, '
                               'causing silent performance regression. '
                               'Adding self-loop on the input graph by '
                               'calling `g = dgl.add_self_loop(g)` will resolve '
                               'the issue. Setting ``allow_zero_in_degree`` '
                               'to be `True` when constructing this module will '
                               'suppress the check and let the code run.')

        if self.ptype == 'ind':
            feat_src = h_dst = self.feat_drop(feat)
            el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
            er = th.zeros(graph.num_nodes(), device=graph.device)
        elif self.ptype == 'tra':
            feat_src = self.feat_drop(self.fc_emb)
            feat_dst = h_dst = th.zeros(graph.num_nodes(), device=graph.device)
            el = feat_src
            er = feat_dst

        cog_label = soft_label
        graph.srcdata.update({'ft': cog_label, 'el': el})
        graph.dstdata.update({'er': er})
        # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
        graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
        # graph.edata['e'] = th.ones(graph.num_edges(), device=graph.device)  # non-parameterized PLP
        e = graph.edata.pop('e')
        # compute softmax
        graph.edata['a'] = self.attn_drop(edge_softmax(graph, e))
        att = graph.edata['a'].squeeze()
        # message passing
        graph.update_all(fn.u_mul_e('ft', 'a', 'm'),
                         fn.sum('m', 'ft'))

        if self.mlp_layers > 0:
            rst = th.sigmoid(self.lr_alpha) * graph.dstdata['ft'] + \
                  th.sigmoid(-self.lr_alpha) * self.mlp(feat)
        else:
            rst = graph.dstdata['ft']

        # residual
        if self.res_fc is not None:
            resval = self.res_fc(h_dst)
            rst = rst + resval
        # activation
        if self.activation:
            rst = self.activation(rst)

        return rst, att, th.sigmoid(self.lr_alpha).squeeze(), el.squeeze(), er.squeeze()
def forward(self, graph, feat, soft_label):
    graph = graph.local_var()
    if not self._allow_zero_in_degree:
        if (graph.in_degrees() == 0).any():
            raise DGLError('There are 0-in-degree nodes in the graph, '
                           'output for those nodes will be invalid. '
                           'This is harmful for some applications, '
                           'causing silent performance regression. '
                           'Adding self-loop on the input graph by '
                           'calling `g = dgl.add_self_loop(g)` will resolve '
                           'the issue. Setting ``allow_zero_in_degree`` '
                           'to be `True` when constructing this module will '
                           'suppress the check and let the code run.')

    h_src = feat
    feat_src = feat_dst = self.fc(h_src)
    if graph.is_block:
        feat_dst = feat_src[:graph.number_of_dst_nodes()]

    # Assign features to nodes
    graph.srcdata.update({'ft': feat_src})
    graph.dstdata.update({'ft': feat_dst})
    # Step 1. dot product
    graph.apply_edges(fn.u_dot_v('ft', 'ft', 'a'))
    # graph.edata['a'] = th.ones(graph.num_edges(), device=graph.device)

    # Step 2. edge softmax to compute attention scores
    graph.edata['sa'] = edge_softmax(graph, graph.edata['a'])
    att = graph.edata['sa'].squeeze()

    cog_label = soft_label
    # cog_label = self.fc2(feat)
    # cog_label = th.sigmoid(self.lr_alpha) * soft_label + th.sigmoid(-self.lr_alpha) * self.fc2(feat)
    graph.srcdata.update({'ft': cog_label})
    graph.dstdata.update({'ft': cog_label})

    # Step 3. Broadcast softmax value to each edge, and aggregate dst node
    graph.update_all(fn.u_mul_e('ft', 'sa', 'attn'),
                     fn.sum('attn', 'agg_u'))

    # output results to the destination nodes
    rst = graph.dstdata['agg_u']

    return rst, att, th.sigmoid(self.lr_alpha).squeeze()
def forward(self, graph, feat, attn_feat):
    with graph.local_scope():
        if not self._allow_zero_in_degree:
            if (graph.in_degrees() == 0).any():
                raise DGLError('There are 0-in-degree nodes in the graph, '
                               'output for those nodes will be invalid. '
                               'This is harmful for some applications, '
                               'causing silent performance regression. '
                               'Adding self-loop on the input graph by '
                               'calling `g = dgl.add_self_loop(g)` will resolve '
                               'the issue. Setting ``allow_zero_in_degree`` '
                               'to be `True` when constructing this module will '
                               'suppress the check and let the code run.')

        h_src = self.feat_drop(feat)
        attn_h_src = self.feat_drop(attn_feat)
        feat_src = self.fc(h_src).view(-1, self._num_heads, self._out_feats)
        attn_feat_src = attn_feat_dst = self.fc_attn(attn_h_src).view(
            -1, self._num_heads, self._out_feats)
        if graph.is_block:
            attn_feat_dst = attn_feat_src[:graph.number_of_dst_nodes()]

        el = (attn_feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
        er = (attn_feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1)
        graph.srcdata.update({'ft': feat_src, 'el': el})
        graph.dstdata.update({'er': er})
        # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
        graph.apply_edges(dgl.function.u_add_v('el', 'er', 'e'))
        e = self.leaky_relu(graph.edata.pop('e'))
        # compute softmax
        graph.edata['a'] = self.attn_drop(edge_softmax(graph, e))
        # message passing
        graph.update_all(dgl.function.u_mul_e('ft', 'a', 'm'),
                         dgl.function.sum('m', 'ft'))
        rst = graph.dstdata['ft']
        return rst
def forward(self, graph, feat, weight=None, edge_weights=None):
    with graph.local_scope():
        if not self._allow_zero_in_degree:
            if (graph.in_degrees() == 0).any():
                raise DGLError('There are 0-in-degree nodes in the graph, '
                               'output for those nodes will be invalid. '
                               'This is harmful for some applications, '
                               'causing silent performance regression. '
                               'Adding self-loop on the input graph by '
                               'calling `g = dgl.add_self_loop(g)` will resolve '
                               'the issue. Setting ``allow_zero_in_degree`` '
                               'to be `True` when constructing this module will '
                               'suppress the check and let the code run.')

        # (BarclayII) For RGCN on heterogeneous graphs we need to support GCN on bipartite.
        feat_src, feat_dst = expand_as_pair(feat, graph)
        if self._norm == 'both':
            degs = graph.out_degrees().float().clamp(min=1)
            norm = th.pow(degs, -0.5)
            shp = norm.shape + (1, ) * (feat_src.dim() - 1)
            norm = th.reshape(norm, shp)
            feat_src = feat_src * norm

        if weight is not None:
            if self.weight is not None:
                raise DGLError('External weight is provided while at the same time the'
                               ' module has defined its own weight parameter. Please'
                               ' create the module with flag weight=False.')
        else:
            weight = self.weight

        if self._in_feats > self._out_feats:
            # mult W first to reduce the feature size for aggregation.
            if weight is not None:
                feat_src = th.matmul(feat_src, weight)
            graph.srcdata['h'] = feat_src
            if edge_weights is None:
                graph.update_all(fn.copy_src(src='h', out='m'),
                                 fn.sum(msg='m', out='h'))
            else:
                graph.edata['a'] = edge_weights
                graph.update_all(fn.u_mul_e('h', 'a', 'm'),
                                 fn.sum(msg='m', out='h'))
            rst = graph.dstdata['h']
        else:
            # aggregate first then mult W
            graph.srcdata['h'] = feat_src
            if edge_weights is None:
                graph.update_all(fn.copy_src(src='h', out='m'),
                                 fn.sum(msg='m', out='h'))
            else:
                graph.edata['a'] = edge_weights
                graph.update_all(fn.u_mul_e('h', 'a', 'm'),
                                 fn.sum(msg='m', out='h'))
            rst = graph.dstdata['h']
            if weight is not None:
                rst = th.matmul(rst, weight)

        if self._norm != 'none':
            degs = graph.in_degrees().float().clamp(min=1)
            if self._norm == 'both':
                norm = th.pow(degs, -0.5)
            else:
                norm = 1.0 / degs
            shp = norm.shape + (1, ) * (feat_dst.dim() - 1)
            norm = th.reshape(norm, shp)
            rst = rst * norm

        if self.bias is not None:
            rst = rst + self.bias

        if self._activation is not None:
            rst = self._activation(rst)

        return rst
def forward(self, graph, feat, weight=None): r"""Compute graph convolution. Notes ----- * Input shape: :math:`(N, *, \text{in_feats})` where * means any number of additional dimensions, :math:`N` is the number of nodes. * Output shape: :math:`(N, *, \text{out_feats})` where all but the last dimension are the same shape as the input. * Weight shape: "math:`(\text{in_feats}, \text{out_feats})`. Parameters ---------- graph : DGLGraph The graph. feat : torch.Tensor The input feature weight : torch.Tensor, optional Optional external weight tensor. Returns ------- torch.Tensor The output feature """ graph = graph.local_var() if self._norm == 'both': degs = graph.out_degrees().to(feat.device).float().clamp(min=1) norm = th.pow(degs, -0.5) shp = norm.shape + (1, ) * (feat.dim() - 1) norm = th.reshape(norm, shp) feat = feat * norm if weight is not None: if self.weight is not None: raise DGLError( 'External weight is provided while at the same time the' ' module has defined its own weight parameter. Please' ' create the module with flag weight=False.') else: weight = self.weight # print(self._in_feats, self._out_feats) if self._in_feats > self._out_feats: # mult W first to reduce the feature size for aggregation. if weight is not None: feat = th.matmul(feat, weight) graph.srcdata['h'] = feat ####### graph.ndata['feat'] = feat graph.apply_edges(lambda edges: { 'e': th.sum((th.mul(edges.src['h'], th.tanh(edges.dst['h']))), 1) }) e = self.leaky_relu(graph.edata.pop('e')) e_soft = edge_softmax(graph, e) graph.ndata.pop('feat') ####### graph.update_all(fn.copy_src(src='h', out='m'), fn.sum(msg='m', out='h')) rst = graph.dstdata['h'] else: # aggregate first then mult W graph.srcdata['h'] = feat ####### graph.ndata['feat'] = feat graph.apply_edges(lambda edges: { 'e': th.sum((th.mul(edges.src['h'], th.tanh(edges.dst['h']))), 1) }) e = self.leaky_relu(graph.edata.pop('e')) e_soft = edge_softmax(graph, e) graph.ndata.pop('feat') ####### graph.update_all(fn.copy_src(src='h', out='m'), fn.sum(msg='m', out='h')) rst = graph.dstdata['h'] if weight is not None: rst = th.matmul(rst, weight) if self._norm != 'none': degs = graph.in_degrees().to(feat.device).float().clamp(min=1) if self._norm == 'both': norm = th.pow(degs, -0.5) else: norm = 1.0 / degs shp = norm.shape + (1, ) * (feat.dim() - 1) norm = th.reshape(norm, shp) rst = rst * norm if self.bias is not None: rst = rst + self.bias if self._activation is not None: rst = self._activation(rst) return rst, e_soft
def forward(self, graph, feat, edge_weight=None, weight=None):
    r"""
    Description
    -----------
    Compute graph convolution.

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    feat : torch.Tensor or pair of torch.Tensor
        If a torch.Tensor is given, it represents the input feature of shape
        :math:`(N, D_{in})` where :math:`D_{in}` is the size of the input feature and
        :math:`N` is the number of nodes. If a pair of torch.Tensor is given, which is
        the case for bipartite graphs, the pair must contain two tensors of shape
        :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`.
    edge_weight : torch.Tensor of shape (E, 1)
        Edge weights, E for the number of edges.
    weight : torch.Tensor, optional
        Optional external weight tensor.

    Returns
    -------
    torch.Tensor
        The output feature

    Raises
    ------
    DGLError
        Case 1:
        If there are 0-in-degree nodes in the input graph, it will raise DGLError
        since no message will be passed to those nodes. This will cause invalid output.
        The error can be ignored by setting ``allow_zero_in_degree`` parameter to ``True``.

        Case 2:
        External weight is provided while at the same time the module
        has defined its own weight parameter.

    Note
    ----
    * Input shape: :math:`(N, *, \text{in_feats})` where * means any number of additional
      dimensions, :math:`N` is the number of nodes.
    * Output shape: :math:`(N, *, \text{out_feats})` where all but the last dimension are
      the same shape as the input.
    * Weight shape: :math:`(\text{in_feats}, \text{out_feats})`.
    """
    with graph.local_scope():
        if not self._allow_zero_in_degree:
            if (graph.in_degrees() == 0).any():
                raise DGLError('There are 0-in-degree nodes in the graph, '
                               'output for those nodes will be invalid. '
                               'This is harmful for some applications, '
                               'causing silent performance regression. '
                               'Adding self-loop on the input graph by '
                               'calling `g = dgl.add_self_loop(g)` will resolve '
                               'the issue. Setting ``allow_zero_in_degree`` '
                               'to be `True` when constructing this module will '
                               'suppress the check and let the code run.')

        # (BarclayII) For RGCN on heterogeneous graphs we need to support GCN on bipartite.
        feat_src, feat_dst = expand_as_pair(feat, graph)
        if self._norm == 'both':
            degs = graph.out_degrees().float().clamp(min=1)
            norm = th.pow(degs, -0.5)
            shp = norm.shape + (1,) * (feat_src.dim() - 1)
            norm = th.reshape(norm, shp)
            feat_src = feat_src * norm

        if weight is not None:
            if self.weight is not None:
                raise DGLError('External weight is provided while at the same time the'
                               ' module has defined its own weight parameter. Please'
                               ' create the module with flag weight=False.')
        else:
            weight = self.weight

        # Set edge weight
        graph.edata['w'] = edge_weight

        if self._in_feats > self._out_feats:
            # mult W first to reduce the feature size for aggregation.
            if weight is not None:
                feat_src = th.matmul(feat_src, weight)
            graph.srcdata['h'] = feat_src
            # Changed from fn.copy_src to fn.u_mul_e
            graph.update_all(fn.u_mul_e(lhs_field='h', rhs_field='w', out='m'),
                             fn.sum(msg='m', out='h'))
            rst = graph.dstdata['h']
        else:
            # aggregate first then mult W
            graph.srcdata['h'] = feat_src
            # Changed from fn.copy_src to fn.u_mul_e
            graph.update_all(fn.u_mul_e(lhs_field='h', rhs_field='w', out='m'),
                             fn.sum(msg='m', out='h'))
            rst = graph.dstdata['h']
            if weight is not None:
                rst = th.matmul(rst, weight)

        if self._norm != 'none':
            degs = graph.in_degrees().float().clamp(min=1)
            if self._norm == 'both':
                norm = th.pow(degs, -0.5)
            else:
                norm = 1.0 / degs
            shp = norm.shape + (1,) * (feat_dst.dim() - 1)
            norm = th.reshape(norm, shp)
            rst = rst * norm

        if self.bias is not None:
            rst = rst + self.bias

        if self._activation is not None:
            rst = self._activation(rst)

        return rst
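# For comparison, a minimal sketch of DGL's stock GraphConv, which exposes the same
# edge_weight hook (toy graph and weights invented here; assumes DGL >= 0.6).
import dgl
import torch
from dgl.nn import GraphConv

g = dgl.add_self_loop(dgl.graph(([0, 1, 2], [1, 2, 0]), num_nodes=3))
feat = torch.randn(3, 4)
ew = torch.rand(g.num_edges())            # one scalar weight per edge

conv = GraphConv(4, 2, norm='both')
out = conv(g, feat, edge_weight=ew)       # aggregation becomes u_mul_e instead of copy_u
print(out.shape)                          # torch.Size([3, 2])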
def partition_graph(num_community, G, resultdir):
    """
    Performs vertex-cut based graph partitioning and converts the partitioning
    output to DGL input format.

    Given a graph, this function will create a folder named ``XCommunities`` where
    ``X`` stands for the number of communities. It will contain ``X`` files named
    ``communityZ.txt`` for each partition Z (from 0 to X-1); each such file contains
    a list of edges assigned to that partition. These files constitute the output of
    the Libra graph partitioner. The folder also contains X subfolders named ``partZ``,
    each of which stores the DGL/DistGNN graph for partition Z; these graph files are
    used as input to DistGNN. The folder also contains a json file with the partitions'
    information.

    Currently we require the graph's node data to contain the following columns:

    * ``features`` for node features.
    * ``label`` for node labels.
    * ``train_mask`` as a boolean mask of the training node set.
    * ``val_mask`` as a boolean mask of the validation node set.
    * ``test_mask`` as a boolean mask of the test node set.

    Parameters
    ----------
    num_community : int
        Number of partitions to create.
    G : DGLGraph
        Input graph to be partitioned.
    resultdir : str
        Output location for storing the partitioned graphs.
    """
    print("num partitions: ", num_community)
    print("output location: ", resultdir)

    ## create output directory
    try:
        os.makedirs(resultdir, mode=0o775, exist_ok=True)
    except:
        raise DGLError("Error: Could not create directory: {}".format(resultdir))

    tic = time.time()
    print("####################################################################")
    print("Executing partitions: ", num_community)
    ltic = time.time()
    try:
        resultdir = os.path.join(resultdir, str(num_community) + "Communities")
        os.makedirs(resultdir, mode=0o775, exist_ok=True)
    except:
        raise DGLError("Error: Could not create sub-directory: {}".format(resultdir))

    ## Libra partitioning
    libra_partition(num_community, G, resultdir)

    ltoc = time.time()
    print("Time taken by {} partitions {:0.4f} sec".format(num_community, ltoc - ltic))
    print()

    toc = time.time()
    print("Generated ", num_community, " partitions in {:0.4f} sec".format(toc - tic), flush=True)
    print("Partitioning completed successfully !!!")
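# Minimal usage sketch for partition_graph (illustrative, not from the original code):
# Cora is small enough to partition quickly; its node data already carries 'feat', 'label'
# and the three masks, so only the 'features' column name the partitioner expects is added.
import dgl

g = dgl.data.CoraGraphDataset()[0]
g.ndata['features'] = g.ndata['feat']          # partitioner expects a 'features' column
partition_graph(4, g, './Libra_result_cora')   # "Libra_result_" prefix kept, see the driver script below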
def forward(self, graph, feat, get_attention=False):
    with graph.local_scope():
        if not self._allow_zero_in_degree:
            if (graph.in_degrees() == 0).any():
                raise DGLError('There are 0-in-degree nodes in the graph, '
                               'output for those nodes will be invalid. '
                               'This is harmful for some applications, '
                               'causing silent performance regression. '
                               'Adding self-loop on the input graph by '
                               'calling `g = dgl.add_self_loop(g)` will resolve '
                               'the issue. Setting ``allow_zero_in_degree`` '
                               'to be `True` when constructing this module will '
                               'suppress the check and let the code run.')

        if isinstance(feat, tuple):
            h_src = self.feat_drop(feat[0])
            h_dst = self.feat_drop(feat[1])
            if not hasattr(self, 'fc_src'):
                feat_src = self.fc(h_src).view(-1, self._num_heads, self._out_feats)
                feat_dst = self.fc(h_dst).view(-1, self._num_heads, self._out_feats)
            else:
                feat_src = self.fc_src(h_src).view(-1, self._num_heads, self._out_feats)
                feat_dst = self.fc_dst(h_dst).view(-1, self._num_heads, self._out_feats)
        else:
            h_src = h_dst = self.feat_drop(feat)
            feat_src = feat_dst = self.fc(h_src).view(
                -1, self._num_heads, self._out_feats)
            if graph.is_block:
                feat_dst = feat_src[:graph.number_of_dst_nodes()]

        # NOTE: GAT paper uses "first concatenation then linear projection"
        # to compute attention scores, while ours is "first projection then
        # addition"; the two approaches are mathematically equivalent:
        # We decompose the weight vector a mentioned in the paper into
        # [a_l || a_r], then
        # a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j
        # Our implementation is much more efficient because we do not need to
        # save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus,
        # the addition can be optimized with DGL's built-in function u_add_v,
        # which further speeds up computation and saves memory footprint.
        el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
        er = (feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1)
        graph.srcdata.update({'ft': feat_src, 'el': el})
        graph.dstdata.update({'er': er})
        # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
        graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
        e = self.leaky_relu(graph.edata.pop('e'))
        # compute softmax
        graph.edata['a'] = self.attn_drop(edge_softmax(graph, e))
        # compute weighted attention
        graph.edata['a'] = (graph.edata['a'].permute(1, 2, 0)
                            * graph.edata['weight']).permute(2, 0, 1)
        # message passing
        graph.update_all(fn.u_mul_e('ft', 'a', 'm'),
                         fn.sum('m', 'ft'))
        rst = graph.dstdata['ft']
        # residual
        if self.res_fc is not None:
            resval = self.res_fc(h_dst).view(h_dst.shape[0], -1, self._out_feats)
            rst = rst + resval
        # activation
        if self.activation:
            rst = self.activation(rst)

        if get_attention:
            return rst, graph.edata['a']
        else:
            return rst
def forward(ctx, feat, adj, inner_node, lftensor, selected_nodes,
            node_map, num_parts, rank, epoch, dist, in_degs, nrounds):
    """
    feat           : graph (partition node features)
    adj            : list of remote clone nodes
    inner_node     : mark split node or not
    lftensor       : root node for split nodes
    selected_nodes : split nodes binned by r (delay) number of rounds
    node_map       : Node ID range for all the partitions
    num_parts      : Total mpi ranks or partitions
    rank           : My MPI rank
    epoch          : Current epoch
    dist           : PyTorch distributed object
    in_degs        : node degree
    nrounds        : r (delay factor)
    """
    prof = []   ## runtime profile
    nrounds_update = nrounds
    feat_size = feat.shape[1]
    int_threshold = pow(2, 31)/4 - 1                    ## bytes
    base_chunk_size = int(int_threshold / num_parts)    ## bytes
    base_chunk_size_fs = floor(base_chunk_size / (feat_size + 1))
    roundn = epoch % nrounds

    node_map_t = torch.tensor(node_map, dtype=torch.int32)
    selected_nodes_t = []
    for sn in selected_nodes:
        selected_nodes_t.append(torch.tensor(sn, dtype=torch.int32))
    buckets = torch.tensor([0 for i in range(num_parts)], dtype=torch.int32)
    width = adj.shape[1]   ## feature vector length

    tic = time.time()
    ## 1. Get the bucket sizes to be sent to all the ranks
    ##    and the locations of those remote nodes.
    ver2part = torch.empty(len(selected_nodes[roundn]), dtype=torch.int32)
    ver2part_index = torch.empty(len(selected_nodes[roundn]), dtype=torch.int32)
    fdrpa_comm_buckets(adj, selected_nodes_t[roundn], ver2part, ver2part_index,
                       node_map_t, buckets, lftensor, width, num_parts, rank)
    message(rank, "Time for bucketing: {:0.4f}", (time.time() - tic))

    ## comms to gather the bucket sizes for the all-to-all feats comms
    input_sr = []
    for i in range(0, num_parts):
        input_sr.append(torch.tensor([buckets[i]], dtype=torch.int64))
    output_sr = [torch.zeros(1, dtype=torch.int64) for i in range(0, num_parts)]
    sync_req = dist.all_to_all(output_sr, input_sr, async_op=True)
    sync_req.wait()   ## recv the #nodes communicated

    send_feat_len = 0
    in_size = []
    for i in range(num_parts):
        in_size.append(int(buckets[i]) * (feat_size + 1))
        send_feat_len += in_size[i]

    ## 2. Split the data if the communication volume is beyond the MPI limit
    ##############################################################################
    tic = time.time()
    cum = 0; flg = 0
    for i in output_sr:
        cum += int(i) * (feat_size + 1)
        if int(i) >= base_chunk_size_fs: flg = 1
    for i in input_sr:
        if int(i) >= base_chunk_size_fs: flg = 1

    nsplit_comm = 1
    if cum >= int_threshold or send_feat_len >= int_threshold or flg:
        for i in range(num_parts):
            val = ceil(int(input_sr[i]) / base_chunk_size_fs)
            if val > nsplit_comm: nsplit_comm = val

    nsplit_comm_t = torch.tensor(nsplit_comm)
    ## communicate how many splits (iters) for the communication
    req_nsplit_comm = torch.distributed.all_reduce(nsplit_comm_t,
                                                   op=torch.distributed.ReduceOp.MAX,
                                                   async_op=True)
    lim = 1
    soffset_base = [0 for i in range(num_parts)]  ## min chunk size
    soffset_cur = [0 for i in range(num_parts)]   ## send list of ints
    roffset_cur = [0 for i in range(num_parts)]   ## recv list of ints

    j = 0
    while j < lim:
        tsend = 0; trecv = 0
        for i in range(num_parts):
            soffset_base[i] += soffset_cur[i]
            if input_sr[i] < base_chunk_size_fs:
                soffset_cur[i] = int(input_sr[i])
                input_sr[i] = 0
            else:
                soffset_cur[i] = base_chunk_size_fs
                input_sr[i] -= base_chunk_size_fs

            if output_sr[i] < base_chunk_size_fs:
                roffset_cur[i] = int(output_sr[i])
                output_sr[i] = 0
            else:
                roffset_cur[i] = base_chunk_size_fs
                output_sr[i] -= base_chunk_size_fs

            tsend += soffset_cur[i]
            trecv += roffset_cur[i]

        send_node_list = \
            [torch.empty(soffset_cur[i], dtype=torch.int32) for i in range(num_parts)]
        sten_ = torch.empty(tsend * (feat_size + 1), dtype=feat.dtype)
        dten_ = torch.empty(trecv * (feat_size + 1), dtype=feat.dtype)
        sten_nodes = torch.empty(tsend, dtype=torch.int32)
        dten_nodes = torch.empty(trecv, dtype=torch.int32)

        out_size = [0 for i in range(num_parts)]
        in_size = [0 for i in range(num_parts)]
        out_size_nodes = [0 for i in range(num_parts)]
        in_size_nodes = [0 for i in range(num_parts)]

        offset = 0
        ## 2.1 gather feats to send by leaf nodes
        for i in range(num_parts):
            fdrpa_gather_emb_lr(feat, feat.shape[0], adj, sten_, offset,
                                send_node_list[i], sten_nodes,
                                selected_nodes_t[roundn],
                                in_degs, ver2part, ver2part_index, width, feat_size, i,
                                soffset_base[i], soffset_cur[i],
                                node_map_t, num_parts)

            out_size[i] = roffset_cur[i] * (feat_size + 1)
            in_size[i] = soffset_cur[i] * (feat_size + 1)
            offset += soffset_cur[i]
            out_size_nodes[i] = roffset_cur[i]
            in_size_nodes[i] = soffset_cur[i]

        message(rank, "Sending {}, recving {} data I", tsend, trecv)

        ## Communicate the features and node IDs
        req_feats = dist.all_to_all_single(dten_, sten_, out_size, in_size, async_op=True)
        gfqueue_feats_lr.push(req_feats)
        req_nodes = dist.all_to_all_single(dten_nodes, sten_nodes,
                                           out_size_nodes, in_size_nodes, async_op=True)
        gfqueue_nodes_lr.push(req_nodes)

        soffset_cur_copy = soffset_cur.copy()

        ## Store pointers for the data in motion for delayed comms
        buffcomm_feats_lr.push(dten_)
        buffcomm_nodes_lr.push(dten_nodes)
        buffcomm_feats_size_lr.push(out_size)
        buffcomm_nodes_size_lr.push(out_size_nodes)
        buffcomm_snl_lr.push(send_node_list)         ## fwd phase II
        buffcomm_snl_size_lr.push(soffset_cur_copy)  ## fwd phase II

        if j == 0:
            req_nsplit_comm.wait()
            lim = int(nsplit_comm_t)
        j += 1
    ##############################################################################

    buffcomm_iter_lr.push(lim)
    message(rank, "Max iters in MPI split comm: {}", (lim))
    prof.append('Gather I: {:0.4f}'.format(time.time() - tic))

    ## 3. Recv the remote partial aggregates at the root and update the aggregates
    recv_list_nodes = []
    if epoch >= nrounds_update or nrounds == 1:
        if gfqueue_feats_lr.empty() == True:
            raise DGLError("Error: unexpected event, forward empty queue.")
        ticg = time.time()

        lim = buffcomm_iter_lr.pop()
        out_size_nodes_ar = []

        for i in range(lim):
            if rank == 0 and display: tic = time.time()
            req = gfqueue_feats_lr.pop(); req.wait()
            req = gfqueue_nodes_lr.pop(); req.wait()
            prof.append('Async comm I: {:0.4f}'.format(time.time() - tic))

            otf = buffcomm_feats_lr.pop()
            out_size = buffcomm_feats_size_lr.pop()
            otn = buffcomm_nodes_lr.pop()
            out_size_nodes = buffcomm_nodes_size_lr.pop()
            out_size_nodes_ar.append(out_size_nodes)

            recv_list_nodes_ar = []; ilen = 0
            for l in range(num_parts):
                ilen += out_size_nodes[l]
                recv_list_nodes_ar.append(torch.empty(out_size_nodes[l], dtype=torch.int32))

            pos = torch.tensor([0], dtype=torch.int64)
            offsetf = 0; offsetn = 0
            ## 3.1 Scatter: update the local aggregates using the received remote aggregates
            for l in range(num_parts):
                scatter_reduce_lr(otf, offsetf, otn, offsetn, feat, in_degs,
                                  node_map_t, out_size[l], feat_size, num_parts,
                                  recv_list_nodes_ar[l], pos, int(out_size_nodes[l]), rank)
                offsetf += out_size[l]
                offsetn += out_size_nodes[l]

            if ilen != pos[0]:
                raise DGLError("Error: Issue in scatter reduce.")
            recv_list_nodes.append(recv_list_nodes_ar)

        prof.append('Scatter I: {:0.4f}'.format(time.time() - ticg))

        tic = time.time()
        for j in range(lim):   ### gather-scatter round II
            tsend = 0; trecv = 0
            stn_fp2 = buffcomm_snl_size_lr.pop()
            out_size_nodes = out_size_nodes_ar[j]
            for i in range(num_parts):
                tsend += out_size_nodes[i]
                trecv += stn_fp2[i]

            recv_list_nodes_ = recv_list_nodes[j]
            sten_ = torch.empty(tsend * (feat_size + 1), dtype=feat.dtype)
            dten_ = torch.empty(trecv * (feat_size + 1), dtype=feat.dtype)

            out_size = [0 for i in range(num_parts)]
            in_size = [0 for i in range(num_parts)]
            offset = 0
            ## 3.2 Gather the partial aggregates to send from the root back to the leaves
            for i in range(num_parts):
                fdrpa_gather_emb_rl(feat, feat.shape[0], sten_, offset,
                                    recv_list_nodes_[i], out_size_nodes[i],
                                    in_degs, feat_size, node_map_t, num_parts)

                out_size[i] = stn_fp2[i] * (feat_size + 1)
                in_size[i] = out_size_nodes[i] * (feat_size + 1)
                offset += in_size[i]

            req = dist.all_to_all_single(dten_, sten_, out_size, in_size, async_op=True)
            gfqueue_feats_rl.push(req)
            ## push dten
            buffcomm_feats_rl.push(dten_)
            buffcomm_feats_size_rl.push(out_size)

        buffcomm_iter_rl.push(lim)
        prof.append('Gather II: {:0.4f}'.format(time.time() - tic))

    ## 4. Recv the remote partial aggregates from the root and update the local aggregates
    if epoch >= 2*nrounds_update or nrounds == 1:
        ticg = time.time()
        lim = buffcomm_iter_rl.pop()

        for i in range(lim):
            tic = time.time()
            req = gfqueue_feats_rl.pop(); req.wait()
            prof.append('Async comms II: {:0.4f}'.format(time.time() - tic))

            otf = buffcomm_feats_rl.pop()
            out_size = buffcomm_feats_size_rl.pop()
            stn = buffcomm_snl_lr.pop()

            offset = 0
            for l in range(num_parts):
                scatter_reduce_rl(otf, offset, stn[l], stn[l].shape[0],
                                  in_degs, feat, node_map_t,
                                  out_size[l], feat_size, num_parts)
                offset += out_size[l]

        prof.append('Scatter II: {:0.4f}'.format(time.time() - ticg))

    if rank == 0:   ## Display runtime profile for major components
        print(prof, flush=True)
        print()

    return feat
def forward(self, graph, feat, weight=None, real_weighted_g=False):
    graph = graph.local_var()

    if real_weighted_g:
        # weighted degrees
        graph.update_all(fn.copy_e("weight", "e_w"),
                         fn.sum("e_w", "in_degs"))
        degs = graph.ndata['in_degs']

    if self._norm == 'both':
        if not real_weighted_g:
            degs = graph.out_degrees().to(feat.device).float().clamp(min=1)
        norm = th.pow(degs, -0.5)
        shp = norm.shape + (1, ) * (feat.dim() - 1)
        norm = th.reshape(norm, shp)
        feat = feat * norm

    if weight is not None:
        if self.weight is not None:
            raise DGLError('External weight is provided while at the same time the'
                           ' module has defined its own weight parameter. Please'
                           ' create the module with flag weight=False.')
    else:
        weight = self.weight

    if self._in_feats > self._out_feats:
        # mult W first to reduce the feature size for aggregation.
        if weight is not None:
            feat = th.matmul(feat, weight)
        graph.ndata['h'] = feat
        if real_weighted_g:
            graph.update_all(fn.u_mul_e("h", "weight", "src_mul_edge"),
                             fn.sum(msg='src_mul_edge', out='h'))
        else:
            graph.update_all(fn.copy_src(src='h', out='m'),
                             fn.sum(msg='m', out='h'))
        rst = graph.ndata['h']
    else:
        # aggregate first then mult W
        graph.ndata['h'] = feat
        if real_weighted_g:
            graph.update_all(fn.u_mul_e("h", "weight", "src_mul_edge"),
                             fn.sum(msg='src_mul_edge', out='h'))
        else:
            graph.update_all(fn.copy_src(src='h', out='m'),
                             fn.sum(msg='m', out='h'))
        rst = graph.ndata['h']
        if weight is not None:
            rst = th.matmul(rst, weight)

    if self._norm != 'none':
        if not real_weighted_g:
            degs = graph.in_degrees().to(feat.device).float().clamp(min=1)
        if self._norm == 'both':
            norm = th.pow(degs, -0.5)
        else:
            # divide the aggregated messages by each node's in-degree
            norm = 1.0 / degs
        shp = norm.shape + (1, ) * (feat.dim() - 1)
        norm = th.reshape(norm, shp)
        rst = rst * norm

    if self.bias is not None:
        rst = rst + self.bias

    if self._activation is not None:
        rst = self._activation(rst)

    return rst
argparser.add_argument('--num-parts', type=int, default=2)
argparser.add_argument('--out-dir', type=str, default='./')
args = argparser.parse_args()

dataset = args.dataset
num_community = args.num_parts
out_dir = 'Libra_result_' + dataset   ## "Libra_result_" prefix is mandatory
resultdir = os.path.join(args.out_dir, out_dir)

print("Input dataset for partitioning: ", dataset)
if args.dataset == 'ogbn-products':
    print("Loading ogbn-products")
    G, _ = load_ogb('ogbn-products')
elif args.dataset == 'ogbn-papers100M':
    print("Loading ogbn-papers100M")
    G, _ = load_ogb('ogbn-papers100M')
elif args.dataset == 'proteins':
    G = load_proteins('proteins')
elif args.dataset == 'ogbn-arxiv':
    print("Loading ogbn-arxiv")
    G, _ = load_ogb('ogbn-arxiv')
else:
    try:
        G = load_data(args)[0]
    except:
        raise DGLError("Error: Dataset {} not found !!!".format(dataset))

print("Done loading the graph.", flush=True)

partition_graph(num_community, G, resultdir)
def forward(self, graph, feat):
    with graph.local_scope():
        if not self._allow_zero_in_degree:
            if (graph.in_degrees() == 0).any():
                raise DGLError('There are 0-in-degree nodes in the graph, '
                               'output for those nodes will be invalid. '
                               'This is harmful for some applications, '
                               'causing silent performance regression. '
                               'Adding self-loop on the input graph by '
                               'calling `g = dgl.add_self_loop(g)` will resolve '
                               'the issue. Setting ``allow_zero_in_degree`` '
                               'to be `True` when constructing this module will '
                               'suppress the check and let the code run.')

        # (BarclayII) For RGCN on heterogeneous graphs we need to support GCN on bipartite.
        feat_src, feat_dst = expand_as_pair(feat, graph)

        # if self._norm == 'both':
        #     degs = graph.out_degrees().float().clamp(min=1)
        #     norm = torch.pow(degs, -0.5)
        #     shp = norm.shape + (1,) * (feat_src.dim() - 1)
        #     norm = torch.reshape(norm, shp)
        #     feat_src = feat_src * norm

        feat_sum_src = th.matmul(feat_src, self.weight_sum)
        feat_prod_src = th.matmul(
            th.cat((feat_src, th.ones([feat_src.shape[0], 1]).to('cuda:0')), 1),
            self.weight_prod)

        #graph.srcdata['h_prod'] = th.tanh(feat_prod_src)  #torch.tanh(feat_src)
        graph.srcdata['h_sum'] = feat_sum_src
        graph.srcdata['h_prod'] = th.tanh(feat_prod_src)
        graph.update_all(fn.copy_src('h_prod', 'm_prod'), self._elementwise_product)
        graph.update_all(fn.copy_src('h_sum', 'm_sum'), self._elementwise_sum)
        #graph.update_all(fn.copy_src('h_sum', 'm_sum'), fn.sum(msg='m_sum', out='h_sum'))

        prod_agg = th.matmul(graph.dstdata['h_prod'], self.v)
        sum_agg = graph.dstdata['h_sum']
        att_prod, att_sum = self.attention(prod_agg, sum_agg)
        rst = att_prod * prod_agg + att_sum * sum_agg
        #rst = self.batch_norm(rst)
        #print("rst1", rst)
        #rst = th.matmul(rst, self.weight2) + graph.dstdata['h_sum']
        #print("rst2", rst)

        # if self._norm != 'none':
        #     degs = graph.in_degrees().float().clamp(min=1)
        #     if self._norm == 'both':
        #         norm = torch.pow(degs, -0.5)
        #     else:
        #         norm = 1.0 / degs
        #     shp = norm.shape + (1,) * (feat_dst.dim() - 1)
        #     norm = torch.reshape(norm, shp)
        #     rst = rst * norm

        #if self.bias is not None:
        #    rst = rst + self.bias

        return rst
def forward(self, graph, feat): r""" Description ----------- Compute graph attention network layer. Parameters ---------- graph : DGLGraph The graph. feat : torch.Tensor or pair of torch.Tensor If a torch.Tensor is given, the input feature of shape :math:`(N, D_{in})` where :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes. If a pair of torch.Tensor is given, the pair must contain two tensors of shape :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`. Returns ------- torch.Tensor The output feature of shape :math:`(N, H, D_{out})` where :math:`H` is the number of heads, and :math:`D_{out}` is size of output feature. Raises ------ DGLError If there are 0-in-degree nodes in the input graph, it will raise DGLError since no message will be passed to those nodes. This will cause invalid output. The error can be ignored by setting ``allow_zero_in_degree`` parameter to ``True``. """ with graph.local_scope(): if not self._allow_zero_in_degree: if (graph.in_degrees() == 0).any(): raise DGLError( 'There are 0-in-degree nodes in the graph, ' 'output for those nodes will be invalid. ' 'This is harmful for some applications, ' 'causing silent performance regression. ' 'Adding self-loop on the input graph by ' 'calling `g = dgl.add_self_loop(g)` will resolve ' 'the issue. Setting ``allow_zero_in_degree`` ' 'to be `True` when constructing this module will ' 'suppress the check and let the code run.') if isinstance(feat, tuple): h_src = self.feat_drop(feat[0]) h_dst = self.feat_drop(feat[1]) if not hasattr(self, 'fc_src'): self.fc_src, self.fc_dst = self.fc, self.fc feat_src = self.fc_src(h_src).view(-1, self._num_heads, self._out_feats) feat_dst = self.fc_dst(h_dst).view(-1, self._num_heads, self._out_feats) else: h_src = h_dst = self.feat_drop(feat) feat_src = feat_dst = self.fc(h_src).view( -1, self._num_heads, self._out_feats) if graph.is_block: feat_dst = feat_src[:graph.number_of_dst_nodes()] # NOTE: GAT paper uses "first concatenation then linear projection" # to compute attention scores, while ours is "first projection then # addition", the two approaches are mathematically equivalent: # We decompose the weight vector a mentioned in the paper into # [a_l || a_r], then # a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j # Our implementation is much efficient because we do not need to # save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus, # addition could be optimized with DGL's built-in function u_add_v, # which further speeds up computation and saves memory footprint. el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1) er = (feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1) graph.srcdata.update({'ft': feat_src, 'el': el}) graph.dstdata.update({'er': er}) # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively. graph.apply_edges(fn.u_add_v('el', 'er', 'e')) e = self.leaky_relu(graph.edata.pop('e')) # compute softmax graph.edata['a'] = self.attn_drop(edge_softmax(graph, e)) # message passing graph.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft')) rst = graph.dstdata['ft'] # residual rst = rst.flatten(1) rst_norm = self.layer_norm(rst) if self.res_fc is not None: resval = self.res_fc(h_dst).view(h_dst.shape[0], -1, self._out_feats).flatten(1) rst_norm = self.feat_drop(rst_norm) + resval # activation rst = self.activation(rst_norm) rst = rst_norm + self.feat_drop(rst) rst = self.ff_layer_norm(rst) return rst