def test_contains_isolated_nodes():
    """Check isolated-node detection with and without an explicit node count."""
    # Nodes 0 and 1 point at each other; node 0 also carries a self-loop.
    edges = torch.tensor([[0, 1, 0], [1, 0, 0]])
    assert not contains_isolated_nodes(edges)
    # Declaring three nodes leaves node 2 without any edge.
    assert contains_isolated_nodes(edges, num_nodes=3)

    # Node 2 now appears only in its own self-loop; the check is expected
    # to report it as isolated.
    edges = torch.tensor([[0, 1, 2, 0], [1, 0, 2, 0]])
    assert contains_isolated_nodes(edges)
def test_contains_isolated_nodes():
    """Check isolated-node detection on edge lists built from row/col vectors."""
    # Two nodes connected to each other, plus a self-loop on node 0.
    src, dst = torch.tensor([0, 1, 0]), torch.tensor([1, 0, 0])
    edges = torch.stack([src, dst], dim=0)
    assert not contains_isolated_nodes(edges)
    # With three declared nodes, node 2 has no incident edge.
    assert contains_isolated_nodes(edges, num_nodes=3)

    # Node 2 is reachable only through its own self-loop; the check is
    # expected to report it as isolated.
    src, dst = torch.tensor([0, 1, 2, 0]), torch.tensor([1, 0, 2, 0])
    edges = torch.stack([src, dst], dim=0)
    assert contains_isolated_nodes(edges)
def CreateAuxGraph(edge_index, pos_i, pos_j, original_vertex_features, pos):
    """Build an auxiliary graph in which every edge of G becomes a new vertex.

    Each column ``k`` of ``edge_index`` gets a fresh vertex numbered
    ``num_original_vertices + k``. The new vertex has all-zero features, is
    placed at ``(pos_i + pos_j) / 2``, and is paired with both endpoints of
    the edge it replaces.

    Args:
        edge_index: Edge list of G; row 0 and row 1 hold the two endpoints.
        pos_i: Positions averaged with ``pos_j`` to place the new vertices
            (presumably one row per edge -- TODO confirm with callers).
        pos_j: See ``pos_i``.
        original_vertex_features: Feature matrix of G, one row per vertex.
        pos: Position matrix of G, one row per vertex.

    Returns:
        Tuple ``(edge_index_aux, new_vertex_features, new_vertex_pos)``.
    """
    n_vertices = original_vertex_features.shape[0]
    n_edges = len(edge_index[0, :])

    # Ids of the vertices that stand in for the original edges.
    edge_vertex_ids = (n_vertices + torch.arange(0, n_edges)).to(device)
    source_pairs = torch.stack((edge_index[0, :], edge_vertex_ids), dim=0)
    target_pairs = torch.stack((edge_index[1, :], edge_vertex_ids), dim=0)

    # Original rows first, then one zero row per edge-vertex.
    feature_padding = torch.zeros(n_edges, original_vertex_features.shape[1]).to(device)
    new_vertex_features = torch.cat([original_vertex_features, feature_padding], dim=0)

    position_padding = torch.zeros(n_edges, pos.shape[1]).to(device)
    new_vertex_pos = torch.cat([pos, position_padding], dim=0)
    new_vertex_pos[n_vertices:, :] = (pos_i + pos_j) / 2

    # Target pairs come first, followed by source pairs.
    edge_index_aux = torch.cat([target_pairs, source_pairs], dim=1)
    assert not contains_self_loops(edge_index_aux)
    assert not contains_isolated_nodes(edge_index_aux)
    return edge_index_aux, new_vertex_features, new_vertex_pos
def get_torch_data(df, threshold=3):
    """Convert one structure's atom table into a graph ``Data`` object.

    Atoms become nodes with one-hot encoded ``atom`` labels as features.
    Two atoms are connected when their Euclidean distance is non-zero and
    at most ``threshold``; the distance is stored as the edge attribute.
    The threshold is raised in steps of 0.5 until no atom is isolated.

    Args:
        df: Table with columns ``atom``, ``Energy(Ry)``, ``x(angstrom)``,
            ``y(angstrom)`` and ``z(angstrom)``; one row per atom.
        threshold: Initial distance cut-off.

    Returns:
        ``Data`` with ``x``, ``edge_index``, ``edge_attr`` and ``y``, where
        ``y`` is the negated first ``Energy(Ry)`` value.
    """
    energy = np.array([-1 * df['Energy(Ry)'].values[0]])
    atoms = np.expand_dims(df['atom'].values, axis=1)
    # Densify explicitly instead of passing ``sparse=False``: that keyword
    # was renamed in newer scikit-learn releases, while ``toarray()`` on the
    # default sparse output works with both old and new versions.
    one_hot_encoding = OneHotEncoder().fit_transform(atoms).toarray()

    coords = df[['x(angstrom)', 'y(angstrom)', 'z(angstrom)']].values
    # Check isolation against the real atom count, not a fixed 13.
    num_atoms = coords.shape[0]
    # Pairwise distances do not depend on the threshold, so compute once.
    full_dist = distance.cdist(coords, coords)
    max_dist = full_dist.max()

    while True:
        # Zero entries are treated as "no edge" by dense_to_sparse.
        dist = torch.from_numpy(np.where(full_dist > threshold, 0, full_dist))
        edge_index, edge_attr = data_utils.dense_to_sparse(dist)
        edge_attr = edge_attr.unsqueeze(dim=1).float()
        edge_index = edge_index.long()
        if not data_utils.contains_isolated_nodes(edge_index, num_nodes=num_atoms):
            break
        if threshold >= max_dist:
            # Every non-zero distance is already an edge; raising the
            # threshold further cannot connect anything, so stop rather
            # than loop forever (e.g. a single atom or coincident atoms).
            break
        threshold += 0.5

    x = torch.from_numpy(one_hot_encoding).float()
    y = torch.from_numpy(energy).float()
    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
def has_isolated_nodes(self) -> bool:
    """Return whether at least one node has no incident edge.

    Raises:
        NameError: If ``self.size(1)`` yields ``None``, i.e. the node count
            cannot be determined.
    """
    edge_index = self.edge_index
    num_nodes = self.size(1)
    if num_nodes is None:
        raise NameError("Unable to infer 'num_nodes'")

    if not self.is_bipartite():
        return contains_isolated_nodes(edge_index, num_nodes)

    # Bipartite case: only the second row of ``edge_index`` is compared
    # against the node count.
    distinct_targets = torch.unique(edge_index[1])
    return distinct_targets.numel() < num_nodes
def product2edgeindex(product):
    """Threshold a score matrix into a symmetric, de-duplicated edge list.

    Starting at 0.9, the threshold is lowered in steps of 0.1 until the
    graph built from all index pairs with ``product > threshold`` satisfies
    ``getncom(G) == 1``.

    Args:
        product: Array compared element-wise against the threshold;
            presumably a square 2-D score matrix -- TODO confirm with callers.

    Returns:
        Integer array with two rows and one column per unique pair; every
        selected pair ``(i, j)`` is present together with ``(j, i)``.
    """
    value = 0.9
    while True:
        idx = np.where(product > value)
        G = nx.Graph()
        G.add_edges_from(np.array(idx).T.tolist())
        if getncom(G) == 1:
            # Append the reversed pairs so the edge list is symmetric.
            forward = np.hstack([idx[0], idx[1]])
            backward = np.hstack([idx[1], idx[0]])
            # Builtin ``int`` is used as the dtype because the ``np.int``
            # alias no longer exists in current NumPy releases.
            return np.unique(np.array([forward, backward], dtype=int), axis=1)
        value -= 0.1
def print_stats():
    """Print one tab-separated statistics row for each entry of ``DATASETS``.

    Each row holds the dataset name, graph count, average nodes and edges
    per graph, feature and class counts, followed by four flags: all graphs
    undirected, any self-loops, any isolated nodes, all features one-hot.
    """
    for data in DATASETS:
        dataset = load_data(data)
        num_graphs = len(dataset)
        avg_nodes = dataset.data.x.size(0) / num_graphs
        avg_edges = dataset.data.edge_index.size(1) / num_graphs
        num_features, num_classes = dataset.num_features, dataset.num_classes
        print(
            f'{data}\t{num_graphs}\t{avg_nodes}\t{avg_edges}\t{num_features}\t{num_classes}',
            end='\t')

        undirected = onehot = True
        self_loops = isolated_nodes = False
        for graph in dataset:
            # Every predicate is evaluated for every graph; the flags only
            # ever flip in one direction.
            undirected &= bool(
                is_undirected(graph.edge_index, num_nodes=graph.num_nodes))
            self_loops |= bool(contains_self_loops(graph.edge_index))
            isolated_nodes |= bool(
                contains_isolated_nodes(graph.edge_index, num_nodes=graph.num_nodes))
            # A row is one-hot when exactly one of its entries is positive.
            rows_not_onehot = ((graph.x > 0).sum(dim=1) != 1).sum()
            onehot &= not bool(rows_not_onehot > 0)
        print(f'{undirected}\t{self_loops}\t{isolated_nodes}\t{onehot}')
def contains_isolated_nodes(self):
    r"""Returns :obj:`True`, if the graph contains isolated nodes."""
    return contains_isolated_nodes(self.edge_index, self.num_nodes)
def contains_isolated_nodes(self):
    """Return the module-level ``contains_isolated_nodes`` result for this object.

    The check is run on ``self.edge_index`` with ``self.num_nodes`` as the
    node count.
    """
    edges, node_count = self.edge_index, self.num_nodes
    return contains_isolated_nodes(edges, node_count)
def read_files(self, verbose=True):
    """Load the graph tensors, labels and split masks of the configured dataset.

    File locations are derived from ``self.config['data_path']`` and
    ``self.config['data_name']``. On return the following attributes are
    set: ``edge_index_data``, ``x_data``, ``y_data``, ``node2id``,
    ``edge_type_data`` (``None`` unless the model is 'rgcn', 'rgat' or
    'rsage'), ``split_masks`` and ``vocab_size``.

    Args:
        verbose: When true, print a banner, dataset statistics and the
            elapsed preparation time.
    """
    start = time.time()
    data_name = self.config['data_name']
    model_name = self.config['model_name']
    if verbose:
        print("="*100 + "\n\t\t\t\t Preparing Data for {}\n".format(data_name) + "="*100)
        print("\n\n==>> Loading feature matrix and adj matrix....")

    # The two dataset families use different file-name suffixes.
    if data_name in ['gossipcop', 'politifact']:
        file_names = {
            'x': 'feat_matrix_lr_train_30_5.npz',
            'y': 'all_labels_lr_train_30_5.json',
            'edge_index': 'adj_matrix_lr_train_30_5_edge.npy',
            'node2id': 'node2id_lr_train_30_5.json',
            'split_mask': 'split_mask_lr_30_5.json',
            'edge_type': 'edge_type_lr_train_30_5_edge.npy',
        }
    else:
        file_names = {
            'x': 'feat_matrix_lr_top10_train.npz',
            'y': 'all_labels_lr_top10_train.json',
            'edge_index': 'adj_matrix_lr_top10_train_edge.npy',
            'node2id': 'node2id_lr_top10_train.json',
            'split_mask': 'split_mask_top10.json',
            'edge_type': 'edge_type_lr_top10_edge.npy',
        }
    base_dir = os.path.join(self.config['data_path'], data_name)
    x_file = os.path.join(base_dir, file_names['x'])
    y_file = os.path.join(base_dir, file_names['y'])
    edge_index_file = os.path.join(base_dir, file_names['edge_index'])
    node2id_file = os.path.join(base_dir, file_names['node2id'])
    split_mask_file = os.path.join(base_dir, file_names['split_mask'])

    # Only these models load a per-edge type array.
    needs_edge_types = model_name in ['rgcn', 'rgat', 'rsage']

    # Convert the array that was just loaded; there is no separate local
    # holding the edge index.
    self.edge_index_data = torch.from_numpy(np.load(edge_index_file)).long()
    self.x_data = torch.from_numpy(load_npz(x_file).toarray())

    # Context managers close the JSON files deterministically.
    with open(y_file, 'r') as fh:
        self.y_data = torch.LongTensor(json.load(fh)['all_labels'])
    with open(node2id_file, 'r') as fh:
        self.node2id = json.load(fh)

    if needs_edge_types:
        edge_type_file = os.path.join(base_dir, file_names['edge_type'])
        self.edge_type_data = torch.from_numpy(np.load(edge_type_file)).long()
    else:
        self.edge_type_data = None

    with open(split_mask_file, 'r') as fh:
        self.split_masks = json.load(fh)

    num_nodes, self.vocab_size = self.x_data.shape

    # The structural checks are skipped for the 'HGCN' model.
    check_structure = model_name != 'HGCN'
    if check_structure:
        isolated_nodes = contains_isolated_nodes(edge_index=self.edge_index_data)
        self_loops = contains_self_loops(edge_index=self.edge_index_data)

    if verbose:
        print("\n\n" + "-"*50 + "\nDATA STATISTICS:\n" + "-"*50)
        if check_structure:
            print("Contains isolated nodes = ", isolated_nodes)
            print("Contains self loops = ", self_loops)
        print("Vocabulary size = ", self.vocab_size)
        print('No. of nodes in graph = ', num_nodes)
        # NOTE(review): ``new_num_nodes`` is not assigned in this method; it
        # must exist at module scope or this line raises NameError -- confirm.
        print('No. of nodes after removing isolated nodes = ', new_num_nodes)
        # NOTE(review): ``self.data`` is not set in this method; presumably it
        # is built before read_files() is called -- verify against callers.
        print("No. of edges in graph = ", self.data.num_edges)
        train_count = self.data.train_mask.sum().item()
        val_count = self.data.val_mask.sum().item()
        print("\nNo.of train instances = ", train_count)
        print("No.of val instances = ", val_count)
        print("No.of test instances = ", num_nodes - train_count - val_count)

        end = time.time()
        hours, minutes, seconds = calc_elapsed_time(start, end)
        print("\n"+ "-"*50 + "\nTook {:0>2} hours: {:0>2} mins: {:05.2f} secs to Prepare Data\n".format(hours,minutes,seconds))