def remove_Connected(dataset):
    """Keep only the largest connected component of every graph in *dataset*.

    Args:
        dataset: iterable of torch_geometric ``Data`` objects.

    Returns:
        List of ``Data`` objects, one per input graph, each rebuilt from the
        adjacency sub-matrix restricted to the largest connected component.
    """
    custom_dataset = []
    for data in dataset:
        g = cnv.to_networkx(data).to_undirected()
        # max() over the component generator replaces the original manual
        # max-tracking loop (whose `count` variable was never used).
        components = list(nx.connected_components(g))
        largest = max(components, key=len) if components else set()
        keep = list(largest)
        adj = nx.adjacency_matrix(g).todense()
        # Restrict the adjacency matrix to the kept nodes: rows, then columns.
        adj = adj[keep, :][:, keep]
        g2 = nx.from_numpy_matrix(adj).to_undirected()
        custom_dataset.append(cnv.from_networkx(g2))
    return custom_dataset
def construct_graph(self, path):
    """Build a torch_geometric graph input from a path of visited nodes.

    Input: list of nodes as a path
    Returns: torch geometric graph input Data
    """
    working = self.graph.copy()
    # Node 0 is the fixed start node; the last node of the path is "current".
    working.nodes[0]['start'] = True
    working.nodes[path[-1]]['current'] = True
    for visited_node in path:
        working.nodes[visited_node]['visited'] = True
    data = from_networkx(working)
    # Assemble the per-node feature matrix: one column per attribute.
    feature_columns = [
        data.visited.float(),
        data.start.float(),
        data.current.float(),
        data.x_pos.float(),
        data.y_pos.float(),
    ]
    data.x = torch.stack(feature_columns).T
    # Drop the raw per-attribute tensors now that they live in data.x.
    for attr in ('current', 'start', 'visited', 'x_pos', 'y_pos'):
        setattr(data, attr, None)
    #print(data)
    return data
def networkx_to_torch2(self, networkx_graph):
    """Convert a networkx graph to a torch_geometric graph on self.device,
    applying the TargetIndegree edge-attribute transform."""
    from torch_geometric.utils import convert
    import torch_geometric.transforms as T

    pyg_graph = convert.from_networkx(networkx_graph)
    pyg_graph = T.Compose([T.TargetIndegree()])(pyg_graph)
    return pyg_graph.to(self.device)
def process_set(self):
    """Sample 6000 train + 1000 test MNIST images, wire them into one random
    graph (100 random neighbors per node), and store the result as self.data.

    Returns:
        The assembled torch_geometric ``Data`` object (also kept on self.data),
        with x = stacked 28x28 images, y = labels, and shuffled index masks.
    """
    # NOTE(review): np.random.choice samples WITH replacement by default, so
    # split_train/split_test may contain duplicate indices — confirm intended.
    self.split_train = np.random.choice(60000, 6000)
    self.split_test = np.random.choice(10000, 1000)
    random_list = list(range(0, 7000))
    np.random.shuffle(random_list)
    # Masks are shuffled index lists (not boolean masks) over the 7000 nodes.
    self.train_mask = random_list[0:6000]
    self.test_mask = random_list[6000:7000]

    num_nodes = 6000 + 1000
    node_list = [i for i in range(num_nodes)]
    edge_list = []
    for i in range(num_nodes):
        # 100 random neighbors per node (duplicates/self-loops possible;
        # nx.Graph deduplicates parallel edges on insertion).
        neighbor_i = np.random.choice(7000, 100)
        edge_list += [(i, idx) for idx in neighbor_i]
    g = nx.Graph()
    g.add_nodes_from(node_list)
    g.add_edges_from(edge_list)
    data = convert.from_networkx(g)

    # Accumulate tensors in Python lists and concatenate ONCE at the end:
    # the original re-ran torch.cat inside the loop, which copies all
    # previously collected data every iteration (O(n^2) total work).
    img_parts = []
    label_parts = []
    for i in range(7000):
        if i < 6000:
            (img, label) = self.train_dataset[self.split_train[i]]
        else:
            (img, label) = self.test_dataset[self.split_test[i - 6000]]
        print("label:", label)
        print("Processing %d-th Image" % (i))
        img_parts.append(img.reshape([1, 28, 28]))
        label_parts.append(torch.Tensor([label]))
    imgs = torch.cat(img_parts)
    labels = torch.cat(label_parts)
    print(labels[0:5])

    data.train_mask = self.train_mask
    data.test_mask = self.test_mask
    data.x = imgs
    data.y = labels
    if self.pre_transform is not None:
        data = self.pre_transform(data)
    self.data = data
    return self.data
def map_graph(self, T, G):
    """Attach a feature vector sampled from tensor T to every node of G.

    Args:
        T: feature tensor indexed as T[:, :, y, x]; assumes a leading batch
           dimension of size 1 (squeeze(0)) — presumably an NCHW feature map
           with stride 16 relative to node 'pos' coordinates; TODO confirm.
        G: networkx graph whose nodes carry a 'pos' (x, y) attribute.

    Returns:
        torch_geometric ``Data`` built from G via from_networkx.
    """
    for node in list(G.nodes):
        # Graph.node was removed in networkx 2.4; use Graph.nodes instead.
        pos = G.nodes[node]['pos']
        x = math.ceil(pos[1] / 16)
        y = math.ceil(pos[0] / 16)
        # Detach and move to CPU before converting to a numpy node feature.
        feat = T[:, :, y, x].squeeze(0).detach().cpu().numpy()
        G.add_node(node, x=feat)
        # print(G.nodes[node]['x'])
    return from_networkx(G)
def process(self):
    """Build and save one torch_geometric Data object per image, all sharing
    a single pixel-grid graph topology (deep-copied per image)."""
    (img0, label0) = self.origin_dataset[0]
    print(img0)
    # One node per pixel of a square image (e.g. 28*28 = 784 for MNIST).
    num_nodes = img0.shape[1] * img0.shape[1]
    print("img0.shape: " , img0.shape)
    print("num_nodes: %d" % num_nodes)
    n = num_nodes
    node_list = [i for i in range(num_nodes)]
    edge_list = []
    # NOTE(review): this double loop runs num_nodes x num_nodes times while n
    # is also set to num_nodes — for an 8-neighborhood over an image grid one
    # would expect i, j to range over the side length (sqrt(num_nodes)), not
    # the node count; verify against matrix2nodeid's expected coordinates.
    for i in range(num_nodes):
        for j in range(num_nodes):
            # NOTE(review): n//n always evaluates to 1, so e.g. i-1+n//n == i,
            # which makes several of these entries self-loops; this looks like
            # it was meant to be a modular wraparound such as (i-1+n) % n.
            # Also the (i, j-1+n//n) pair appears TWICE below while the
            # (i, j+1) neighbor is missing — likely a copy-paste slip in the
            # intended 8-neighbor stencil. Left as-is pending confirmation.
            edge_ij = [
                (matrix2nodeid(i-1+n//n, j, n), matrix2nodeid(i, j, n)),
                (matrix2nodeid(i+1+n//n, j, n), matrix2nodeid(i, j, n)),
                (matrix2nodeid(i, j-1+n//n, n), matrix2nodeid(i, j, n)),
                (matrix2nodeid(i, j-1+n//n, n), matrix2nodeid(i, j, n)),
                (matrix2nodeid(i-1+n//n, j-1+n//n, n), matrix2nodeid(i, j, n)),
                (matrix2nodeid(i+1+n//n, j-1+n//n, n), matrix2nodeid(i, j, n)),
                (matrix2nodeid(i-1+n//n, j+1+n//n, n), matrix2nodeid(i, j, n)),
                (matrix2nodeid(i+1+n//n, j+1+n//n, n), matrix2nodeid(i, j, n))
            ]
            edge_list += edge_ij
    data_list = []
    # Build the shared topology once; per-image graphs are deep copies of it.
    g0 = nx.Graph()
    g0.add_nodes_from(node_list)
    g0.add_edges_from(edge_list)
    for i in range(len(self.origin_dataset)):
        print("Processing %d-th Image" % (i))
        # NOTE(review): processing is capped at the first 11 images — looks
        # like a leftover debugging limit; confirm before removing.
        if i > 10:
            break
        # Constructing a new g with node_list and edge_list is very time-consuming
        # So here we can use deepcopy
        # g = nx.Graph()
        # g.add_nodes_from(node_list)
        # g.add_edges_from(edge_list)
        g = copy.deepcopy(g0)
        img, label = self.origin_dataset[i]
        data = convert.from_networkx(g)
        # One scalar feature per node: the flattened pixel intensities.
        data.x = img.reshape([num_nodes, 1])
        data.graph_label = label
        if self.pre_transform is not None:
            data = self.pre_transform(data)
        # data_list.append(data)
        # data, slices = self.collate(data_list)
        # torch.save((data, slices), osp.join(self.data_dir, self.data_prefix+'data_{}.pt'.format(i)))
        # Each image is saved as its own .pt file under processed_dir.
        torch.save(data, osp.join(self.processed_dir, self.data_prefix+'data_{}.pt'.format(i)))
def __init__(self, data, infer=False):
    """Load a molecule dataset from a DataFrame or a data-file path.

    Args:
        data: a pandas DataFrame, or a str path accepted by ``read_data``.
        infer: currently unused; kept for interface compatibility.
               # NOTE(review): confirm whether callers rely on this flag.

    Raises:
        TypeError: if *data* is neither a DataFrame nor a str.
    """
    if isinstance(data, pd.DataFrame):
        self.data = data
    elif isinstance(data, str):
        self.data = read_data(data)
    else:
        # The original fell through silently, leaving self.data unset and
        # failing later with an opaque AttributeError; fail fast instead.
        raise TypeError(
            'data must be a pandas DataFrame or a path string, got %r'
            % type(data).__name__)
    self.NON_MORD_NAMES = ['smiles', 'active']
    # Standardize all Mordred descriptor columns (everything except the
    # smiles string and the activity label).
    scl = StandardScaler()
    self.mord_ft = scl.fit_transform(
        self.data.drop(columns=self.NON_MORD_NAMES).astype(np.float64)
    ).tolist()
    # SMILES -> RDKit mol -> networkx -> torch_geometric Data.
    self.graphs = [
        Chem.MolFromSmiles(s) for s in self.data['smiles'].values.tolist()
    ]
    self.graphs = [from_networkx(mol_to_nx(g)) for g in self.graphs]
    self.label = self.data['active'].values.tolist()
def process_nx_graph(self, g, add_weight=False, uniform_weight=False):
    """given a nx graph. add random edge_weight"""
    # Guard clause: already a torch_geometric graph — just reorder and return.
    if isinstance(g, Data):
        print('already a pygeo graph')
        return self.reorder_pyG(g)
    if add_weight:
        for src, dst in g.edges:
            g[src][dst]['edge_weight'] = np.random.random()
    # Note: when both flags are set, the uniform weight wins (same as before).
    if uniform_weight:
        for src, dst in g.edges:
            g[src][dst]['edge_weight'] = 0.1
    return self.reorder_pyG(from_networkx(g))
def transform_networkx_sample_to_torch_geometric_data(networkx_sample, label):
    """Wrap a networkx graph as a torch_geometric Data with scalar label y."""
    sample = from_networkx(networkx_sample)
    sample.y = torch.tensor([label])
    return sample