def process(self):
    """Convert the pickled samples of the current split into a collated PyG file.

    Loads ``<self.root>/<self.name>_<self.split>.pkl``, turns every sample
    into a ``Data`` object, collates the objects with ``self.collate`` and
    stores the resulting ``(data, slices)`` pair at ``self.processed_paths[0]``.
    """
    pickle_path = os.path.join(self.root, self.name + '_%s.pkl' % self.split)
    # NOTE(review): pickle.load can execute code embedded in the file; the
    # split files are assumed to come from a trusted source.
    with open(pickle_path, 'rb') as handle:
        self.dataset = pickle.load(handle)

    print("preparing graphs for the %s set..." % (self.split.upper()))
    print('Converting graphs into PyG objects...')

    def _sample_to_pyg(sample):
        # Build one Data object from a raw sample (reads node_feat, W, node_label).
        feats = sample.node_feat
        pyg = Data()
        pyg.__num_nodes__ = feats.size(0)
        # Non-zero entries of the adjacency matrix W become the edge list.
        pyg.edge_index = (sample.W != 0).nonzero().T
        pyg.x = feats.long()
        # Constant one-dimensional edge features for Residual Gated ConvNet;
        # edge_index must already be set because num_edges is read here.
        pyg.edge_attr = torch.ones(pyg.num_edges, 1)
        pyg.y = sample.node_label.to(torch.float32)
        return pyg

    pyg_graph_list = [_sample_to_pyg(sample) for sample in tqdm(self.dataset)]

    # The raw samples are no longer needed once converted.
    del self.dataset

    collated = self.collate(pyg_graph_list)
    data, slices = collated
    print('Saving...')
    torch.save((data, slices), self.processed_paths[0])
def read_csv_graph_pyg(raw_dir, add_inverse_edge=True, additional_node_files=None, additional_edge_files=None):
    """Read raw CSV graphs from ``raw_dir`` and wrap each one in a PyG ``Data``.

    Args:
        raw_dir: Directory forwarded to ``read_csv_graph_raw``.
        add_inverse_edge: Forwarded to ``read_csv_graph_raw``.
        additional_node_files: Names of extra per-node entries to copy from
            each raw graph dict onto the ``Data`` object under the same key.
            ``None`` (the default) is treated as an empty list.
        additional_edge_files: Same as above for extra per-edge entries.

    Returns:
        A list with one ``Data`` object per raw graph.
    """
    # Use fresh lists instead of shared mutable default arguments.
    if additional_node_files is None:
        additional_node_files = []
    if additional_edge_files is None:
        additional_edge_files = []

    graph_list = read_csv_graph_raw(
        raw_dir, add_inverse_edge,
        additional_node_files=additional_node_files,
        additional_edge_files=additional_edge_files)

    pyg_graph_list = []
    print('Converting graphs into PyG objects...')
    for graph in tqdm(graph_list):
        g = Data()
        g.__num_nodes__ = graph["num_nodes"]
        g.edge_index = torch.tensor(graph["edge_index"])

        # Edge and node features are optional in the raw dict.
        if graph["edge_feat"] is not None:
            g.edge_attr = torch.tensor(graph["edge_feat"])
        if graph["node_feat"] is not None:
            g.x = torch.tensor(graph["node_feat"])

        for key in additional_node_files:
            g[key] = torch.tensor(graph[key])
        for key in additional_edge_files:
            g[key] = torch.tensor(graph[key])

        pyg_graph_list.append(g)

    return pyg_graph_list
def read_graph_pyg(raw_dir, add_inverse_edge=False, additional_node_files=None, additional_edge_files=None, binary=False):
    """Read raw graphs (npz or csv) and convert them to PyG ``Data`` with DAG order info.

    Args:
        raw_dir: Directory forwarded to the raw reader.
        add_inverse_edge: Forwarded to the raw reader.
        additional_node_files: Names of extra per-node entries to move from
            each raw graph dict onto the ``Data`` object. ``None`` (the
            default) is treated as an empty list. Only forwarded to the csv
            reader; the entries are still read from the raw dict either way.
        additional_edge_files: Same as above for extra per-edge entries.
        binary: If true, use ``read_binary_graph_raw``; otherwise
            ``read_csv_graph_raw``.

    Returns:
        A list with one ``Data`` object per raw graph. Each object has been
        passed through ``add_order_info_01`` and carries ``len_longest_path``.
    """
    # Use fresh lists instead of shared mutable default arguments.
    if additional_node_files is None:
        additional_node_files = []
    if additional_edge_files is None:
        additional_edge_files = []

    if binary:
        # npz
        graph_list = read_binary_graph_raw(raw_dir, add_inverse_edge)
    else:
        # csv
        graph_list = read_csv_graph_raw(
            raw_dir, add_inverse_edge,
            additional_node_files=additional_node_files,
            additional_edge_files=additional_edge_files)

    pyg_graph_list = []
    print('Converting graphs into PyG objects...')
    for graph in tqdm(graph_list):
        g = Data()
        g.__num_nodes__ = graph['num_nodes']
        g.edge_index = torch.from_numpy(graph['edge_index'])

        # Entries are removed from the raw dict once they have been wrapped.
        del graph['num_nodes']
        del graph['edge_index']

        if graph['edge_feat'] is not None:
            g.edge_attr = torch.from_numpy(graph['edge_feat'])
            del graph['edge_feat']

        if graph['node_feat'] is not None:
            g.x = torch.from_numpy(graph['node_feat'])
            del graph['node_feat']

        for key in additional_node_files:
            g[key] = torch.from_numpy(graph[key])
            del graph[key]

        for key in additional_edge_files:
            g[key] = torch.from_numpy(graph[key])
            del graph[key]

        pyg_graph_list.append(g)

        # Mutates g in place; presumably this is what sets g._bi_layer_idx0
        # (TODO confirm against add_order_info_01).
        add_order_info_01(g)

        # DAGNN
        # length of longest path
        # layer ids start with 0 so max, gives actual path length and -1 is not necessary
        g.len_longest_path = float(torch.max(g._bi_layer_idx0).item())

    return pyg_graph_list
def __getitem__(self, idx):
    '''Return the molecule at position ``idx`` as a PyG ``Data`` object.

    The SMILES string stored in ``self.smiles_list[idx]`` is converted with
    ``self.smiles2graph``; edge indices, edge features and node features are
    stored as int64 tensors.
    '''
    # Each entry is a (smiles, target) pair; the target is not attached to
    # the returned object here.
    smiles, _target = self.smiles_list[idx]
    mol_graph = self.smiles2graph(smiles)

    sample = Data()
    sample.__num_nodes__ = int(mol_graph['num_nodes'])

    # (attribute on the Data object, key in the graph dict)
    field_map = (
        ('edge_index', 'edge_index'),
        ('edge_attr', 'edge_feat'),
        ('x', 'node_feat'),
    )
    for attr_name, graph_key in field_map:
        as_int64 = torch.from_numpy(mol_graph[graph_key]).to(torch.int64)
        setattr(sample, attr_name, as_int64)

    return sample
def read_graph_pyg(raw_dir, add_inverse_edge=False, additional_node_files=None, additional_edge_files=None, binary=False):
    """Read raw graphs (npz or csv) from ``raw_dir`` and convert them to PyG ``Data``.

    Args:
        raw_dir: Directory forwarded to the raw reader.
        add_inverse_edge: Forwarded to the raw reader.
        additional_node_files: Names of extra per-node entries to move from
            each raw graph dict onto the ``Data`` object. ``None`` (the
            default) is treated as an empty list. Only forwarded to the csv
            reader; the entries are still read from the raw dict either way.
        additional_edge_files: Same as above for extra per-edge entries.
        binary: If true, use ``read_binary_graph_raw``; otherwise
            ``read_csv_graph_raw``.

    Returns:
        A list with one ``Data`` object per raw graph.
    """
    # Use fresh lists instead of shared mutable default arguments.
    if additional_node_files is None:
        additional_node_files = []
    if additional_edge_files is None:
        additional_edge_files = []

    if binary:
        # npz
        graph_list = read_binary_graph_raw(raw_dir, add_inverse_edge)
    else:
        # csv
        graph_list = read_csv_graph_raw(
            raw_dir, add_inverse_edge,
            additional_node_files=additional_node_files,
            additional_edge_files=additional_edge_files)

    pyg_graph_list = []
    print('Converting graphs into PyG objects...')
    for graph in tqdm(graph_list):
        g = Data()
        g.__num_nodes__ = graph['num_nodes']
        g.edge_index = torch.from_numpy(graph['edge_index'])

        # Entries are removed from the raw dict once they have been wrapped.
        del graph['num_nodes']
        del graph['edge_index']

        if graph['edge_feat'] is not None:
            g.edge_attr = torch.from_numpy(graph['edge_feat'])
            del graph['edge_feat']

        if graph['node_feat'] is not None:
            g.x = torch.from_numpy(graph['node_feat'])
            del graph['node_feat']

        for key in additional_node_files:
            g[key] = torch.from_numpy(graph[key])
            del graph[key]

        for key in additional_edge_files:
            g[key] = torch.from_numpy(graph[key])
            del graph[key]

        pyg_graph_list.append(g)

    return pyg_graph_list
def process(self):
    """Convert the raw SMILES table into a collated PyG dataset and save it.

    Reads ``data.csv.gz`` from ``self.raw_dir``, converts each SMILES string
    with ``self.smiles2graph``, attaches the ``homolumogap`` value as ``y``,
    checks that labels are present for train/valid and NaN for the two test
    splits, applies ``self.pre_transform`` if set, and stores the collated
    ``(data, slices)`` pair at ``self.processed_paths[0]``.
    """
    csv_path = osp.join(self.raw_dir, 'data.csv.gz')
    data_df = pd.read_csv(csv_path)
    smiles_list = data_df['smiles']
    homolumogap_list = data_df['homolumogap']

    print('Converting SMILES strings into graphs...')
    data_list = []
    for i in tqdm(range(len(smiles_list))):
        mol = Data()

        smiles = smiles_list[i]
        target = homolumogap_list[i]
        mol_graph = self.smiles2graph(smiles)

        # Sanity checks: one feature row per edge and per node.
        assert (len(mol_graph['edge_feat']) == mol_graph['edge_index'].shape[1])
        assert (len(mol_graph['node_feat']) == mol_graph['num_nodes'])

        mol.__num_nodes__ = int(mol_graph['num_nodes'])
        edge_index = torch.from_numpy(mol_graph['edge_index'])
        edge_feat = torch.from_numpy(mol_graph['edge_feat'])
        node_feat = torch.from_numpy(mol_graph['node_feat'])
        mol.edge_index = edge_index.to(torch.int64)
        mol.edge_attr = edge_feat.to(torch.int64)
        mol.x = node_feat.to(torch.int64)
        mol.y = torch.Tensor([target])

        data_list.append(mol)

    def _label_is_nan(index):
        # First (only) element of the NaN mask of the label tensor.
        return torch.isnan(data_list[index].y)[0]

    # double-check prediction target
    split_dict = self.get_idx_split()
    assert (all([not _label_is_nan(i) for i in split_dict['train']]))
    assert (all([not _label_is_nan(i) for i in split_dict['valid']]))
    assert (all([_label_is_nan(i) for i in split_dict['test-dev']]))
    assert (all([_label_is_nan(i) for i in split_dict['test-challenge']]))

    if self.pre_transform is not None:
        data_list = [self.pre_transform(item) for item in data_list]

    data, slices = self.collate(data_list)

    print('Saving...')
    torch.save((data, slices), self.processed_paths[0])
def read_csv_graph_pyg(raw_dir, add_inverse_edge=True, additional_node_files=None, additional_edge_files=None):
    """Read raw CSV graphs and convert them to PyG ``Data`` with DAG order info.

    Args:
        raw_dir: Directory forwarded to ``read_csv_graph_raw``.
        add_inverse_edge: Forwarded to ``read_csv_graph_raw``.
        additional_node_files: Names of extra per-node entries. A name that
            does not contain ``'node_'`` is looked up and stored with a
            ``'node_'`` prefix. ``None`` (the default) is treated as an
            empty list.
        additional_edge_files: Same as above with the ``'edge_'`` prefix.

    Returns:
        A list with one ``Data`` object per raw graph. Each object has been
        passed through ``add_order_info_01`` and carries ``len_longest_path``.
    """
    # Use fresh lists instead of shared mutable default arguments.
    if additional_node_files is None:
        additional_node_files = []
    if additional_edge_files is None:
        additional_edge_files = []

    graph_list = read_csv_graph_raw(
        raw_dir, add_inverse_edge,
        additional_node_files=additional_node_files,
        additional_edge_files=additional_edge_files)

    pyg_graph_list = []
    print('Converting graphs into PyG objects...')
    for graph in tqdm(graph_list):
        g = Data()
        g.__num_nodes__ = graph["num_nodes"]
        g.edge_index = torch.from_numpy(graph["edge_index"])

        # Edge and node features are optional in the raw dict.
        if graph["edge_feat"] is not None:
            g.edge_attr = torch.from_numpy(graph["edge_feat"])
        if graph["node_feat"] is not None:
            g.x = torch.from_numpy(graph["node_feat"])

        for key in additional_node_files:
            # Substring check (not a prefix check), kept as in the original.
            feat_name = key if 'node_' in key else 'node_' + key
            g[feat_name] = torch.from_numpy(graph[feat_name])

        for key in additional_edge_files:
            feat_name = key if 'edge_' in key else 'edge_' + key
            g[feat_name] = torch.from_numpy(graph[feat_name])

        # Mutates g in place; presumably this is what sets g._bi_layer_idx0
        # (TODO confirm against add_order_info_01).
        add_order_info_01(g)

        # DAGNN
        # length of longest path
        # layer ids start with 0 so max, gives actual path length and -1 is not necessary
        g.len_longest_path = float(torch.max(g._bi_layer_idx0).item())

        pyg_graph_list.append(g)

    return pyg_graph_list
def read_csv_graph_pyg(raw_dir, add_inverse_edge=False):
    """Read raw CSV graphs from ``raw_dir`` and return them as PyG ``Data`` objects.

    Both arguments are forwarded unchanged to ``read_csv_graph_raw``. The
    result holds one ``Data`` object per raw graph dict.
    """

    def _wrap(raw):
        # Copy the raw arrays into tensors on a fresh Data object.
        pyg = Data()
        pyg.__num_nodes__ = raw["num_nodes"]
        pyg.edge_index = torch.tensor(raw["edge_index"])
        # Edge and node features are optional in the raw dict.
        if raw["edge_feat"] is not None:
            pyg.edge_attr = torch.tensor(raw["edge_feat"])
        if raw["node_feat"] is not None:
            pyg.x = torch.tensor(raw["node_feat"])
        return pyg

    raw_graphs = read_csv_graph_raw(raw_dir, add_inverse_edge)
    return [_wrap(raw) for raw in raw_graphs]
def read_csv_heterograph_pyg(raw_dir, add_inverse_edge=False, additional_node_files=None, additional_edge_files=None):
    """Read raw heterogeneous CSV graphs and wrap each one in a PyG ``Data``.

    Args:
        raw_dir: Directory forwarded to ``read_csv_heterograph_raw``.
        add_inverse_edge: Forwarded to ``read_csv_heterograph_raw``.
        additional_node_files: Names of extra entries keyed by node type;
            each is stored on the ``Data`` object as ``<name>_dict``.
            ``None`` (the default) is treated as an empty list.
        additional_edge_files: Same as above for entries keyed by edge
            triplet.

    Returns:
        A list with one ``Data`` object per raw graph. Each object holds
        dictionaries of tensors: ``edge_index_dict``, and when present in the
        raw graph, ``edge_attr_dict`` and ``x_dict``.
    """
    # Use fresh lists instead of shared mutable default arguments.
    if additional_node_files is None:
        additional_node_files = []
    if additional_edge_files is None:
        additional_edge_files = []

    graph_list = read_csv_heterograph_raw(
        raw_dir, add_inverse_edge,
        additional_node_files=additional_node_files,
        additional_edge_files=additional_edge_files)

    def _tensor_dict(array_dict):
        # Convert every array in a {key: ndarray} mapping to a tensor.
        return {key: torch.from_numpy(arr) for key, arr in array_dict.items()}

    pyg_graph_list = []
    print('Converting graphs into PyG objects...')
    for graph in tqdm(graph_list):
        g = Data()

        # Both attributes receive the same per-node-type count dict.
        g.__num_nodes__ = graph["num_nodes_dict"]
        g.num_nodes_dict = graph["num_nodes_dict"]

        # add edge connectivity
        g.edge_index_dict = _tensor_dict(graph["edge_index_dict"])

        # Edge and node features are optional in the raw dict.
        if graph["edge_feat_dict"] is not None:
            g.edge_attr_dict = _tensor_dict(graph["edge_feat_dict"])

        if graph["node_feat_dict"] is not None:
            g.x_dict = _tensor_dict(graph["node_feat_dict"])

        for key in additional_edge_files:
            g[key + '_dict'] = _tensor_dict(graph[key])

        for key in additional_node_files:
            g[key + '_dict'] = _tensor_dict(graph[key])

        pyg_graph_list.append(g)

    return pyg_graph_list
def read_csv_graph_pyg(raw_dir, add_inverse_edge=True, additional_node_files=None, additional_edge_files=None):
    """Read raw CSV graphs from ``raw_dir`` and wrap each one in a PyG ``Data``.

    Args:
        raw_dir: Directory forwarded to ``read_csv_graph_raw``.
        add_inverse_edge: Forwarded to ``read_csv_graph_raw``.
        additional_node_files: Names of extra per-node entries. A name that
            does not contain ``'node_'`` is looked up and stored with a
            ``'node_'`` prefix. ``None`` (the default) is treated as an
            empty list.
        additional_edge_files: Same as above with the ``'edge_'`` prefix.

    Returns:
        A list with one ``Data`` object per raw graph.
    """
    # Use fresh lists instead of shared mutable default arguments.
    if additional_node_files is None:
        additional_node_files = []
    if additional_edge_files is None:
        additional_edge_files = []

    graph_list = read_csv_graph_raw(
        raw_dir, add_inverse_edge,
        additional_node_files=additional_node_files,
        additional_edge_files=additional_edge_files)

    pyg_graph_list = []
    print('Converting graphs into PyG objects...')
    for graph in tqdm(graph_list):
        g = Data()
        g.__num_nodes__ = graph["num_nodes"]
        g.edge_index = torch.from_numpy(graph["edge_index"])

        # Edge and node features are optional in the raw dict.
        if graph["edge_feat"] is not None:
            g.edge_attr = torch.from_numpy(graph["edge_feat"])
        if graph["node_feat"] is not None:
            g.x = torch.from_numpy(graph["node_feat"])

        for key in additional_node_files:
            # Substring check (not a prefix check), kept as in the original.
            feat_name = key if 'node_' in key else 'node_' + key
            g[feat_name] = torch.from_numpy(graph[feat_name])

        for key in additional_edge_files:
            feat_name = key if 'edge_' in key else 'edge_' + key
            g[feat_name] = torch.from_numpy(graph[feat_name])

        pyg_graph_list.append(g)

    return pyg_graph_list
def read_csv_graph_pyg(raw_dir, add_inverse_edge=False):
    """Read raw CSV graphs from ``raw_dir`` and return them as PyG ``Data`` objects.

    Both arguments are forwarded unchanged to ``read_csv_graph_raw``. A
    progress bar is shown while the raw graph dicts are converted.
    """
    raw_graphs = read_csv_graph_raw(raw_dir, add_inverse_edge)

    # (attribute on the Data object, key in the raw dict) for optional features
    optional_fields = (("edge_attr", "edge_feat"), ("x", "node_feat"))

    converted = []
    print('Converting graphs into PyG objects...')
    for raw in tqdm(raw_graphs):
        pyg = Data()
        pyg.__num_nodes__ = raw["num_nodes"]
        pyg.edge_index = torch.tensor(raw["edge_index"])

        for attr_name, raw_key in optional_fields:
            values = raw[raw_key]
            if values is None:
                continue
            setattr(pyg, attr_name, torch.tensor(values))

        converted.append(pyg)

    return converted
def graphs_to_pyg(graph_list):
    """Convert raw graph dicts into PyG ``Data`` objects.

    Each dict is read for ``num_nodes``, ``edge_index``, ``edge_feat``,
    ``node_feat`` and ``target``. Features become double tensors; a target,
    when present, becomes a one-element long tensor with ``-1`` mapped to
    ``0``.
    """
    print('Converting graphs into PyG objects...')

    converted = []
    for raw in tqdm(graph_list):
        pyg = Data()
        pyg.__num_nodes__ = raw["num_nodes"]
        pyg.edge_index = torch.tensor(raw["edge_index"])

        edge_feat = raw["edge_feat"]
        if edge_feat is not None:
            pyg.edge_attr = torch.DoubleTensor(edge_feat)

        node_feat = raw["node_feat"]
        if node_feat is not None:
            pyg.x = torch.DoubleTensor(node_feat)

        target = raw["target"]
        if target is not None:
            label = int(target)
            # A label of -1 is stored as class 0.
            if label == -1:
                label = 0
            pyg.y = torch.LongTensor([label])

        converted.append(pyg)

    return converted