def process(self):
    """Convert the raw graph files into the processed dataset file.

    Reads one graph from ``self.raw_dir`` with the heterogeneous or the
    homogeneous reader (chosen by ``self.is_hetero``), runs
    ``self.pre_transform`` on it when one is set, and saves the collated
    result to ``self.processed_paths[0]`` with ``torch.save``.
    """
    meta = self.meta_info

    # meta_info holds strings: the flag is the literal 'True' and an absent
    # file list is the literal 'None'; otherwise names are comma-separated.
    with_inverse = meta['add_inverse_edge'] == 'True'

    node_spec = meta['additional node files']
    extra_node_files = [] if node_spec == 'None' else node_spec.split(',')

    edge_spec = meta['additional edge files']
    extra_edge_files = [] if edge_spec == 'None' else edge_spec.split(',')

    # Both readers are called with the same arguments, so only the callable
    # differs between the two cases.
    load_graphs = read_heterograph_pyg if self.is_hetero else read_graph_pyg
    graphs = load_graphs(self.raw_dir,
                         add_inverse_edge=with_inverse,
                         additional_node_files=extra_node_files,
                         additional_edge_files=extra_edge_files,
                         binary=self.binary)
    # Only the first element returned by the reader is kept.
    graph = graphs[0]

    if self.pre_transform is not None:
        graph = self.pre_transform(graph)

    print('Saving...')
    torch.save(self.collate([graph]), self.processed_paths[0])
def process(self):
    """Read the raw graphs, attach graph-level labels, and save them.

    Steps:
      1. Read the list of graphs from ``self.raw_dir`` with
         ``read_graph_pyg``.
      2. Set ``g.y`` on every graph:
         * ``'subtoken prediction'`` tasks: a list of strings obtained by
           splitting the first column of ``graph-label.csv.gz`` on spaces.
         * otherwise: a ``(1, -1)``-viewed tensor built from the matching
           row of ``graph-label.npz`` (when ``self.binary``) or
           ``graph-label.csv.gz``.
      3. Apply ``self.pre_transform`` to every graph when one is set.
      4. Collate the list and ``torch.save`` the ``(data, slices)`` pair to
         ``self.processed_paths[0]``.
    """
    ### read pyg graph list
    meta = self.meta_info

    # meta_info holds strings: the flag is the literal 'True' and an absent
    # file list is the literal 'None'; otherwise names are comma-separated.
    add_inverse_edge = meta['add_inverse_edge'] == 'True'

    node_spec = meta['additional node files']
    additional_node_files = [] if node_spec == 'None' else node_spec.split(',')

    edge_spec = meta['additional edge files']
    additional_edge_files = [] if edge_spec == 'None' else edge_spec.split(',')

    data_list = read_graph_pyg(self.raw_dir,
                               add_inverse_edge=add_inverse_edge,
                               additional_node_files=additional_node_files,
                               additional_edge_files=additional_edge_files,
                               binary=self.binary)

    if self.task_type == 'subtoken prediction':
        raw_labels = pd.read_csv(osp.join(self.raw_dir, 'graph-label.csv.gz'),
                                 compression='gzip', header=None).values
        # Each row's first column is one space-separated token sequence.
        graph_label = [str(row[0]).split(' ') for row in raw_labels]

        for i, g in enumerate(data_list):
            g.y = graph_label[i]
    else:
        if self.binary:
            # np.load returns an NpzFile that keeps the archive open; read
            # the array inside a ``with`` so the file handle is released.
            with np.load(osp.join(self.raw_dir, 'graph-label.npz')) as npz:
                graph_label = npz['graph_label']
        else:
            graph_label = pd.read_csv(
                osp.join(self.raw_dir, 'graph-label.csv.gz'),
                compression='gzip', header=None).values

        has_nan = np.isnan(graph_label).any()

        # The target dtype is the same for every graph, so pick it once.
        # Integer tensors cannot hold NaN, hence classification labels
        # containing NaN stay float32; non-classification labels are
        # always float32.
        if 'classification' in self.task_type and not has_nan:
            label_dtype = torch.long
        else:
            label_dtype = torch.float32

        for i, g in enumerate(data_list):
            g.y = torch.from_numpy(graph_label[i]).view(1, -1).to(label_dtype)

    if self.pre_transform is not None:
        data_list = [self.pre_transform(data) for data in data_list]

    data, slices = self.collate(data_list)

    print('Saving...')
    torch.save((data, slices), self.processed_paths[0])
def process(self):
    """Read the raw graph, attach node-level labels, and save it.

    Heterogeneous datasets (``self.is_hetero``) are read with
    ``read_heterograph_pyg`` and get a ``data.y_dict`` mapping each key of
    the node-label source to a label tensor. Other datasets are read with
    ``read_graph_pyg`` and get a single ``data.y`` tensor. Labels come from
    ``node-label.npz`` when ``self.binary`` is set, otherwise from
    ``read_node_label_hetero`` / ``node-label.csv.gz``.

    The graph is then passed through ``self.pre_transform`` when one is
    set, collated, and written to ``self.processed_paths[0]`` with
    ``torch.save``.
    """
    meta = self.meta_info

    # meta_info holds strings: the flag is the literal 'True' and an absent
    # file list is the literal 'None'; otherwise names are comma-separated.
    add_inverse_edge = meta['add_inverse_edge'] == 'True'

    node_spec = meta['additional node files']
    additional_node_files = [] if node_spec == 'None' else node_spec.split(',')

    edge_spec = meta['additional edge files']
    additional_edge_files = [] if edge_spec == 'None' else edge_spec.split(',')

    is_classification = 'classification' in self.task_type

    def _as_label_tensor(node_label):
        # Classification labels become long tensors unless they contain
        # NaN (integer tensors cannot hold NaN); everything else is
        # float32. The NaN scan is skipped for non-classification tasks.
        if is_classification and not np.isnan(node_label).any():
            return torch.from_numpy(node_label).to(torch.long)
        return torch.from_numpy(node_label).to(torch.float32)

    if self.is_hetero:
        data = read_heterograph_pyg(
            self.raw_dir,
            add_inverse_edge=add_inverse_edge,
            additional_node_files=additional_node_files,
            additional_edge_files=additional_edge_files,
            binary=self.binary)[0]

        if self.binary:
            # np.load returns an NpzFile that keeps the archive open; pull
            # every array out inside a ``with`` so the handle is released.
            with np.load(osp.join(self.raw_dir, 'node-label.npz')) as npz:
                node_label_dict = {key: npz[key] for key in npz.files}
        else:
            node_label_dict = read_node_label_hetero(self.raw_dir)

        data.y_dict = {
            nodetype: _as_label_tensor(node_label)
            for nodetype, node_label in node_label_dict.items()
        }
    else:
        data = read_graph_pyg(self.raw_dir,
                              add_inverse_edge=add_inverse_edge,
                              additional_node_files=additional_node_files,
                              additional_edge_files=additional_edge_files,
                              binary=self.binary)[0]

        ### adding prediction target
        if self.binary:
            with np.load(osp.join(self.raw_dir, 'node-label.npz')) as npz:
                node_label = npz['node_label']
        else:
            node_label = pd.read_csv(osp.join(self.raw_dir,
                                              'node-label.csv.gz'),
                                     compression='gzip', header=None).values

        data.y = _as_label_tensor(node_label)

    if self.pre_transform is not None:
        data = self.pre_transform(data)

    print('Saving...')
    torch.save(self.collate([data]), self.processed_paths[0])
def process(self):
    """Read the raw graphs, attach graph-level labels, and save as ``.npz``.

    Steps:
      1. Read the list of graphs from ``self.raw_dir`` with
         ``read_graph_pyg``.
      2. Set ``g.y`` on every graph:
         * ``'subtoken prediction'`` tasks: a list of strings obtained by
           splitting the first column of ``graph-label.csv.gz`` on spaces.
         * otherwise: a ``(1, -1)``-viewed tensor built from the matching
           row of ``graph-label.npz`` (when ``self.binary``) or
           ``graph-label.csv.gz``.
      3. When ``self.pre_transform`` is set, collate the graphs, wrap the
         result in ``TensorSliceDataset`` and run
         ``_process_dataset_pretransform_torch`` over it to obtain the
         transformed graph list.
      4. Collate the list and write ``_dataset_to_dict(dataset)`` to
         ``self.processed_paths[0]`` with ``np.savez``.
    """
    from ogb.io.read_graph_pyg import read_graph_pyg

    ### read pyg graph list
    meta = self.meta_info

    # meta_info holds strings: the flag is the literal 'True' and an absent
    # file list is the literal 'None'; otherwise names are comma-separated.
    add_inverse_edge = meta['add_inverse_edge'] == 'True'

    node_spec = meta['additional node files']
    additional_node_files = [] if node_spec == 'None' else node_spec.split(',')

    edge_spec = meta['additional edge files']
    additional_edge_files = [] if edge_spec == 'None' else edge_spec.split(',')

    data_list = read_graph_pyg(self.raw_dir,
                               add_inverse_edge=add_inverse_edge,
                               additional_node_files=additional_node_files,
                               additional_edge_files=additional_edge_files,
                               binary=self.binary)

    if self.task_type == 'subtoken prediction':
        raw_labels = pd.read_csv(os.path.join(self.raw_dir,
                                              'graph-label.csv.gz'),
                                 compression='gzip', header=None).values
        # Each row's first column is one space-separated token sequence.
        graph_label = [str(row[0]).split(' ') for row in raw_labels]

        for i, g in enumerate(data_list):
            g.y = graph_label[i]
    else:
        if self.binary:
            # np.load returns an NpzFile that keeps the archive open; read
            # the array inside a ``with`` so the file handle is released.
            with np.load(os.path.join(self.raw_dir,
                                      'graph-label.npz')) as npz:
                graph_label = npz['graph_label']
        else:
            graph_label = pd.read_csv(os.path.join(self.raw_dir,
                                                   'graph-label.csv.gz'),
                                      compression='gzip', header=None).values

        has_nan = np.isnan(graph_label).any()

        # The target dtype is the same for every graph, so pick it once.
        # Integer tensors cannot hold NaN, hence classification labels
        # containing NaN stay float32; non-classification labels are
        # always float32.
        if 'classification' in self.task_type and not has_nan:
            label_dtype = torch.long
        else:
            label_dtype = torch.float32

        for i, g in enumerate(data_list):
            g.y = torch.from_numpy(graph_label[i]).view(1, -1).to(label_dtype)

    if self.pre_transform is not None:
        # The pre-transform helper consumes a collated dataset and returns
        # a new list of graphs, which is collated again below.
        dataset = self.collate(data_list)
        data_list = _process_dataset_pretransform_torch(
            self.pre_transform, TensorSliceDataset(*dataset))

    # NOTE(review): collate + save are assumed to run whether or not a
    # pre_transform is set (otherwise nothing would be written) — confirm.
    dataset = self.collate(data_list)
    np.savez(self.processed_paths[0], **_dataset_to_dict(dataset))