all_features_pad = [ pad(feat, (max_nodes, feat.shape[1])) for feat in all_link_features ] def create_mask(feat, max_nodes): return np.array( [True if i < feat.shape[0] else False for i in range(max_nodes)]) all_masks = [create_mask(feat, max_nodes) for feat in all_link_features] #num_channels = all_features_pad[0].shape[1] #step 3: Create dataset object data = [ Data(adj=torch.from_numpy(adj).float(), mask=torch.from_numpy(mask), x=torch.from_numpy(x[:, :num_channels]).float(), y=torch.from_numpy(np.array([y])).float(), std=torch.from_numpy(np.array([std_dict[std]])).float()) for adj, mask, x, y, std in zip(all_link_adj_symmetric_pad, all_masks, all_features_pad, all_rewards, std_dict) ] import random random.shuffle(data) dataset = dataset.shuffle() n = (len(dataset) + 9) // 10 test_dataset = data[:n] val_dataset = data[n:2 * n] train_dataset = data[2 * n:] with open('test_loader',
def load_one_graph(self, fname, mol):
    """Load the graph of one molecule from an HDF5 file.

    The file is opened read-only inside a ``with`` block, so the handle is
    released on every exit path (normal return, early ``None`` return, or
    an exception raised while reading the target / positions).

    Args:
        fname (str): hdf5 file name
        mol (str): name of the molecule (key of the group inside the file)

    Returns:
        Data object or None: torch_geometric Data object containing the
        node features, the internal and external edge features, the target
        and the xyz coordinates. Returns None if the molecule group, the
        node features or the edge features cannot be loaded.
    """

    def _stacked_features(group, prefix, names):
        """Read ``group[prefix + name]`` for each name and stack column-wise."""
        columns = []
        for name in names:
            vals = group[prefix + name][()]
            if vals.ndim == 1:
                # promote 1D features to a single column
                vals = vals.reshape(-1, 1)
            columns.append(vals)
        return np.hstack(columns)

    def _bidirectional_index(group, key):
        """Read an edge index and append the flipped pairs: (i,j) and (j,i)."""
        ind = group[key][()]
        ind = np.vstack((ind, np.flip(ind, 1))).T
        return torch.tensor(ind, dtype=torch.long).contiguous()

    def _edge_attributes(group, prefix):
        """Build the edge attribute tensor, or None if no edge feature is set."""
        if self.edge_feature is None:
            return None
        feats = _stacked_features(group, prefix, self.edge_feature)
        # duplicated to stay aligned with the bidirectional edge index
        feats = np.vstack((feats, feats))
        feats = self.edge_feature_transform(feats)
        return torch.tensor(feats, dtype=torch.float).contiguous()

    with h5py.File(fname, 'r') as f5:

        # Lookups below are deliberately best-effort (return None), but
        # ``except Exception`` no longer swallows KeyboardInterrupt/SystemExit.
        try:
            grp = f5[mol]
        except Exception:
            return None

        # nodes
        try:
            x = torch.tensor(
                _stacked_features(grp, 'node_data/', self.node_feature),
                dtype=torch.float)
        except Exception:
            # report the file actually being read
            print('node attributes not found in the file', fname)
            return None

        # edges (external then internal)
        try:
            edge_index = _bidirectional_index(grp, 'edge_index')
            edge_attr = _edge_attributes(grp, 'edge_data/')

            internal_edge_index = _bidirectional_index(
                grp, 'internal_edge_index')
            internal_edge_attr = _edge_attributes(
                grp, 'internal_edge_data/')
        except Exception:
            print('edge features not found in the file', fname)
            return None

        # target: only set when a target name is configured and stored
        y = None
        if self.target is not None and self.target in grp['score/'].keys():
            score = grp['score/' + self.target][()]
            if score is not None:
                y = torch.tensor(
                    [score], dtype=torch.float).contiguous()

        # pos
        pos = torch.tensor(grp['node_data/pos/'][()],
                           dtype=torch.float).contiguous()

        # load
        data = Data(x=x, edge_index=edge_index,
                    edge_attr=edge_attr, y=y, pos=pos)
        data.internal_edge_index = internal_edge_index
        data.internal_edge_attr = internal_edge_attr

        # mol name
        data.mol = mol

        # cluster: both depth levels must be present for the chosen method
        cluster_grp = None
        if ('clustering' in grp.keys()
                and self.clustering_method in grp['clustering'].keys()):
            cluster_grp = grp['clustering/{}'.format(self.clustering_method)]

        if (cluster_grp is not None
                and 'depth_0' in cluster_grp.keys()
                and 'depth_1' in cluster_grp.keys()):
            data.cluster0 = torch.tensor(
                cluster_grp['depth_0'][()], dtype=torch.long)
            data.cluster1 = torch.tensor(
                cluster_grp['depth_1'][()], dtype=torch.long)
        else:
            print('WARNING: no cluster detected')

    return data
test_dataset = pickle.load(test_file) train_dataset = pickle.load(train_file) val_dataset = pickle.load(val_file) else: os.makedirs(dataset_dir, exist_ok=True) raw_dataset_path = os.path.join(current_dir, 'data', args.dataset_name + '.csv') all_features, all_link_adj, all_masks, all_rewards \ = load_partial_design_data(raw_dataset_path, os.path.join(project_dir, 'data/designs/grammar_jan21.dot')) # Create dataset object data = [ Data(adj=torch.from_numpy(adj).float(), mask=torch.from_numpy(mask), x=torch.from_numpy(x).float(), y=torch.from_numpy(np.array([y])).float()) for adj, mask, x, y in zip(all_link_adj, all_masks, all_features, all_rewards) ] random.shuffle(data) n_val = (len(data) + 9) // 10 n_test = (len(data) + 9) // 10 train_dataset = data[:-n_test - n_val] val_dataset = data[-n_test - n_val:-n_test] test_dataset = data[-n_test:] with open(testset_path, 'wb') as test_file, open( valset_path, 'wb') as val_file, open(trainset_path, 'wb') as train_file: pickle.dump(test_dataset, test_file)
def __getitem__(self, idx):
    """Return one randomly generated sample; ``idx`` is not used.

    Three arrays are drawn from a standard normal distribution with NumPy,
    each with ``self.num_points`` rows: ``pos`` (3 columns), ``y``
    (``self.output_nc`` columns) and ``x`` (``self.input_nc`` columns).
    They are wrapped as tensors and returned in a ``Data`` object.
    """

    def _gaussian(width):
        # (num_points, width) sample from N(0, 1), converted via from_numpy
        sample = np.random.normal(0, 1, (self.num_points, width))
        return torch.from_numpy(sample)

    # Draw order (pos, y, x) is kept so a seeded NumPy RNG yields the same data.
    positions = _gaussian(3)
    targets = _gaussian(self.output_nc)
    features = _gaussian(self.input_nc)
    return Data(x=features, pos=positions, y=targets)
def test_num_batches(self):
    """Check ``MockBaseDataset.get_num_samples`` for two convolution formats.

    * DENSE: a ``pos`` tensor of shape (2, 3, 3) must report 2 samples.
    * PARTIAL_DENSE: a ``pos`` tensor of shape (3, 3) with
      ``batch = [0, 1, 2]`` must report 3 samples.
    """
    # Dense layout
    dense_sample = Data(pos=torch.randn((2, 3, 3)))
    dense_count = MockBaseDataset.get_num_samples(
        dense_sample, ConvolutionFormat.DENSE.value)
    self.assertEqual(dense_count, 2)

    # Partial-dense layout with an explicit batch vector
    packed_sample = Data(pos=torch.randn((3, 3)),
                         batch=torch.tensor([0, 1, 2]))
    packed_count = MockBaseDataset.get_num_samples(
        packed_sample, ConvolutionFormat.PARTIAL_DENSE.value)
    self.assertEqual(packed_count, 3)
def to_homogeneous(self, node_attrs: Optional[List[str]] = None,
                   edge_attrs: Optional[List[str]] = None,
                   add_node_type: bool = True,
                   add_edge_type: bool = True) -> Data:
    """Converts a :class:`~torch_geometric.data.HeteroData` object to a
    homogeneous :class:`~torch_geometric.data.Data` object.

    Unless :obj:`node_attrs` / :obj:`edge_attrs` are given explicitly, every
    tensor attribute that is present in all stores and whose size agrees
    across them (ignoring the concatenation dimension) is merged into a
    single tensor. In addition, integer vectors :obj:`node_type` and
    :obj:`edge_type` holding the type index of each node and edge are
    attached to the returned :class:`~torch_geometric.data.Data` object.

    Args:
        node_attrs (List[str], optional): The node features to combine
            across all node types. These node features need to be of the
            same feature dimensionality. If set to :obj:`None`, the
            features to combine are determined automatically.
            (default: :obj:`None`)
        edge_attrs (List[str], optional): The edge features to combine
            across all edge types. These edge features need to be of the
            same feature dimensionality. If set to :obj:`None`, the
            features to combine are determined automatically.
            (default: :obj:`None`)
        add_node_type (bool, optional): If set to :obj:`False`, the
            node-level vector :obj:`node_type` is not added to the
            returned object. (default: :obj:`True`)
        add_edge_type (bool, optional): If set to :obj:`False`, the
            edge-level vector :obj:`edge_type` is not added to the
            returned object. (default: :obj:`True`)
    """

    def _mergeable_keys(stores: List[BaseStorage]) -> List[str]:
        # For every tensor attribute, record its size with the
        # concatenation dimension removed, one entry per store.
        shapes_by_key = defaultdict(list)
        for store in stores:
            for name, item in store.items():
                if name in ['edge_index', 'adj_t']:
                    continue
                if not isinstance(item, Tensor):
                    continue
                axis = self.__cat_dim__(name, item, store)
                full = item.size()
                shapes_by_key[name].append(
                    tuple(full[:axis] + full[axis + 1:]))

        # Keep keys found in every store with one unique reduced size.
        mergeable = []
        for name, shapes in shapes_by_key.items():
            if len(shapes) == len(stores) and len(set(shapes)) == 1:
                mergeable.append(name)
        return mergeable

    def _merge_into(out, names, stores):
        # Concatenate each attribute over all stores along its cat dim.
        for name in names:
            parts = [store[name] for store in stores]
            axis = self.__cat_dim__(name, parts[0], stores[0])
            if len(parts) > 1:
                out[name] = torch.cat(parts, axis)
            else:
                out[name] = parts[0]

    def _type_vector(slices):
        # One type index per element, repeated by the size of its slice.
        counts = [span[1] - span[0] for span in slices.values()]
        counts = torch.tensor(counts, dtype=torch.long, device=device)
        type_ids = torch.arange(len(counts), device=device)
        return type_ids.repeat_interleave(counts)

    edge_index, node_slices, edge_slices = to_homogeneous_edge_index(self)
    has_edges = edge_index is not None
    device = edge_index.device if has_edges else None

    data = Data(**self._global_store.to_dict())
    if has_edges:
        data.edge_index = edge_index
    data._node_type_names = list(node_slices.keys())
    data._edge_type_names = list(edge_slices.keys())

    # Node attributes:
    node_stores = self.node_stores
    if node_attrs is None:
        node_attrs = _mergeable_keys(node_stores)
    _merge_into(data, node_attrs, node_stores)

    # Fall back to the end offset of the last node slice.
    if not data.can_infer_num_nodes:
        data.num_nodes = list(node_slices.values())[-1][1]

    # Edge attributes:
    edge_stores = self.edge_stores
    if edge_attrs is None:
        edge_attrs = _mergeable_keys(edge_stores)
    _merge_into(data, edge_attrs, edge_stores)

    if add_node_type:
        data.node_type = _type_vector(node_slices)

    if add_edge_type and has_edges:
        data.edge_type = _type_vector(edge_slices)

    return data