def generate_graph_matrices(graphs, auto_pad=False):
    """
    Convert graphs to adjacency, one-hot node and one-hot edge matrices.

    :param graphs: graphs handed to `nx_to_numpy`;
    :param auto_pad: forwarded unchanged to `nx_to_numpy`;
    :return: A, X, E, where X and E are lists of one-hot encoded arrays.
    """
    def _encode(arrays):
        # The label vocabulary is shared by all graphs, so it is collected
        # over the whole collection before any single array is encoded.
        seen = []
        for arr in arrays:
            seen.extend(np.unique(arr))
        vocabulary = np.unique(seen)
        return [label_to_one_hot(arr, vocabulary) for arr in arrays]

    A, X, E = nx_to_numpy(
        graphs,
        nf_keys=['atomic_num'],
        ef_keys=['bond_type'],
        auto_pad=auto_pad,
        self_loops=True,
    )
    return A, _encode(X), _encode(E)
def read(self):
    """
    Load the graph stored in `dblp.npz` under `self.path`.

    Node features are binarized (every positive entry becomes 1) and, when
    `self.normalize_x` is set, passed through `_preprocess_features`.
    Labels are one-hot encoded over the unique values found in the file.

    :return: a list containing a single `Graph` whose `x`, `a` and `y` are
        cast to `self.dtype`.
    """
    # np.load on an .npz returns an NpzFile that keeps the archive open;
    # read everything inside the `with` so the handle is always released.
    with np.load(osp.join(self.path, "dblp.npz")) as f:
        x = sp.csr_matrix(
            (f["attr_data"], f["attr_indices"], f["attr_indptr"]),
            f["attr_shape"],
        ).toarray()
        a = sp.csr_matrix(
            (f["adj_data"], f["adj_indices"], f["adj_indptr"]),
            f["adj_shape"],
        )
        y = f["labels"]

    x[x > 0] = 1
    if self.normalize_x:
        print("Pre-processing node features")
        x = _preprocess_features(x)

    y = label_to_one_hot(y, np.unique(y))

    return [
        Graph(
            x=x.astype(self.dtype),
            a=a.astype(self.dtype),
            y=y.astype(self.dtype),
        )
    ]
def __call__(self, graph):
    """
    One-hot encode `graph.y` in place and return the same graph.

    `label_to_one_hot` is used with `self.labels` when they are set;
    otherwise `one_hot` is used with `self.depth`.
    """
    if self.labels is None:
        encoded = one_hot(graph.y, self.depth)
    else:
        encoded = label_to_one_hot(graph.y, self.labels)
    graph.y = encoded
    return graph
def atom_to_feature(atom):
    """
    Build the feature vector of one atom.

    The vector is the one-hot atomic number (over `ATOM_TYPES`), followed by
    the atom's coordinates, its charge and its `iso` value.
    """
    parts = (
        label_to_one_hot(atom["atomic_num"], ATOM_TYPES),
        atom["coords"],
        [atom["charge"], atom["iso"]],
    )
    return np.concatenate(parts, -1)
def atom_to_feature(atom):
    """
    Return the concatenated features of a single atom.

    Layout: one-hot atomic number over `ATOM_TYPES`, then coordinates,
    then charge and `iso`.
    """
    one_hot_num = label_to_one_hot(atom['atomic_num'], ATOM_TYPES)
    position = atom['coords']
    scalars = [atom['charge'], atom['iso']]
    return np.concatenate((one_hot_num, position, scalars), -1)
def mol_to_adj(mol):
    """
    Build the adjacency matrix and edge attributes of a molecule.

    :param mol: mapping with a "bonds" entry; each bond provides
        "start_atom", "end_atom" and "type";
    :return: a CSR adjacency matrix and an array with one one-hot bond type
        (over `BOND_TYPES`) per directed edge.
    """
    sources, targets, bond_types = [], [], []
    for bond in mol["bonds"]:
        u = bond["start_atom"]
        v = bond["end_atom"]
        kind = bond["type"]
        # Every bond is stored in both directions.
        sources.extend((u, v))
        targets.extend((v, u))
        bond_types.extend((kind, kind))

    weights = np.ones_like(sources)
    a = sp.csr_matrix((weights, (sources, targets)))
    edge_attr = np.array(
        [label_to_one_hot(kind, BOND_TYPES) for kind in bond_types]
    )
    return a, edge_attr
def generate_graph_matrices(graphs, auto_pad=False):
    """
    Generate the A, X, E matrices for a list of graphs, with node and edge
    labels one-hot encoded over the values observed across all graphs.

    :param graphs: list of networkx graphs
    :param auto_pad: bool, forwarded to `nx_to_numpy` (whether to pad the
        node matrix to have the same length)
    :return: A, X, E
    """
    A, node_labels, edge_labels = nx_to_numpy(
        graphs,
        nf_keys=['atomic_num'],
        ef_keys=['bond_type'],
        auto_pad=auto_pad,
        self_loops=True,
    )

    # Node label vocabulary, then per-graph encoding.
    node_values = []
    for labels in node_labels:
        node_values.extend(np.unique(labels))
    uniq_X = np.unique(node_values)
    X = [label_to_one_hot(labels, uniq_X) for labels in node_labels]

    # Edge label vocabulary, then per-graph encoding.
    edge_values = []
    for labels in edge_labels:
        edge_values.extend(np.unique(labels))
    uniq_E = np.unique(edge_values)
    E = [label_to_one_hot(labels, uniq_E) for labels in edge_labels]

    return A, X, E
def mol_to_adj(mol):
    """
    Build the adjacency and edge-feature matrices of a molecule.

    :param mol: mapping with a "bonds" entry; each bond provides
        "start_atom", "end_atom" and "type";
    :return: the pair produced by `sparse.edge_index_to_matrix` from the
        bidirectional edge list, unit edge weights and one-hot bond types
        (over `BOND_TYPES`).
    """
    sources, targets, bond_types = [], [], []
    for bond in mol["bonds"]:
        u = bond["start_atom"]
        v = bond["end_atom"]
        kind = bond["type"]
        # Every bond is stored in both directions.
        sources.extend((u, v))
        targets.extend((v, u))
        bond_types.extend((kind, kind))

    index_pairs = np.array((sources, targets)).T
    unit_weights = np.ones_like(sources)
    one_hot_types = label_to_one_hot(bond_types, BOND_TYPES)

    adjacency, edge_matrix = sparse.edge_index_to_matrix(
        edge_index=index_pairs,
        edge_weight=unit_weights,
        edge_features=one_hot_types,
    )
    return adjacency, edge_matrix
def read(self):
    """
    Load the graph stored under `self.path`.

    Reads the adjacency matrix from `adj_full.npz`, node features from
    `feats.npy`, labels from `class_map.json` and the split indices from
    `role.json`. Features go through `_preprocess_features` when
    `self.normalize_x` is set; labels are one-hot encoded over their unique
    values. As a side effect, boolean `self.train_mask`, `self.val_mask`
    and `self.test_mask` are set from the "tr", "va" and "te" entries of
    `role.json`.

    :return: a list containing a single `Graph` whose `x`, `a` and `y` are
        cast to `self.dtype`.
    """
    # np.load on an .npz returns an NpzFile that keeps the archive open;
    # the context manager closes it once the matrix has been built.
    with np.load(osp.join(self.path, "adj_full.npz")) as adj_file:
        a = sp.csr_matrix(
            (adj_file["data"], adj_file["indices"], adj_file["indptr"]),
            adj_file["shape"],
        )

    x = np.load(osp.join(self.path, "feats.npy"))
    if self.normalize_x:
        print("Pre-processing node features")
        x = _preprocess_features(x)
    n_nodes = x.shape[0]

    with open(osp.join(self.path, "class_map.json")) as class_file:
        class_map = json.load(class_file)
    y = np.zeros(n_nodes)
    for key, item in class_map.items():
        y[int(key)] = item
    y = label_to_one_hot(y, np.unique(y))

    with open(osp.join(self.path, "role.json")) as role_file:
        role = json.load(role_file)

    def _mask(indices):
        # Boolean mask that is True at the given node indices.
        mask = np.zeros(n_nodes, dtype=bool)
        # dtype=int keeps an empty split valid as an index array (an empty
        # list would otherwise become float64 and raise IndexError).
        mask[np.asarray(indices, dtype=int)] = True
        return mask

    self.train_mask = _mask(role["tr"])
    self.val_mask = _mask(role["va"])
    self.test_mask = _mask(role["te"])

    return [
        Graph(
            x=x.astype(self.dtype),
            a=a.astype(self.dtype),
            y=y.astype(self.dtype),
        )
    ]
# LOAD DATA
################################################################################
A, X, E, y = qm9.load_data(
    return_type='numpy',
    nf_keys='atomic_num',
    ef_keys='type',
    self_loops=True,
    amount=1000,  # Set to None to train on whole dataset
)
y = y[['cv']].values  # Heat capacity at 298.15K

# Preprocessing: one-hot encode node and edge labels, leaving 0 out of the
# label vocabulary.
X_uniq = np.unique(X)
X_uniq = X_uniq[np.nonzero(X_uniq)]
E_uniq = np.unique(E)
E_uniq = E_uniq[np.nonzero(E_uniq)]
X = label_to_one_hot(X, X_uniq)
E = label_to_one_hot(E, E_uniq)

# Parameters
N = X.shape[-2]       # Number of nodes in the graphs
F = X[0].shape[-1]    # Dimension of node features
S = E[0].shape[-1]    # Dimension of edge features
n_out = y.shape[-1]   # Dimension of the target

# Train/test split (fixed random_state for a reproducible split)
(A_train, A_test,
 X_train, X_test,
 E_train, E_test,
 y_train, y_test) = train_test_split(
    A, X, E, y, test_size=0.1, random_state=0
)

################################################################################
from spektral.layers import GraphConv, GlobalAvgPool
from spektral.utils import Batch, batch_iterator
from spektral.utils import label_to_one_hot

# Load data (edge features are returned by the loader but not used here)
A, X, _, y = qm9.load_data(
    return_type='numpy',
    nf_keys='atomic_num',
    ef_keys='type',
    self_loops=True,
    auto_pad=False,
    amount=1000,
)
y = y[['cv']].values  # Heat capacity at 298.15K

# Preprocessing: one-hot encode node labels over the values seen in any
# graph, then standardize the target.
uniq_X = np.unique(
    [value for node_labels in X for value in np.unique(node_labels)]
)
X = [label_to_one_hot(node_labels, uniq_X) for node_labels in X]
y = StandardScaler().fit_transform(y).reshape(-1, y.shape[-1])

# Parameters
F = X[0].shape[-1]    # Dimension of node features
n_out = y.shape[-1]   # Dimension of the target
learning_rate = 1e-3  # Learning rate
epochs = 25           # Number of training epochs
batch_size = 64       # Batch size

# Train/test split
(A_train, A_test,
 X_train, X_test,
 y_train, y_test) = train_test_split(A, X, y, test_size=0.1)

# Model definition
def generate_data(classes=0, n_samples_in_class=1000, n_nodes=7,
                  support_low=0., support_high=10., drift_amount=1.0,
                  one_hot_labels=True, support=None, seed=None,
                  return_type='numpy'):
    """
    Generates a dataset of Delaunay triangulations as described by
    [Zambon et al. (2017)](https://arxiv.org/abs/1706.06941).

    Node attributes are the 2D coordinates of the points.
    Two nodes are connected if they share an edge in the Delaunay
    triangulation.
    Labels represent the class of the graph (0 to 20, each class index i
    represent the "difficulty" of the classification problem 0 v. i. In other
    words, the higher the class index, the more similar the class is to
    class 0).

    Samples are generated class by class, in reversed order of `classes`
    with class 0 moved to the front when it ends up last; the returned
    labels follow that same order so that each label matches its sample.

    :param classes: indices of the classes to load (integer, or list of
        integers between 0 and 20);
    :param n_samples_in_class: number of generated samples per class;
    :param n_nodes: number of nodes in a graph;
    :param support_low: lower bound of the uniform distribution from which
        the support is generated;
    :param support_high: upper bound of the uniform distribution from which
        the support is generated;
    :param drift_amount: coefficient to control the amount of change between
        classes;
    :param one_hot_labels: one-hot encode dataset labels;
    :param support: custom support to use instead of generating it randomly;
    :param seed: random numpy seed;
    :param return_type: `'numpy'` or `'networkx'`, data format to return;
    :return:
    - if `return_type='numpy'`, the adjacency matrix, node features, and
    an array containing labels;
    - if `return_type='networkx'`, a list of graphs in Networkx format,
    and an array containing labels;
    :raises ValueError: if `return_type` is not in `RETURN_TYPES` or a class
        index is outside [0, 20].
    """
    if return_type not in RETURN_TYPES:
        raise ValueError('Possible return_type: {}'.format(RETURN_TYPES))

    if isinstance(classes, int):
        classes = [classes]

    if max(classes) > 20 or min(classes) < 0:
        raise ValueError('Class indices must be between 0 and 20')

    # Order in which the classes are generated.
    r_classes = list(reversed(classes))
    if r_classes[-1] == 0:
        r_classes.insert(0, r_classes.pop(-1))

    # Support points
    np.random.seed(seed)
    if support is None:
        support = np.random.uniform(support_low, support_high, (1, n_nodes, 2))
    elif support.shape != (1, n_nodes, 2):
        # Explicit check instead of `assert`, which is stripped under -O.
        print('The given support doesn\'t have shape (1, n_nodes, 2) as '
              'expected. Attempting to reshape.')
        support = support.reshape(1, n_nodes, 2)

    # Compute node features
    node_features = []
    for i in r_classes:
        if i == 0:
            concept = support
        else:
            # Each support point is shifted by `radius` in a random
            # direction; the radius shrinks as the class index grows.
            radius = 10. * ((2. / 3.) ** (drift_amount * (i - 1)))
            phase = np.random.uniform(0, 2 * np.pi, (n_nodes, 1))
            perturb = np.concatenate(
                (radius * np.cos(phase), radius * np.sin(phase)), axis=-1
            )
            concept = support + perturb
        noise = np.random.normal(0, 1, (n_samples_in_class, n_nodes, 2))
        node_features.append(np.repeat(concept, n_samples_in_class, 0) + noise)
    node_features = np.array(node_features).reshape((-1, n_nodes, 2))

    # Compute adjacency matrices
    adjacency = np.array([_compute_adj(nf) for nf in node_features])

    # Compute labels. They must follow the generation order (r_classes), not
    # the order of `classes`, otherwise labels and samples are misaligned.
    # The one-hot column order is still defined by `classes`.
    labels = np.repeat(r_classes, n_samples_in_class)
    if one_hot_labels:
        labels = label_to_one_hot(labels, labels=classes)

    # Compare strings by value; `is` tests identity and is unreliable here.
    if return_type == 'numpy':
        return adjacency, node_features, labels
    elif return_type == 'networkx':
        graphs = numpy_to_nx(adjacency, node_features=node_features,
                             nf_name='coords')
        return graphs, labels
    else:
        raise NotImplementedError
from sklearn.preprocessing import StandardScaler
from spektral.datasets import qm9
from spektral.layers import EdgeConditionedConv, GlobalAttentionPool
from spektral.utils import init_logging, label_to_one_hot

# Load data
adj, nf, ef, y = qm9.load_data(
    return_type='numpy',
    nf_keys='atomic_num',
    ef_keys='type',
    self_loops=True,
)
y = y[['cv']].values  # Heat capacity at 298.15K

# Preprocessing: one-hot encode node and edge labels, standardize the target
uniq_nf = np.unique(nf)
nf = label_to_one_hot(nf, uniq_nf)
uniq_ef = np.unique(ef)
ef = label_to_one_hot(ef, uniq_ef)
y = StandardScaler().fit_transform(y).reshape(-1, y.shape[-1])

# Parameters
N = nf.shape[-2]          # Number of nodes in the graphs
F = nf.shape[-1]          # Node features dimensionality
S = ef.shape[-1]          # Edge features dimensionality
n_out = y.shape[-1]       # Dimensionality of the target
learning_rate = 1e-3      # Learning rate for SGD
epochs = 25               # Number of training epochs
batch_size = 64           # Batch size
es_patience = 5           # Patience for early stopping
log_dir = init_logging()  # Create log directory and file
A, X, E, y = qm9.load_data(
    return_type='numpy',
    nf_keys='atomic_num',
    ef_keys='type',
    self_loops=False,
    auto_pad=False,
    amount=1000,  # Set to None to train on whole dataset
)
y = y[['cv']].values  # Heat capacity at 298.15K

# Preprocessing: build the label vocabularies over all graphs, leave 0 out
# of them, then one-hot encode each graph separately.
X_uniq = np.unique(
    [value for node_labels in X for value in np.unique(node_labels)]
)
E_uniq = np.unique(
    [value for edge_labels in E for value in np.unique(edge_labels)]
)
X_uniq = X_uniq[np.nonzero(X_uniq)]
E_uniq = E_uniq[np.nonzero(E_uniq)]
X = [label_to_one_hot(node_labels, labels=X_uniq) for node_labels in X]
E = [label_to_one_hot(edge_labels, labels=E_uniq) for edge_labels in E]

# Parameters
F = X[0].shape[-1]   # Dimension of node features
S = E[0].shape[-1]   # Dimension of edge features
n_out = y.shape[-1]  # Dimension of the target

# Train/test split (fixed random_state for a reproducible split)
(A_train, A_test,
 X_train, X_test,
 E_train, E_test,
 y_train, y_test) = train_test_split(
    A, X, E, y, test_size=0.1, random_state=0
)

################################################################################
# BUILD MODEL
np.random.seed(0) SW_KEY = 'dense_1_sample_weights:0' # Keras automatically creates a placeholder for sample weights, which must be fed # Load data A, X, E, y = qm9.load_data( return_type='numpy', nf_keys='atomic_num', ef_keys='type', self_loops=True, auto_pad=False, amount=1000) # Set to None to train on whole dataset y = y[['cv']].values # Heat capacity at 298.15K # Preprocessing uniq_X = np.unique([v for x in X for v in np.unique(x)]) X = [label_to_one_hot(x, uniq_X) for x in X] uniq_E = np.unique([v for e in E for v in np.unique(e)]) uniq_E = uniq_E[uniq_E != 0] E = [label_to_one_hot(e, uniq_E) for e in E] # Parameters F = X[0].shape[-1] # Dimension of node features S = E[0].shape[-1] # Dimension of edge features n_out = y.shape[-1] # Dimension of the target learning_rate = 1e-3 # Learning rate epochs = 25 # Number of training epochs batch_size = 32 # Batch size # Train/test split A_train, A_test, \ X_train, X_test, \
from spektral.datasets import qm9
from spektral.layers import EdgeConditionedConv, GlobalAvgPool
from spektral.utils import label_to_one_hot

# Load data
A, X, E, y = qm9.load_data(
    return_type='numpy',
    nf_keys='atomic_num',
    ef_keys='type',
    self_loops=True,
    amount=1000,  # Set to None to train on whole dataset
)
y = y[['cv']].values  # Heat capacity at 298.15K

# Preprocessing: one-hot encode node and edge labels, leaving 0 out of the
# label vocabulary.
uniq_X = np.unique(X)
uniq_X = uniq_X[np.nonzero(uniq_X)]
X = label_to_one_hot(X, uniq_X)
uniq_E = np.unique(E)
uniq_E = uniq_E[np.nonzero(uniq_E)]
E = label_to_one_hot(E, uniq_E)

# Parameters
N = X.shape[-2]       # Number of nodes in the graphs
F = X.shape[-1]       # Node features dimensionality
S = E.shape[-1]       # Edge features dimensionality
n_out = y.shape[-1]   # Dimensionality of the target
learning_rate = 1e-3  # Learning rate for SGD
epochs = 25           # Number of training epochs
batch_size = 32       # Batch size
es_patience = 5       # Patience for early stopping

# Train/test split