def transform(
    self,
    gr: list,
):
    """Evaluate the fitted kernel on a list of graphs.

    The graphs are optionally made undirected (``self.undirected``), converted
    into GraKeL's graph format and handed to ``self.kern.transform``.

    The matrix is returned exactly as the underlying kernel lays it out; this
    method has no ``transpose`` option, so callers that want the other
    orientation must transpose the result themselves.

    Parameters
    ----------
    gr: list
        Graphs to evaluate. When ``self.type == 'edge'`` every graph must
        expose ``graph_type == 'edge_attr'``.

    Returns
    -------
    K: the output of ``self.kern.transform``; wrapped in a ``torch.Tensor``
        when ``self.return_tensor`` is set and it is not one already.

    Raises
    ------
    ValueError
        If ``self.type == 'edge'`` and at least one graph is not
        edge-attributed.
    """
    if self.undirected:
        gr = transform_to_undirected(gr)

    if self.type == 'edge':
        if not all(g.graph_type == 'edge_attr' for g in gr):
            # The trailing space after "to be" matters: the two literals are
            # concatenated and previously rendered as "to bein edge format".
            raise ValueError(
                "One or more graphs passed are not edge-attributed graphs. You need all graphs to be "
                "in edge format to use 'edge' type Weisfiler-Lehman kernel."
            )
        gr_ = graph_from_networkx(gr, self.node_label, self.edge_label)
    else:
        gr_ = graph_from_networkx(gr, self.node_label)

    K = self.kern.transform(gr_)
    if self.return_tensor and not isinstance(K, torch.Tensor):
        K = torch.tensor(K)
    return K
def forward_t(self, gr2, gr1=None):
    """
    Forward pass, but in tensor format.

    Parameters
    ----------
    gr2: graphs to embed and compare against ``gr1``
    gr1: optional second set of graphs. When omitted, the already converted
        training graphs stored in ``self._train_transformed`` are used.

    Returns
    -------
    K: the kernel matrix computed by ``calculate_kernel_matrix_as_tensor``
    y_: embedding of ``gr2``, truncated to the width of ``x_``, with
        requires_grad enabled
    x_: embedding of ``gr1`` with requires_grad enabled
    Keeping both embeddings as gradient-enabled leaves allows future
    Jacobian-vector products to be computed efficiently.
    """
    from grakel_replace.utils import calculate_kernel_matrix_as_tensor

    def _to_grakel(graphs):
        # Convert into GraKel compatible graph format (after the optional
        # undirected transformation)
        if self.undirected:
            graphs = transform_to_undirected(graphs)
        if self.type == 'edge':
            return graph_from_networkx(graphs, self.node_label, self.edge_label)
        return graph_from_networkx(graphs, self.node_label)

    def _embed(graphs):
        # One embedding block per WL iteration, stacked side by side
        blocks = self.kern.transform(graphs, return_embedding_only=True)
        return torch.tensor(np.concatenate(blocks, axis=1))

    gr2 = _to_grakel(gr2)
    gr1 = self._train_transformed if gr1 is None else _to_grakel(gr1)

    x_ = _embed(gr1)
    y_ = _embed(gr2)

    # Note that the vector length of the WL procedure is indeterminate, and thus dim(Y) != dim(X) in general.
    # However, since the newly observed features in the test data is always concatenated at the end of the feature
    # matrix, these features will not matter for the inference, and as such we can safely truncate the feature
    # matrix for the test data so that only those appearing in both the training and testing datasets are included.
    x_.requires_grad_()
    n_shared = x_.shape[1]
    y_ = y_[:, :n_shared].requires_grad_()

    K = calculate_kernel_matrix_as_tensor(x_, y_, oa=self.oa, se_kernel=self.se)
    return K, y_, x_
def feature_value(self, X_s):
    """Given a list of architectures X_s, compute their WL embedding of size N_s x D, where N_s is the length
    of the list and D is the number of training set features.

    Each per-iteration embedding block returned by the kernel is truncated to
    the number of features recorded for that iteration in
    ``self.feature_map(flatten=False)`` before the blocks are concatenated.

    Returns:
        embedding: torch.Tensor of shape N_s x D, described above
        names: list of shape D, which has 1-to-1 correspondence to each element of the embedding matrix above
    """
    if not self.requires_grad:
        # Adjacent literals are concatenated, so each fragment must carry its
        # own trailing space (the original rendered "warning,when").
        logging.warning(
            'Requires_grad flag is off -- in this case, there is risk that the element order in the '
            'feature map DOES NOT correspond to the order in the feature matrix. To suppress this warning, '
            'when initialising the WL kernel, do WeisfilerLehman(requires_grad=True)'
        )

    feat_map = self.feature_map(flatten=False)
    len_feat_map = [len(f) for f in feat_map.values()]

    X_s = graph_from_networkx(X_s, self.node_label)
    embedding = self.kern.transform(X_s, return_embedding_only=True)
    for j, em in enumerate(embedding):
        # Remove some of the spurious features that pop up sometimes
        embedding[j] = em[:, :len_feat_map[j]]

    # Generate the final embedding
    embedding = torch.tensor(np.concatenate(embedding, axis=1))
    return embedding, list(self.feature_map(flatten=True).values())
def main():
    """Train and evaluate an SVM on a labelled random-walk kernel over graphlets.

    Steps: load the annotated and non-annotated traces, convert their
    graphlets to GraKeL graphs, hold out 10% of the annotated graphs for
    validation, fit the kernel and an SVM with a precomputed kernel on the
    training split, print the validation accuracy, then predict the
    non-annotated graphs.

    Side effects: writes ``k_train.p``, ``k_val.p``, ``gk.p``, ``k_test.p``
    (pickle) and ``svm_rand_walk.joblib`` into the working directory and
    prints the accuracy and the test predictions.
    """
    graphlet = Graphlet('data/annotated-trace.csv')
    graphlet_test = Graphlet('data/not-annotated-trace.csv', test=True)

    X = graphlet.profile_graphlets
    # Binary target: 0 for 'normal' graphlets, 1 for everything else
    y = [0 if label == 'normal' else 1 for label in graphlet.get_graphlets_label()]
    X_test = graphlet_test.profile_graphlets

    train = list(graph_from_networkx(X, node_labels_tag='type'))
    G_test = list(graph_from_networkx(X_test, node_labels_tag='type'))

    # Splits the dataset into a training and a validation set
    G_train, G_val, y_train, y_val = train_test_split(train, y, test_size=0.1, random_state=42)

    gk = RandomWalkLabeled(n_jobs=4)
    # Fit on the training split only; fitting on `train` (as before) put the
    # validation graphs into the model and made the split meaningless.
    K_train = gk.fit_transform(G_train)
    K_val = gk.transform(G_val)

    # Context managers make sure every pickle file is flushed and closed
    for payload, path in ((K_train, "k_train.p"), (K_val, "k_val.p"), (gk, "gk.p")):
        with open(path, "wb") as handle:
            pickle.dump(payload, handle)

    # Uses the SVM classifier to perform classification
    clf = SVC(kernel="precomputed")
    clf.fit(K_train, y_train)
    y_pred = clf.predict(K_val)

    # Computes and prints the classification accuracy
    # (`acc` was never assigned before, so the print raised NameError)
    acc = sum(int(pred == truth) for pred, truth in zip(y_pred, y_val)) / len(y_val)
    print(acc)
    dump(clf, 'svm_rand_walk.joblib')

    K_test = gk.transform(G_test)
    y_test_pred = clf.predict(K_test)
    print(y_test_pred)
    with open("k_test.p", "wb") as handle:
        pickle.dump(K_test, handle)
def fit_transform(self, gr: list, rebuild_model=False, save_gram_matrix=True, layer_weights=None, **kwargs):
    """Fit the kernel on ``gr`` and return the resulting Gram matrix.

    Parameters
    ----------
    gr: list
        Training graphs. When ``self.type == 'edge'`` every graph must expose
        ``graph_type == 'edge_attr'``.
    rebuild_model: bool
        When ``False`` and a Gram matrix is already cached in ``self._gram``,
        the cached matrix is returned and nothing is recomputed.
    save_gram_matrix: bool
        Cache a copy of the Gram matrix in ``self._gram`` and pick up
        ``self.kern.layer_weights``.
    layer_weights:
        Optional new layer weights; applied through
        ``self.change_kernel_params`` when they are not the object already
        stored in ``self.layer_weights``.
    **kwargs:
        Accepted and ignored.

    Returns
    -------
    K: the Gram matrix; wrapped in a ``torch.Tensor`` when
        ``self.return_tensor`` is set and it is not one already.

    Raises
    ------
    ValueError
        If ``self.type == 'edge'`` and at least one graph is not
        edge-attributed.
    """
    # Serve the cached Gram matrix unless a rebuild is explicitly requested
    if rebuild_model is False and self._gram is not None:
        return self._gram

    if self.undirected:
        gr = transform_to_undirected(gr)

    # Transform into GraKeL graph format
    if self.type == 'edge':
        if not all(g.graph_type == 'edge_attr' for g in gr):
            # The trailing space after "to be" matters: the two literals are
            # concatenated and previously rendered as "to bein edge format".
            raise ValueError(
                "One or more graphs passed are not edge-attributed graphs. You need all graphs to be "
                "in edge format to use 'edge' type Weisfiler-Lehman kernel."
            )
        gr_ = list(graph_from_networkx(gr, self.node_label, self.edge_label))
    else:
        gr_ = list(graph_from_networkx(gr, self.node_label))

    if rebuild_model or self._gram is None:
        # Keep both the raw and the converted training graphs
        self._train = gr[:]
        self._train_transformed = gr_[:]

    if layer_weights is not None and layer_weights is not self.layer_weights:
        self.change_kernel_params({'layer_weights': layer_weights})
        self.layer_weights = layer_weights

    K = self.kern.fit_transform(gr_)
    if self.return_tensor and not isinstance(K, torch.Tensor):
        K = torch.tensor(K)

    if save_gram_matrix:
        # With return_tensor off K may not be a tensor and has no .clone();
        # fall back to .copy() so caching works in both modes.
        self._gram = K.clone() if isinstance(K, torch.Tensor) else K.copy()
        self.layer_weights = self.kern.layer_weights
    return K
def transform(
    self,
    gr: list,
):
    """Evaluate the fitted kernel on ``gr`` and return the result as a tensor.

    The graphs are converted to GraKeL format using ``self.node_label`` as
    the node-label tag; the output of ``self.kern.transform`` is wrapped in a
    ``torch.Tensor`` unless it already is one.
    """
    grakel_graphs = graph_from_networkx(gr, self.node_label)
    kernel_values = self.kern.transform(grakel_graphs)
    if isinstance(kernel_values, torch.Tensor):
        return kernel_values
    return torch.tensor(kernel_values)
def transform(
    self,
    gr: list,
):
    """Evaluate the fitted kernel on ``gr``.

    The graphs are always made undirected, optionally re-indexed through
    ``self._reindex_node_label`` (when ``self.reindex_node_label`` is set) and
    converted to GraKeL format with both node and edge labels. The kernel
    output is wrapped in a ``torch.Tensor`` when ``self.return_tensor`` is set.
    """
    graphs = transform_to_undirected(gr)
    if self.reindex_node_label:
        graphs = self._reindex_node_label(graphs)

    grakel_graphs = graph_from_networkx(graphs, self.node_label, self.edge_label)
    kernel_values = self.kern.transform(grakel_graphs)

    if not self.return_tensor:
        return kernel_values
    return torch.tensor(kernel_values)
def find_wl_feature(test, feature, kernel, ):
    """Return the number of occurrence of --feature-- in --test--, based on a --kernel--.

    Parameters
    ----------
    test: a graph or a list of graphs; a single graph is wrapped in a list
    feature: indexable whose first element is the feature to look up among
        the values of ``kernel.feature_map(flatten=True)``
    kernel: fitted kernel wrapper exposing ``feature_map`` and ``kern``

    Returns
    -------
    The entry at the feature's position in the horizontally stacked,
    per-iteration truncated embedding of ``test``.

    Raises
    ------
    KeyError
        If the feature is not among the kernel's training-set features.
    """
    import numpy as np

    if not isinstance(test, list):
        test = [test]

    # Resolve the feature's position first so an unknown feature fails before
    # any graph conversion or kernel evaluation is attempted.
    known_features = list(kernel.feature_map(flatten=True).values())
    try:
        idx = known_features.index(feature[0])
    except ValueError as err:
        # list.index signals a miss with ValueError (never KeyError), so the
        # original `except KeyError` handler could not fire.
        raise KeyError("Feature " + str(feature) + ' is not found in the training set of the kernel!') from err

    feat_map = kernel.feature_map(flatten=False)
    len_feat_map = [len(f) for f in feat_map.values()]

    test = graph_from_networkx(test, 'op_name', )
    embedding = kernel.kern.transform(test, return_embedding_only=True)
    for i, em in enumerate(embedding):
        # Drop columns beyond the features recorded for this iteration
        embedding[i] = em.flatten()[:len_feat_map[i]]
    return np.hstack(embedding)[idx]
def fit_transform(self, gr: list, rebuild_model=False, save_gram_matrix=False, **kwargs):
    """Fit the kernel on ``gr`` and return the resulting Gram matrix.

    Parameters
    ----------
    gr: list
        Training graphs; they are always converted to undirected graphs and,
        when ``self.reindex_node_label`` is set, passed through
        ``self._reindex_node_label``.
    rebuild_model: bool
        When ``False`` and a Gram matrix is already cached in ``self._gram``,
        the cached matrix is returned and nothing is recomputed.
    save_gram_matrix: bool
        Cache a copy of the Gram matrix in ``self._gram`` and a copy of the
        processed training graphs in ``self._train``.
    **kwargs:
        Accepted and ignored.

    Returns
    -------
    K: the Gram matrix; wrapped in a ``torch.Tensor`` when
        ``self.return_tensor`` is set.
    """
    # Serve the cached Gram matrix unless a rebuild is explicitly requested
    if rebuild_model is False and self._gram is not None:
        return self._gram

    gr = transform_to_undirected(gr)
    if self.reindex_node_label:
        gr = self._reindex_node_label(gr)
    gr_ = graph_from_networkx(gr, self.node_label, self.edge_label)

    K = self.kern.fit_transform(gr_)
    if self.return_tensor:
        K = torch.tensor(K)

    if save_gram_matrix:
        # With return_tensor off K is not a tensor and has no .clone();
        # fall back to .copy() so caching works in both modes.
        self._gram = K.clone() if isinstance(K, torch.Tensor) else K.copy()
        self._train = gr[:]
    return K
def fit_transform(self, gr: list, rebuild_model=False, save_gram_matrix=False, **kwargs):
    """Fit the kernel on ``gr`` and return the Gram matrix as a tensor.

    A previously cached Gram matrix (``self._gram``) is returned untouched
    unless ``rebuild_model`` is set. Otherwise the graphs are converted to
    GraKeL format with ``self.node_label``, the raw and converted training
    graphs are stored, the kernel is fitted, and the result is wrapped in a
    ``torch.Tensor`` if needed. With ``save_gram_matrix`` a clone of the
    matrix is cached in ``self._gram``. Extra keyword arguments are ignored.
    """
    reuse_cache = rebuild_model is False and self._gram is not None
    if reuse_cache:
        return self._gram

    converted = list(graph_from_networkx(gr, self.node_label))

    if rebuild_model or self._gram is None:
        # Shallow copies, so later mutation of the caller's list is not seen
        self._train = gr[:]
        self._train_transformed = converted[:]

    gram = self.kern.fit_transform(converted)
    K = gram if isinstance(gram, torch.Tensor) else torch.tensor(gram)

    if save_gram_matrix:
        self._gram = K.clone()
    return K
) # here, with 200 parcels, we'd expect a shape of 200 but get something between 199 and 196, probably for mask reasons # construct dict of attributes d = {idx: list(attr[idx, :]) for idx in range(attr.shape[0])} # add attributes to nodes nx.set_node_attributes(gx, d, "attr") return gx if __name__ == "__main__": subs_list = ["USM_0050475", "USM_0050478", "USM_0050481"] nx_graphs = [] for sub_name in subs_list: nx_graphs.append(compute_graph(sub_name)) ## Compute gram matrix # all your gx graphs are in a list of graphs called nx_graphs # transform networkx-graph into GraKel-graph G = list(graph_from_networkx( list(nx_graphs), node_labels_tag="attr")) # error here ! the attributes can't be found gamma = ( 1.0 # I need to check which value we should use... we will change it later... ) print("GraphHopper gamma : {}".format(gamma)) gk = GraphHopper(normalize=True, kernel_type=("gaussian", float(gamma))) K = gk.fit_transform(G) np.save("/scratch/mmahaut/data/abide/graph_classification/gram_matrix.npy", K) # K is your gram matrix, that you can then use to perform SVM-based classification...
from grakel.utils import graph_from_networkx

# --- Example 1: two plain (unlabeled) graphs on three nodes each ---------
# G1 is the path 0 - 1 - 2
G1 = nx.Graph()
G1.add_nodes_from(range(3))
G1.add_edges_from([(0, 1), (1, 2)])

# G2 is the triangle on the same three nodes
G2 = nx.Graph()
G2.add_nodes_from(range(3))
G2.add_edges_from([(0, 1), (0, 2), (1, 2)])

G_nx = [G1, G2]

# Hand the NetworkX graphs to GraKeL's converter
G = graph_from_networkx(G_nx)
print("1 - Simple graphs transformed\n")

# --- Example 2: the same two shapes, now with a 'label' node attribute ---
G1 = nx.Graph()
G1.add_nodes_from(range(3))
G1.add_edges_from([(0, 1), (1, 2)])
nx.set_node_attributes(G1, dict(enumerate('aba')), 'label')

G2 = nx.Graph()
G2.add_nodes_from(range(3))
G2.add_edges_from([(0, 1), (0, 2), (1, 2)])
nx.set_node_attributes(G2, dict(enumerate('abc')), 'label')

G_nx = [G1, G2]
def __init__(self, xtrain, ytrain, gkernel, space='nasbench101', h='auto', noise_var=1e-3, num_steps=200,
             max_noise_var=1e-1, max_h=3, optimize_noise_var=True, node_label='op_name'):
    """Store the training data and hyperparameters of the graph GP.

    Parameters
    ----------
    xtrain: training graphs. For the 'nasbench301' / 'darts' spaces this is a
        pair of sequences (indexed as ``xtrain[0]`` and ``xtrain[1]``),
        otherwise a single sequence of graphs.
    ytrain: training targets, filtered with the same valid indices as xtrain
    gkernel: 'wl' or 'wloa'; anything else raises NotImplementedError
    space: search-space name, selects single- or two-graph handling
    h, noise_var, num_steps, max_noise_var, max_h, optimize_noise_var:
        stored on the instance for later use
    node_label: node attribute name passed to ``graph_from_networkx``

    Graphs of length zero are dropped from the training data.
    """
    self.likelihood = noise_var
    self.space = space
    self.h = h

    if gkernel not in ('wl', 'wloa'):
        raise NotImplementedError(gkernel + ' is not a valid graph kernel choice!')
    histogram_options = {'sparse': False}
    if gkernel == 'wloa':
        histogram_options['oa'] = True
    self.wl_base = CustomVertexHistogram, histogram_options

    self.gkernel = None
    # only applicable for the DARTS search space, where we optimise two graphs jointly.
    self.gkernel_reduce = None

    # sometimes (especially for NAS-Bench-201), we can have invalid graphs with all nodes being pruned. Remove
    # these graphs at training time.
    if self.space in ('nasbench301', 'darts'):
        # For NAS-Bench-301 or DARTS search space, we need to search for 2 cells (normal and reduction
        # simultaneously); an index is kept only if both of its cells are non-empty
        valid_indices = [
            i for i, first_cell in enumerate(xtrain[0])
            if len(first_cell) and len(xtrain[1][i])
        ]
        self.x = np.array(xtrain)[:, valid_indices]
        self.xtrain_converted = [
            list(graph_from_networkx(self.x[0], node_label)),
            list(graph_from_networkx(self.x[1], node_label)),
        ]
    else:
        valid_indices = [i for i, graph in enumerate(xtrain) if len(graph)]
        self.x = np.array([xtrain[i] for i in valid_indices])
        self.xtrain_converted = list(graph_from_networkx(self.x, node_label))

    kept_targets = np.array(ytrain)[valid_indices]
    self.y_ = deepcopy(torch.tensor(kept_targets, dtype=torch.float32))
    self.y, self.y_mean, self.y_std = _normalize(deepcopy(self.y_))

    # number of steps of training
    self.num_steps = num_steps

    # other hyperparameters
    self.max_noise_var = max_noise_var
    self.max_h = max_h
    self.optimize_noise_var = optimize_noise_var
    self.node_label = node_label

    # Stays None until set elsewhere; forward() refuses to run while it is None
    self.K_i = None
def forward(
    self,
    Xnew,
    full_cov=False,
):
    """Predict mean and (co)variance for the graphs in ``Xnew``.

    Parameters
    ----------
    Xnew: query graphs. For the 'nasbench301' / 'darts' spaces it is indexed
        as ``Xnew[0]`` and ``Xnew[1]`` (two parallel sequences); otherwise a
        single sequence. NOTE: invalid (empty) entries are overwritten in
        place with a valid entry, so the caller's container is mutated.
    full_cov: when False, ``torch.diag`` is applied to the covariance before
        it is returned.

    Returns
    -------
    (mu_s, cov_s): un-normalised predictive mean and covariance, with the
        entries of invalid graphs set to zero. If the patience counter runs
        out, a pair of zero tensors of length ``len(Xnew)`` is returned.

    Raises
    ------
    ValueError
        If ``self.K_i`` is None, i.e. the model has not been fit.
    """
    if self.K_i is None:
        raise ValueError("The GraphGP model has not been fit!")

    # At testing time, similarly we first inspect to see whether there are invalid graphs
    if self.space == 'nasbench301' or self.space == 'darts':
        invalid_indices = [
            i for i in range(len(Xnew[0]))
            if len(Xnew[0][i]) == 0 or len(Xnew[1][i]) == 0
        ]
    else:
        nnodes = np.array([len(x) for x in Xnew])
        invalid_indices = np.argwhere(nnodes == 0)

    # replace the invalid indices with something valid
    # The loop stops at the first index that is not invalid; `i` is reused
    # below as the donor index for every invalid entry.
    # NOTE(review): in the two-cell spaces len(Xnew) is the length of the
    # outer container (presumably 2), not the number of architectures -- confirm.
    # NOTE(review): patience only drops below 0 after more than 100 invalid
    # indices, so a shorter all-invalid input is not caught here -- confirm intent.
    patience = 100
    for i in range(len(Xnew)):
        if i in invalid_indices:
            patience -= 1
            continue
        break
    if patience < 0:
        # All architectures are invalid!
        return torch.zeros(len(Xnew)), torch.zeros(len(Xnew))
    for j in invalid_indices:
        if self.space == 'nasbench301' or self.space == 'darts':
            Xnew[0][int(j)] = Xnew[0][i]
            Xnew[1][int(j)] = Xnew[1][i]
        else:
            Xnew[int(j)] = Xnew[i]

    if self.space == 'nasbench301' or self.space == 'darts':
        Xnew_T = np.array(Xnew)
        Xnew = np.array([
            list(graph_from_networkx(
                Xnew_T[0],
                self.node_label,
            )),
            list(graph_from_networkx(
                Xnew_T[1],
                self.node_label,
            )),
        ])
        # Training and query graphs are joined and both kernels are re-fitted
        # on the joint set; the two Gram matrices are averaged.
        X_full = np.concatenate((np.array(self.xtrain_converted), Xnew), axis=1)
        K_full = torch.tensor(
            0.5 * torch.tensor(self.gkernel.fit_transform(X_full[0]), dtype=torch.float32) +
            0.5 * torch.tensor(self.gkernel_reduce.fit_transform(X_full[1]), dtype=torch.float32))
        # Kriging equations
        # K_s: train-vs-query block, K_ss: query-vs-query block plus noise on the diagonal
        K_s = K_full[:len(self.x[0]):, len(self.x[0]):]
        K_ss = K_full[len(self.x[0]):, len(self.x[0]):] + self.likelihood * torch.eye(
            Xnew.shape[1],
        )
    else:
        Xnew = list(graph_from_networkx(
            Xnew,
            self.node_label,
        ))
        # Same joint re-fit as above, with a single kernel
        X_full = self.xtrain_converted + Xnew
        K_full = torch.tensor(self.gkernel.fit_transform(X_full), dtype=torch.float32)
        # Kriging equations
        K_s = K_full[:len(self.x):, len(self.x):]
        K_ss = K_full[len(self.x):, len(self.x):] + self.likelihood * torch.eye(
            len(Xnew),
        )

    mu_s = K_s.t() @ self.K_i @ self.y
    cov_s = K_ss - K_s.t() @ self.K_i @ K_s
    # Every entry of the covariance is floored at the noise level
    cov_s = torch.clamp(cov_s, self.likelihood, np.inf)
    mu_s = unnormalize_y(mu_s, self.y_mean, self.y_std)
    std_s = torch.sqrt(cov_s)
    std_s = unnormalize_y(std_s, None, self.y_std, True)
    cov_s = std_s**2
    if not full_cov:
        cov_s = torch.diag(cov_s)
    # replace the invalid architectures with zeros
    mu_s[torch.tensor(invalid_indices, dtype=torch.long)] = torch.tensor(
        0., dtype=torch.float32)
    cov_s[torch.tensor(invalid_indices, dtype=torch.long)] = torch.tensor(
        0., dtype=torch.float32)
    return mu_s, cov_s
print(sub_name, " attr : ", attr.shape) # construct dict of attributes d = {i: list(attr[i, :]) for i in range(attr.shape[0])} # add attributes to nodes nx.set_node_attributes(gx, d, "attributes") return gx if __name__ == "__main__": subs_list_file = open("subs_list_asd.json") subs_list = json.load(subs_list_file) nx_graphs = list() for sub_name in subs_list: nx_graphs.append(compute_graph(sub_name)) ## Compute gram matrix # all your gx graphs are in a list of graphs called nx_graphs # transform networkx-graph into GraKel-graph G = list(graph_from_networkx(nx_graphs, node_labels_tag="attributes")) gamma = ( 1.0 # I need to check which value we should use... we will change it later... ) print("GraphHopper gamma : {}".format(gamma)) gk = GraphHopper(normalize=True, kernel_type=("gaussian", float(gamma))) K = gk.fit_transform(G) np.save("/scratch/mmahaut/data/abide/graph_classification/gram_matrix.npy", K) # K is your gram matrix, that you can then use to perform SVM-based classification...