Example no. 1
    def transform(
        self,
        gr: list,
    ):
        """transpose: by default, the grakel produces output in shape of len(y) * len(x2). Use transpose to
        reshape that to a more conventional shape.."""
        if self.undirected:
            gr = transform_to_undirected(gr)
        if self.type == 'edge':
            if not all([g.graph_type == 'edge_attr' for g in gr]):
                raise ValueError(
                    "One or more graphs passed are not edge-attributed graphs. You need all graphs to be"
                    "in edge format to use 'edge' type Weisfiler-Lehman kernel."
                )
            gr_ = graph_from_networkx(gr, self.node_label, self.edge_label)
        else:
            gr_ = graph_from_networkx(
                gr,
                self.node_label,
            )

        K = self.kern.transform(gr_)
        if self.return_tensor and not isinstance(K, torch.Tensor):
            K = torch.tensor(K)
        return K
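For context, a minimal end-to-end sketch of the same fit/transform pattern using GraKeL's stock Weisfeiler-Lehman kernel directly (the toy graphs and the 'op_name' tag are illustrative assumptions, not part of the original class):

import networkx as nx
from grakel.utils import graph_from_networkx
from grakel.kernels import WeisfeilerLehman, VertexHistogram

# Two toy node-labeled graphs
g1, g2 = nx.path_graph(3), nx.complete_graph(3)
for g in (g1, g2):
    nx.set_node_attributes(g, {n: 'a' for n in g.nodes}, 'op_name')

gr_ = list(graph_from_networkx([g1, g2], 'op_name'))
kern = WeisfeilerLehman(n_iter=2, base_graph_kernel=VertexHistogram, normalize=True)
K_train = kern.fit_transform(gr_)   # Gram matrix over the fitted graphs
K_test = kern.transform(gr_)        # cross-kernel of new graphs against the fitted set
print(K_train.shape, K_test.shape)  # (2, 2) (2, 2)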
Example no. 2
    def forward_t(self, gr2, gr1=None):
        """
        Forward pass, but in tensor format.

        Parameters
        ----------
        gr1: single networkx graph

        Returns
        -------
        K: the kernel matrix
        x2 or y: the leaf variable(s) with requires_grad enabled.
        This allows future Jacobian-vector product to be efficiently computed.
        """
        from grakel_replace.utils import calculate_kernel_matrix_as_tensor
        if self.undirected:
            gr2 = transform_to_undirected(gr2)

        # Convert into GraKel compatible graph format
        if self.type == 'edge':
            gr2 = graph_from_networkx(gr2, self.node_label, self.edge_label)
        else:
            gr2 = graph_from_networkx(gr2, self.node_label)

        if gr1 is None:
            gr1 = self._train_transformed
        else:
            if self.undirected:
                gr1 = transform_to_undirected(gr1)
            if self.type == 'edge':
                gr1 = graph_from_networkx(gr1, self.node_label,
                                          self.edge_label)
            else:
                gr1 = graph_from_networkx(gr1, self.node_label)

        x_ = torch.tensor(
            np.concatenate(self.kern.transform(gr1,
                                               return_embedding_only=True),
                           axis=1))
        y_ = torch.tensor(
            np.concatenate(self.kern.transform(gr2,
                                               return_embedding_only=True),
                           axis=1))

        # Note that the feature-vector length of the WL procedure is indeterminate, so dim(y_) != dim(x_) in general.
        # However, since features newly observed in the test data are always appended at the end of the feature
        # matrix, they do not affect inference, and we can safely truncate the feature matrix of the test data
        # so that only features appearing in both the training and test sets are kept.

        x_.requires_grad_()
        y_ = y_[:, :x_.shape[1]].requires_grad_()
        K = calculate_kernel_matrix_as_tensor(x_,
                                              y_,
                                              oa=self.oa,
                                              se_kernel=self.se)
        return K, y_, x_
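The leaf-variable trick above is what makes gradient-based optimisation of the acquisition possible; a minimal self-contained sketch, assuming the kernel reduces to a plain linear product (the real calculate_kernel_matrix_as_tensor also supports the OA and SE variants configured above):

import torch

def kernel_matrix_as_tensor(x, y):
    # Stand-in for calculate_kernel_matrix_as_tensor: a plain linear kernel.
    return x @ y.t()

x_ = torch.rand(3, 5, requires_grad=True)   # training WL feature matrix
y_ = torch.rand(2, 5, requires_grad=True)   # test WL feature matrix (already truncated)
K = kernel_matrix_as_tensor(x_, y_)
# Because x_ and y_ are leaf tensors with requires_grad, gradients of any scalar
# function of K flow back to the WL embeddings:
K.sum().backward()
print(y_.grad.shape)                        # torch.Size([2, 5])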
Example no. 3
    def feature_value(self, X_s):
        """Given a list of architectures X_s, compute their WL embedding of size N_s x D, where N_s is the length
        of the list and D is the number of training set features.

        Returns:
            embedding: torch.Tensor of shape N_s x D, described above
            names: list of length D, in 1-to-1 correspondence with the columns of the embedding above
        """
        if not self.requires_grad:
            logging.warning(
                'The requires_grad flag is off -- in this case, there is a risk that the element order in the '
                'feature map DOES NOT correspond to the order in the feature matrix. To suppress this warning, '
                'initialise the WL kernel with WeisfilerLehman(requires_grad=True).'
            )
        feat_map = self.feature_map(flatten=False)
        len_feat_map = [len(f) for f in feat_map.values()]
        X_s = graph_from_networkx(
            X_s,
            self.node_label,
        )
        embedding = self.kern.transform(X_s, return_embedding_only=True)
        for j, em in enumerate(embedding):
            # Remove some of the spurious features that pop up sometimes
            embedding[j] = em[:, :len_feat_map[j]]

        # Generate the final embedding
        embedding = torch.tensor(np.concatenate(embedding, axis=1))
        return embedding, list(self.feature_map(flatten=True).values())
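A minimal numeric sketch of the truncation step above, with made-up shapes: two architectures, two WL layers, and one spurious test-time column in the second layer:

import numpy as np
import torch

embedding = [np.array([[1, 0], [0, 2]]), np.array([[3, 1, 9], [0, 4, 9]])]
len_feat_map = [2, 2]                      # training-time feature counts per layer
embedding = [em[:, :n] for em, n in zip(embedding, len_feat_map)]
embedding = torch.tensor(np.concatenate(embedding, axis=1))
names = ['a', 'b', 'ab', 'ba']             # hypothetical WL feature names, one per column
print(embedding.shape, len(names))         # torch.Size([2, 4]) 4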
Example no. 4
import pickle

from grakel.kernels import RandomWalkLabeled
from grakel.utils import graph_from_networkx
from joblib import dump
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# NOTE: Graphlet is a project-local helper (defined elsewhere) that parses the trace CSVs.


def main():
    graphlet = Graphlet('data/annotated-trace.csv')
    graphlet_test = Graphlet('data/not-annotated-trace.csv', test=True)

    X = graphlet.profile_graphlets
    y = graphlet.get_graphlets_label()
    y = [0 if el == 'normal' else 1 for el in y]

    X_test = graphlet_test.profile_graphlets

    train = list(graph_from_networkx(X, node_labels_tag='type'))
    G_test = list(graph_from_networkx(X_test, node_labels_tag='type'))

    # Split the dataset into a training and a validation set
    G_train, G_val, y_train, y_val = train_test_split(train, y, test_size=0.1, random_state=42)

    gk = RandomWalkLabeled(n_jobs=4)
    K_train = gk.fit_transform(G_train)   # fit on the training split only
    K_val = gk.transform(G_val)
    pickle.dump(K_train, open("k_train.p", "wb"))
    pickle.dump(K_val, open("k_val.p", "wb"))
    pickle.dump(gk, open("gk.p", "wb"))

    # Use an SVM classifier with the precomputed kernel
    clf = SVC(kernel="precomputed")
    clf.fit(K_train, y_train)

    y_pred = clf.predict(K_val)

    # Compute and print the classification accuracy
    acc = accuracy_score(y_val, y_pred)
    print(acc)
    dump(clf, 'svm_rand_walk.joblib')

    K_test = gk.transform(G_test)
    y_test_pred = clf.predict(K_test)

    print(y_test_pred)
    pickle.dump(K_test, open("k_test.p", "wb"))
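Assuming the artifacts written by main() are on disk, they can be reloaded later without recomputing the kernels (a sketch; paths as above):

import pickle
from joblib import load

K_val = pickle.load(open("k_val.p", "rb"))
clf = load('svm_rand_walk.joblib')
print(clf.predict(K_val))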
Example no. 5
    def fit_transform(self,
                      gr: list,
                      rebuild_model=False,
                      save_gram_matrix=True,
                      layer_weights=None,
                      **kwargs):
        # Transform into GraKeL graph format
        if rebuild_model is False and self._gram is not None:
            return self._gram
        if self.undirected:
            gr = transform_to_undirected(gr)
        if self.type == 'edge':
            if not all([g.graph_type == 'edge_attr' for g in gr]):
                raise ValueError(
                    "One or more graphs passed are not edge-attributed graphs. You need all graphs to be"
                    "in edge format to use 'edge' type Weisfiler-Lehman kernel."
                )

            gr_ = list(
                graph_from_networkx(gr, self.node_label, self.edge_label))
        else:
            gr_ = list(graph_from_networkx(
                gr,
                self.node_label,
            ))

        if rebuild_model or self._gram is None:
            self._train = gr[:]
            self._train_transformed = gr_[:]

        if layer_weights is not None and layer_weights is not self.layer_weights:
            self.change_kernel_params({'layer_weights': layer_weights})
            self.layer_weights = layer_weights

        K = self.kern.fit_transform(gr_)
        if self.return_tensor and not isinstance(K, torch.Tensor):
            K = torch.tensor(K)
        if save_gram_matrix:
            self._gram = K.clone()
            self.layer_weights = self.kern.layer_weights
        return K
Example no. 6
    def transform(
        self,
        gr: list,
    ):
        gr_ = graph_from_networkx(
            gr,
            self.node_label,
        )
        K = self.kern.transform(gr_)
        if not isinstance(K, torch.Tensor):
            K = torch.tensor(K)
        return K
Example no. 7
    def transform(
        self,
        gr: list,
    ):
        gr = transform_to_undirected(gr)
        if self.reindex_node_label:
            gr = self._reindex_node_label(gr)
        gr_ = graph_from_networkx(gr, self.node_label, self.edge_label)
        K = self.kern.transform(gr_)
        if self.return_tensor:
            K = torch.tensor(K)
        return K
Example no. 8
def find_wl_feature(test, feature, kernel):
    """Return the number of occurrences of `feature` in `test`, based on `kernel`."""
    import numpy as np
    if not isinstance(test, list):
        test = [test]
    test = graph_from_networkx(test, 'op_name')

    feat_map = kernel.feature_map(flatten=False)
    len_feat_map = [len(f) for f in feat_map.values()]
    try:
        idx = list(kernel.feature_map(flatten=True).values()).index(feature[0])
    except ValueError:
        # list.index raises ValueError (not KeyError) when the item is missing
        raise ValueError("Feature " + str(feature) + " is not found in the training set of the kernel!")
    embedding = kernel.kern.transform(test, return_embedding_only=True)
    for i, em in enumerate(embedding):
        embedding[i] = em.flatten()[:len_feat_map[i]]
    return np.hstack(embedding)[idx]
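A minimal numeric sketch of the flatten/truncate/index step in find_wl_feature, with made-up per-layer feature counts:

import numpy as np

embedding = [np.array([[3, 1, 0]]), np.array([[2, 0, 4, 9]])]   # per-layer embeddings of one test graph
len_feat_map = [3, 2]                     # features seen during training, per layer
flat = [em.flatten()[:n] for em, n in zip(embedding, len_feat_map)]
counts = np.hstack(flat)                  # -> array([3, 1, 0, 2, 0])
print(counts[3])                          # occurrences of the 4th training feature: 2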
Example no. 9
    def fit_transform(self,
                      gr: list,
                      rebuild_model=False,
                      save_gram_matrix=False,
                      **kwargs):
        if rebuild_model is False and self._gram is not None:
            return self._gram
        gr = transform_to_undirected(gr)
        if self.reindex_node_label:
            gr = self._reindex_node_label(gr)
        gr_ = graph_from_networkx(gr, self.node_label, self.edge_label)
        K = self.kern.fit_transform(gr_)
        if self.return_tensor:
            K = torch.tensor(K)
        if save_gram_matrix:
            self._gram = K.clone()
            self._train = gr[:]
        return K
Example no. 10
    def fit_transform(self,
                      gr: list,
                      rebuild_model=False,
                      save_gram_matrix=False,
                      **kwargs):
        if rebuild_model is False and self._gram is not None:
            return self._gram
        gr_ = list(graph_from_networkx(
            gr,
            self.node_label,
        ))
        if rebuild_model or self._gram is None:
            self._train = gr[:]
            self._train_transformed = gr_[:]
        K = self.kern.fit_transform(gr_)
        if not isinstance(K, torch.Tensor):
            K = torch.tensor(K)
        if save_gram_matrix:
            self._gram = K.clone()
        return K
Example no. 11
    )  # here, with 200 parcels, we'd expect a shape of 200 but get something between 196 and 199, probably for mask reasons
    # construct dict of attributes
    d = {idx: list(attr[idx, :]) for idx in range(attr.shape[0])}
    # add attributes to nodes
    nx.set_node_attributes(gx, d, "attr")
    return gx


if __name__ == "__main__":
    subs_list = ["USM_0050475", "USM_0050478", "USM_0050481"]
    nx_graphs = []
    for sub_name in subs_list:
        nx_graphs.append(compute_graph(sub_name))
    ## Compute gram matrix

    # all your gx graphs are in a list of graphs called nx_graphs
    # transform networkx graphs into GraKeL graphs
    G = list(graph_from_networkx(
        list(nx_graphs),
        node_labels_tag="attr"))  # error here! the attributes can't be found

    gamma = 1.0  # I need to check which value we should use... we will change it later...
    print("GraphHopper gamma : {}".format(gamma))
    gk = GraphHopper(normalize=True, kernel_type=("gaussian", float(gamma)))
    K = gk.fit_transform(G)
    np.save("/scratch/mmahaut/data/abide/graph_classification/gram_matrix.npy",
            K)
    # K is your gram matrix, that you can then use to perform SVM-based classification...
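On the "error here!" comment above: graph_from_networkx only finds node labels whose tag matches the key used in nx.set_node_attributes, which a toy check can confirm (made-up attribute values):

import networkx as nx
from grakel.utils import graph_from_networkx

g = nx.path_graph(2)
nx.set_node_attributes(g, {0: [1.0], 1: [2.0]}, "attr")
# The tag below must match the key used above; a mismatch is the likely
# cause of the "attributes can't be found" error.
G_ok = list(graph_from_networkx([g], node_labels_tag="attr"))
print("transformed:", len(G_ok))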
Example no. 12
import networkx as nx
from grakel.utils import graph_from_networkx

# Creates a list of two simple graphs
G1 = nx.Graph()
G1.add_nodes_from([0,1,2])
G1.add_edges_from([(0,1), (1,2)])

G2 = nx.Graph()
G2.add_nodes_from([0,1,2])
G2.add_edges_from([(0,1), (0,2), (1,2)])

G_nx = [G1, G2]

# Transforms list of NetworkX graphs into a list of GraKeL graphs
G = graph_from_networkx(G_nx)
print("1 - Simple graphs transformed\n")


# Creates a list of two node-labeled graphs
G1 = nx.Graph()
G1.add_nodes_from([0,1,2])
G1.add_edges_from([(0,1), (1,2)])
nx.set_node_attributes(G1, {0:'a', 1:'b', 2:'a'}, 'label')

G2 = nx.Graph()
G2.add_nodes_from([0,1,2])
G2.add_edges_from([(0,1), (0,2), (1,2)])
nx.set_node_attributes(G2, {0:'a', 1:'b', 2:'c'}, 'label')

G_nx = [G1, G2]

# Transforms the list of node-labeled NetworkX graphs into GraKeL graphs
G = graph_from_networkx(G_nx, node_labels_tag='label')
print("2 - Node-labeled graphs transformed\n")
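From here, any GraKeL kernel that consumes node labels can be fitted on G; a short sketch with the stock Weisfeiler-Lehman kernel (the kernel choice is an assumption, not part of the original snippet):

from grakel.kernels import WeisfeilerLehman, VertexHistogram

gk = WeisfeilerLehman(n_iter=2, base_graph_kernel=VertexHistogram, normalize=True)
K = gk.fit_transform(G)   # 2 x 2 Gram matrix between G1 and G2
print(K)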
Example no. 13
    def __init__(self,
                 xtrain,
                 ytrain,
                 gkernel,
                 space='nasbench101',
                 h='auto',
                 noise_var=1e-3,
                 num_steps=200,
                 max_noise_var=1e-1,
                 max_h=3,
                 optimize_noise_var=True,
                 node_label='op_name'):
        self.likelihood = noise_var
        self.space = space
        self.h = h

        if gkernel == 'wl':
            self.wl_base = CustomVertexHistogram, {'sparse': False}
        elif gkernel == 'wloa':
            self.wl_base = CustomVertexHistogram, {'sparse': False, 'oa': True}
        else:
            raise NotImplementedError(gkernel +
                                      ' is not a valid graph kernel choice!')

        self.gkernel = None
        # only applicable for the DARTS search space, where we optimise two graphs jointly.
        self.gkernel_reduce = None

        # sometimes (especially for NAS-Bench-201), we can have invalid graphs with all nodes being pruned. Remove
        # these graphs at training time.
        if self.space == 'nasbench301' or self.space == 'darts':
            # For the NAS-Bench-301 or DARTS search space, we need to search for 2 cells (normal and reduction) simultaneously
            valid_indices = [
                i for i in range(len(xtrain[0]))
                if len(xtrain[0][i]) and len(xtrain[1][i])
            ]
            self.x = np.array(xtrain)[:, valid_indices]
            # self.x = [xtrain[i] for i in valid_indices]
            self.xtrain_converted = [
                list(graph_from_networkx(
                    self.x[0],
                    node_label,
                )),
                list(graph_from_networkx(
                    self.x[1],
                    node_label,
                )),
            ]

        else:
            valid_indices = [i for i in range(len(xtrain)) if len(xtrain[i])]
            self.x = np.array([xtrain[i] for i in valid_indices])
            self.xtrain_converted = list(
                graph_from_networkx(
                    self.x,
                    node_label,
                ))

        ytrain = np.array(ytrain)[valid_indices]
        self.y_ = deepcopy(torch.tensor(ytrain, dtype=torch.float32))
        self.y, self.y_mean, self.y_std = _normalize(deepcopy(self.y_))
        # number of steps of training
        self.num_steps = num_steps

        # other hyperparameters
        self.max_noise_var = max_noise_var
        self.max_h = max_h
        self.optimize_noise_var = optimize_noise_var

        self.node_label = node_label
        self.K_i = None
Example no. 14
    def forward(
        self,
        Xnew,
        full_cov=False,
    ):

        if self.K_i is None:
            raise ValueError("The GraphGP model has not been fit!")

        # At test time, we similarly first check whether there are invalid graphs
        if self.space == 'nasbench301' or self.space == 'darts':
            invalid_indices = [
                i for i in range(len(Xnew[0]))
                if len(Xnew[0][i]) == 0 or len(Xnew[1][i]) == 0
            ]
        else:
            nnodes = np.array([len(x) for x in Xnew])
            invalid_indices = np.argwhere(nnodes == 0)

        # replace the invalid indices with something valid
        patience = 100
        for i in range(len(Xnew)):
            if i in invalid_indices:
                patience -= 1
                continue
            break
        if patience < 0:
            # All architectures are invalid!
            return torch.zeros(len(Xnew)), torch.zeros(len(Xnew))
        for j in invalid_indices:
            if self.space == 'nasbench301' or self.space == 'darts':
                Xnew[0][int(j)] = Xnew[0][i]
                Xnew[1][int(j)] = Xnew[1][i]
            else:
                Xnew[int(j)] = Xnew[i]

        if self.space == 'nasbench301' or self.space == 'darts':
            Xnew_T = np.array(Xnew)
            Xnew = np.array([
                list(graph_from_networkx(
                    Xnew_T[0],
                    self.node_label,
                )),
                list(graph_from_networkx(
                    Xnew_T[1],
                    self.node_label,
                )),
            ])

            X_full = np.concatenate((np.array(self.xtrain_converted), Xnew),
                                    axis=1)
            K_full = (
                0.5 * torch.tensor(self.gkernel.fit_transform(X_full[0]),
                                   dtype=torch.float32) +
                0.5 * torch.tensor(self.gkernel_reduce.fit_transform(X_full[1]),
                                   dtype=torch.float32))
            # Kriging equations
            K_s = K_full[:len(self.x[0]), len(self.x[0]):]
            K_ss = K_full[len(self.x[0]):,
                          len(self.x[0]):] + self.likelihood * torch.eye(
                              Xnew.shape[1], )
        else:
            Xnew = list(graph_from_networkx(
                Xnew,
                self.node_label,
            ))
            X_full = self.xtrain_converted + Xnew
            K_full = torch.tensor(self.gkernel.fit_transform(X_full),
                                  dtype=torch.float32)
            # Kriging equations
            K_s = K_full[:len(self.x), len(self.x):]
            K_ss = K_full[len(self.x):,
                          len(self.x):] + self.likelihood * torch.eye(
                              len(Xnew), )
        mu_s = K_s.t() @ self.K_i @ self.y
        cov_s = K_ss - K_s.t() @ self.K_i @ K_s
        cov_s = torch.clamp(cov_s, self.likelihood, np.inf)
        mu_s = unnormalize_y(mu_s, self.y_mean, self.y_std)
        std_s = torch.sqrt(cov_s)
        std_s = unnormalize_y(std_s, None, self.y_std, True)
        cov_s = std_s**2
        if not full_cov:
            cov_s = torch.diag(cov_s)
        # replace the invalid architectures with zeros
        mu_s[torch.tensor(invalid_indices, dtype=torch.long)] = torch.tensor(
            0., dtype=torch.float32)
        cov_s[torch.tensor(invalid_indices, dtype=torch.long)] = torch.tensor(
            0., dtype=torch.float32)
        return mu_s, cov_s
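The "Kriging equations" above are the standard GP posterior, mu_s = K_s^T K^{-1} y and cov_s = K_ss - K_s^T K^{-1} K_s; a self-contained numeric sketch with random stand-ins for the graph kernel blocks:

import torch

n_train, n_test = 4, 2
A = torch.rand(n_train + n_test, n_train + n_test)
K_full = A @ A.t() + 1e-3 * torch.eye(n_train + n_test)   # SPD stand-in for the kernel
y = torch.rand(n_train)

K_i = torch.inverse(K_full[:n_train, :n_train])           # plays the role of self.K_i
K_s = K_full[:n_train, n_train:]                          # train x test cross-kernel
K_ss = K_full[n_train:, n_train:]
mu_s = K_s.t() @ K_i @ y                                  # posterior mean
cov_s = K_ss - K_s.t() @ K_i @ K_s                        # posterior covariance
print(mu_s.shape, cov_s.shape)                            # torch.Size([2]) torch.Size([2, 2])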
Example no. 15
    print(sub_name, " attr : ", attr.shape)
    # construct dict of attributes
    d = {i: list(attr[i, :]) for i in range(attr.shape[0])}
    # add attributes to nodes
    nx.set_node_attributes(gx, d, "attributes")
    return gx


if __name__ == "__main__":
    with open("subs_list_asd.json") as subs_list_file:
        subs_list = json.load(subs_list_file)
    nx_graphs = list()
    for sub_name in subs_list:
        nx_graphs.append(compute_graph(sub_name))
    ## Compute gram matrix

    # all your gx graphs are in a list of graphs called nx_graphs

    # transform networkx graphs into GraKeL graphs
    G = list(graph_from_networkx(nx_graphs, node_labels_tag="attributes"))

    gamma = 1.0  # I need to check which value we should use... we will change it later...
    print("GraphHopper gamma : {}".format(gamma))
    gk = GraphHopper(normalize=True, kernel_type=("gaussian", float(gamma)))
    K = gk.fit_transform(G)
    np.save("/scratch/mmahaut/data/abide/graph_classification/gram_matrix.npy",
            K)
    # K is your gram matrix, that you can then use to perform SVM-based classification...
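As the closing comment says, K can feed an SVM with a precomputed kernel; a minimal sketch (the labels are hypothetical placeholders, one per subject):

from sklearn.svm import SVC

labels = [i % 2 for i in range(K.shape[0])]   # placeholder labels, one per graph
clf = SVC(kernel="precomputed")
clf.fit(K, labels)
print(clf.predict(K))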