    def __init__(self,
                 adjacency_matrix,
                 attribute_matrix,
                 labels_onehot,
                 hidden_sizes,
                 gpu_id=None,
                 weight_decay=5e-4,
                 learning_rate=0.01,
                 dropout=0.5):
        """
        Parameters
        ----------
        adjacency_matrix: sp.spmatrix [N,N]
            Unweighted, symmetric adjacency matrix where N is the number of nodes. Should be a scipy.sparse matrix.

        attribute_matrix: sp.spmatrix or np.array [N,D]
            Attribute matrix where D is the number of attributes per node. Can be sparse or dense.

        labels_onehot: np.array [N,K]
            One-hot matrix of class labels, where N is the number of nodes. Labels of the unlabeled nodes should come
            from self-training using only the labels of the labeled nodes.

        hidden_sizes: list of ints
            List that defines the number of hidden units per hidden layer. Input and output layers not included.

        gpu_id: int or None
            GPU to use. None means CPU-only

        weight_decay: float, default 5e-4
            L2 regularization for the first layer only (matching the original implementation of GCN)

        learning_rate: float, default 0.01
            The learning rate used for training.

        dropout: float, default 0.5
            Dropout used for training.

        """
        if not sp.issparse(adjacency_matrix):
            raise ValueError("Adjacency matrix should be a sparse matrix.")

        self.N, self.D = attribute_matrix.shape
        self.K = labels_onehot.shape[1]
        self.hidden_sizes = hidden_sizes
        self.graph = tf.Graph()

        self.learning_rate = learning_rate
        self.dropout = dropout
        self.weight_decay = weight_decay

        with self.graph.as_default():
            self.training = tf.placeholder_with_default(False, shape=())

            self.idx = tf.placeholder(tf.int32, shape=[None])
            self.labels_onehot = labels_onehot

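            # Symmetrically normalize the adjacency matrix for GCN propagation;
            # utils.preprocess_graph presumably returns D^-1/2 (A + I) D^-1/2 as a
            # scipy sparse matrix, which is wrapped into a tf.SparseTensor below.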
            adj_norm = utils.preprocess_graph(adjacency_matrix).astype("float32")
            self.adj_norm = tf.SparseTensor(
                np.array(adj_norm.nonzero()).T,
                adj_norm[adj_norm.nonzero()].A1, [self.N, self.N])

            self.sparse_attributes = sp.issparse(attribute_matrix)

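            # Attributes may be sparse or dense. A sparse attribute matrix is kept as a
            # tf.SparseTensor and needs sparse_dropout with the number of stored nonzeros
            # as its noise shape; a dense matrix uses standard tf.nn.dropout.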
            if self.sparse_attributes:
                self.attributes = tf.SparseTensor(
                    np.array(attribute_matrix.nonzero()).T,
                    attribute_matrix[attribute_matrix.nonzero()].A1,
                    [self.N, self.D])
                self.attributes_dropout = sparse_dropout(
                    self.attributes, 1 - self.dropout,
                    (int(self.attributes.values.get_shape()[0]), ))
            else:
                self.attributes = tf.Variable(attribute_matrix,
                                              dtype=tf.float32)
                self.attributes_dropout = tf.nn.dropout(self.attributes,
                                                        rate=dropout)

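            # At graph-construction time, tf.cond switches between the dropped-out and
            # the clean attributes depending on the `training` placeholder, so the same
            # graph serves both training and inference.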
            self.attrs_comp = (tf.cond(self.training,
                                       lambda: self.attributes_dropout,
                                       lambda: self.attributes)
                               if self.dropout > 0. else self.attributes)

            w_init = slim.xavier_initializer
            self.weights = []
            self.biases = []

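            # One Xavier-initialized weight matrix and bias per hidden layer, followed by
            # a final [last_hidden, K] layer that produces the class logits.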
            previous_size = self.D
            for ix, layer_size in enumerate(self.hidden_sizes):
                weight = tf.get_variable(f"W_{ix + 1}",
                                         shape=[previous_size, layer_size],
                                         dtype=tf.float32,
                                         initializer=w_init())
                bias = tf.get_variable(f"b_{ix + 1}",
                                       shape=[layer_size],
                                       dtype=tf.float32,
                                       initializer=w_init())
                self.weights.append(weight)
                self.biases.append(bias)
                previous_size = layer_size

            weight_final = tf.get_variable(f"W_{len(hidden_sizes) + 1}",
                                           shape=[previous_size, self.K],
                                           dtype=tf.float32,
                                           initializer=w_init())
            bias_final = tf.get_variable(f"b_{len(hidden_sizes) + 1}",
                                         shape=[self.K],
                                         dtype=tf.float32,
                                         initializer=w_init())

            self.weights.append(weight_final)
            self.biases.append(bias_final)

            if gpu_id is None:
                config = tf.ConfigProto(device_count={'GPU': 0})
            else:
                gpu_options = tf.GPUOptions(
                    visible_device_list='{}'.format(gpu_id), allow_growth=True)
                config = tf.ConfigProto(gpu_options=gpu_options)

            session = tf.Session(config=config)
            self.session = session

            self.logits = None
            self.logits_gather = None
            self.loss = None
            self.optimizer = None
            self.train_op = None
            self.initializer = None
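
# Hedged usage sketch (an addition, not part of the original source): the
# attributes initialized to None above (logits, loss, train_op, ...) suggest a
# separate build/train step, mirroring how the GCN surrogate is driven in
# Example 3 below. Assuming this __init__ belongs to a class exposing that
# build()/train() API (class name and exact signatures are assumptions):
#
#     gcn = SomeGCN(_A_obs, _X_obs, labels_onehot, hidden_sizes=[16], gpu_id=None)
#     gcn.build(with_relu=True)
#     gcn.train(split_train)
#     preds = gcn.logits.eval(session=gcn.session).argmax(1)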

# Example 2

    def __init__(self, extra_graphs, adjacency_matrix, attribute_matrix,
                 labels_onehot, hidden_sizes, gpu_id=None, isMTL=False):
        """
        Parameters
        ----------
        extra_graphs: list of [adjacency_matrix, attribute_matrix, labels_onehot] triples
            K additional graphs; the target graph passed separately is prepended as graph 0.

        adjacency_matrix: sp.spmatrix [N,N]
            Unweighted, symmetric adjacency matrix where N is the number of nodes. Should be a scipy.sparse matrix.

        attribute_matrix: sp.spmatrix or np.array [N,D]
            Attribute matrix where D is the number of attributes per node. Can be sparse or dense.

        labels_onehot: np.array [N,K]
            One-hot matrix of class labels, where N is the number of nodes. Labels of the unlabeled nodes should come
            from self-training using only the labels of the labeled nodes.

        hidden_sizes: list of ints
            List that defines the number of hidden units per hidden layer. Input and output layers not included.

        gpu_id: int or None
            GPU to use. None means CPU-only
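
        isMTL: bool, default False
            If True, the model is used in the multi-task / multi-label (MTL) mode
            referred to elsewhere in this listing (see Example 3).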

        """
        self.isMTL = isMTL
        if not sp.issparse(adjacency_matrix):
            raise ValueError("Adjacency matrix should be a sparse matrix.")

        self.N, self.D = attribute_matrix.shape
        self.K = labels_onehot.shape[1]
        self.hidden_sizes = hidden_sizes
        self.graph = tf.Graph()
        # graph 0 is the target graph
        self.num_graph = len(extra_graphs) + 1
        self.graphs = [[adjacency_matrix, attribute_matrix, labels_onehot],] + extra_graphs

        with self.graph.as_default():
            self.idx = tf.placeholder(tf.int32, shape=[None])
            self.labels_onehot = [graph[2] for graph in self.graphs]
            
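            # One normalized adjacency SparseTensor per graph; index 0 is the target
            # graph, the remaining entries are the extra (auxiliary) graphs.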
            self.adj_norm = []
            for i in range(self.num_graph):
                _adj_norm = utils.preprocess_graph(self.graphs[i][0]).astype("float32")
                self.adj_norm.append(tf.SparseTensor(
                    np.array(_adj_norm.nonzero()).T,
                    _adj_norm[_adj_norm.nonzero()].A1,
                    [_adj_norm.shape[0], _adj_norm.shape[1]]))

            self.sparse_attributes = sp.issparse(attribute_matrix)

            if self.sparse_attributes:
                self.attributes = [tf.SparseTensor(
                    np.array(graph[1].nonzero()).T,
                    graph[1][graph[1].nonzero()].A1,
                    [graph[1].shape[0], graph[1].shape[1]]) for graph in self.graphs]
            else:
                self.attributes = [tf.constant(graph[1], dtype=tf.float32)
                                   for graph in self.graphs]

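            # A single set of GCN weights is created below, sized from the target graph's
            # D and K; in the multi-task (isMTL) setting these are presumably shared
            # across all graphs (the build step is defined elsewhere).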
            w_init = slim.xavier_initializer
            self.weights = []
            self.biases = []

            previous_size = self.D
            for ix, layer_size in enumerate(self.hidden_sizes):
                weight = tf.get_variable(f"W_{ix + 1}", shape=[previous_size, layer_size], dtype=tf.float32,
                                         initializer=w_init())
                bias = tf.get_variable(f"b_{ix + 1}", shape=[layer_size], dtype=tf.float32,
                                       initializer=w_init())
                self.weights.append(weight)
                self.biases.append(bias)
                previous_size = layer_size
            weight_final = tf.get_variable(f"W_{len(hidden_sizes) + 1}", shape=[previous_size, self.K],
                                           dtype=tf.float32,
                                           initializer=w_init())
            bias_final = tf.get_variable(f"b_{len(hidden_sizes) + 1}", shape=[self.K], dtype=tf.float32,
                                         initializer=w_init())

            self.weights.append(weight_final)
            self.biases.append(bias_final)

            if gpu_id is None:
                config = tf.ConfigProto(
                    device_count={'GPU': 0}
                )
            else:
                gpu_options = tf.GPUOptions(visible_device_list='{}'.format(gpu_id), allow_growth=True)
                config = tf.ConfigProto(gpu_options=gpu_options)

            session = tf.Session(config=config)
            self.session = session

            self.logits = None
            self.logits_gather = None
            self.loss = None
            self.optimizer = None
            self.train_op = None
            self.initializer = None

# Example 3

def get_perturbated_graph(origin_graph, split, rate=0.10, variant="A-Meta-Self",
                          gpu_id='0', isMTL=False):
    share_perturbation = rate
    hidden_sizes = [16]
    _A_obs, _X_obs, _z_obs = origin_graph
    _A_obs.setdiag(0)
    _A_obs = _A_obs.astype("float32")
    _A_obs.eliminate_zeros()
    _X_obs = _X_obs.astype("float32")

    # assert np.abs(_A_obs - _A_obs.T).sum() == 0, "Input graph is not symmetric"
    # assert _A_obs.max() == 1 and len(np.unique(_A_obs[_A_obs.nonzero()].A1)) == 1, "Graph must be unweighted"
    # assert _A_obs.sum(0).A1.min() > 0, "Graph contains singleton nodes"

    _N = _A_obs.shape[0]
    _K = _z_obs.shape[1]
    _Z_obs = _z_obs
    _An = utils.preprocess_graph(_A_obs)
    sizes = [16, _K]
    degrees = _A_obs.sum(0).A1

    unlabeled_share = 0.8
    val_share = 0.1
    train_share = 1 - unlabeled_share - val_share

    split_train, split_val, split_unlabeled = split
    split_unlabeled = np.union1d(split_val, split_unlabeled)
    
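    # Perturbation budget: a fraction `rate` of the undirected edges (A.sum() // 2).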
    perturbations = int(share_perturbation * (_A_obs.sum()//2))
    train_iters = 100
    dtype = tf.float32 # change this to tf.float16 if you run out of GPU memory. Might affect the performance and lead to numerical instability


    #%%
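    # Train a surrogate GCN without ReLU (i.e. a linearized GCN), the usual surrogate
    # for Metattack-style meta-gradient attacks.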
    surrogate = mtk.GCNSparse(_A_obs, _X_obs, _Z_obs, hidden_sizes, isMTL=isMTL, gpu_id=gpu_id)
    surrogate.build(with_relu=False)
    surrogate.train(split_train)


    #%%
    # Predict the labels of the unlabeled nodes to use them for self-training.
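    # In the multi-task / multi-label (isMTL) case, logits are thresholded per class
    # through a sigmoid instead of taking a single argmax.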
    if not isMTL:
        labels_self_training = np.eye(_K)[surrogate.logits.eval(session=surrogate.session).argmax(1)]
    else:
        labels_self_training = np.round(sigmoid(surrogate.logits.eval(session=surrogate.session)))
    labels_self_training[split_train] = _Z_obs[split_train]




    enforce_ll_constrant = False
    approximate_meta_gradient = False
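    # The variant string controls both the attack flavour and its objective:
    # "A-" selects the approximate meta-gradient, lambda_ weights the labeled-training
    # loss (1) against the self-training loss (0), and idx_attack picks the nodes on
    # which the attack loss is evaluated.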
    if variant.startswith("A-"): # approximate meta gradient
        approximate_meta_gradient = True
        if "Train" in variant:
            lambda_ = 1
        elif "Self" in variant:
            lambda_ = 0
        else:
            lambda_ = 0.5
            
    if "Train" in variant:
        idx_attack = split_train
    elif "Self" in variant:
        idx_attack = split_unlabeled
    else:  # Both
        idx_attack = np.union1d(split_train, split_unlabeled)


    #%%
    if approximate_meta_gradient:
        gcn_attack = mtk.GNNMetaApprox(_A_obs, _X_obs, labels_self_training, hidden_sizes,
                                       gpu_id=gpu_id, _lambda=lambda_, train_iters=train_iters,
                                       dtype=dtype, isMTL=isMTL)
    else:
        if sp.issparse(_X_obs):
            _X_obs = _X_obs.toarray().astype("float32")
        gcn_attack = mtk.GNNMeta(_A_obs, _X_obs, labels_self_training, hidden_sizes,
                                 gpu_id=gpu_id, attack_features=False, train_iters=train_iters,
                                 dtype=dtype, isMTL=isMTL)


    #%%
    gcn_attack.build()
    gcn_attack.make_loss(ll_constraint=enforce_ll_constraint)


    #%%
    if approximate_meta_gradient:
        gcn_attack.attack(perturbations, split_train, split_unlabeled, idx_attack)
    else:
        gcn_attack.attack(perturbations, split_train, idx_attack)


    #%%
    # adjacency_changes = gcn_attack.adjacency_changes.eval(session=gcn_attack.session).reshape(_A_obs.shape)
    modified_adjacency = gcn_attack.modified_adjacency.eval(session=gcn_attack.session)
    return sp.csr_matrix(modified_adjacency)

# Example 4

    def __init__(self, adjacency_matrix, attribute_matrix, labels_onehot, hidden_sizes,
                 preprocessed_path, setting, rate, isMTL=False, gpu_id=None):
        """
        Parameters
        ----------
        adjacency_matrix: sp.spmatrix [N,N]
            Unweighted, symmetric adjacency matrix where N is the number of nodes. Should be a scipy.sparse matrix.

        attribute_matrix: sp.spmatrix or np.array [N,D]
            Attribute matrix where D is the number of attributes per node. Can be sparse or dense.

        labels_onehot: np.array [N,K]
            One-hot matrix of class labels, where N is the number of nodes. Labels of the unlabeled nodes should come
            from self-training using only the labels of the labeled nodes.

        hidden_sizes: list of ints
            List that defines the number of hidden units per hidden layer. Input and output layers not included.

        gpu_id: int or None
            GPU to use. None means CPU-only
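
        preprocessed_path: str
            Root directory used to cache the similarity-filtered adjacency matrix.

        setting: str
            Subdirectory name under `preprocessed_path` for this experimental setting.

        rate: float
            Perturbation rate; used only to name the cached adjacency pickle.

        isMTL: bool, default False
            Whether the model is used in the multi-task / multi-label setting.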

        """
        self.isMTL = isMTL
                
        if not sp.issparse(adjacency_matrix):
            raise ValueError("Adjacency matrix should be a sparse matrix.")

        self.N, self.D = attribute_matrix.shape
        self.K = labels_onehot.shape[1]
        self.hidden_sizes = hidden_sizes
        self.graph = tf.Graph()

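        # Cache the similarity-filtered adjacency matrix on disk, keyed by the
        # perturbation rate, so the preprocessing below only runs once per setting.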
        self.pp_path = os.path.join(preprocessed_path, setting, 'preprocess')
        if not os.path.isdir(self.pp_path):
            os.makedirs(self.pp_path)
        self.adj_path = os.path.join(self.pp_path, f'{rate}.pkl')
        if os.path.isfile(self.adj_path):
            adjacency_matrix = pickle.load(open(self.adj_path, 'rb'))
        else:
            # preprocess based on X
            isSparse = False
            if sp.issparse(attribute_matrix):
                isSparse = True
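            # GCN-Jaccard-style preprocessing: score each undirected edge by the feature
            # similarity of its endpoints (Jaccard for sparse/binary attributes, cosine
            # for dense ones) and drop edges whose similarity falls below the threshold.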
            edges = np.array(adjacency_matrix.nonzero()).T
            for edge in edges:
                if edge[0] < edge[1]:
                    if isSparse:
                        # Jaccard similarity between the binary feature vectors
                        nb_shared_ftr = attribute_matrix[edge[0]].multiply(
                            attribute_matrix[edge[1]]).count_nonzero()
                        J = nb_shared_ftr * 1.0 / (
                            attribute_matrix[edge[0]].count_nonzero()
                            + attribute_matrix[edge[1]].count_nonzero()
                            - nb_shared_ftr)
                        if J < 0.8:
                            adjacency_matrix[edge[0], edge[1]] = 0
                            adjacency_matrix[edge[1], edge[0]] = 0
                    else:
                        # Cosine similarity between the dense feature vectors; only the
                        # sign of J matters for the J < 0 test below.
                        J = (attribute_matrix[edge[0]] * attribute_matrix[edge[1]]).sum() / (
                            np.sqrt(np.square(attribute_matrix[edge[0]]).sum())
                            * np.sqrt(np.square(attribute_matrix[edge[1]]).sum()))
                        if J < 0:
                            adjacency_matrix[edge[0], edge[1]] = 0
                            adjacency_matrix[edge[1], edge[0]] = 0
            # adjacency_matrix
            pickle.dump(adjacency_matrix, open(self.adj_path, 'wb'))


        with self.graph.as_default():
            self.idx = tf.placeholder(tf.int32, shape=[None])
            self.labels_onehot = labels_onehot

            adj_norm = utils.preprocess_graph(adjacency_matrix).astype("float32")
            self.adj_norm = tf.SparseTensor(np.array(adj_norm.nonzero()).T,
                                            adj_norm[adj_norm.nonzero()].A1, [self.N, self.N])

            self.sparse_attributes = sp.issparse(attribute_matrix)

            if self.sparse_attributes:
                self.attributes = tf.SparseTensor(
                    np.array(attribute_matrix.nonzero()).T,
                    attribute_matrix[attribute_matrix.nonzero()].A1,
                    [self.N, self.D])
            else:
                self.attributes = tf.constant(attribute_matrix, dtype=tf.float32)

            w_init = slim.xavier_initializer
            self.weights = []
            self.biases = []

            previous_size = self.D
            for ix, layer_size in enumerate(self.hidden_sizes):
                weight = tf.get_variable(f"W_{ix + 1}", shape=[previous_size, layer_size],
                                         dtype=tf.float32, initializer=w_init())
                bias = tf.get_variable(f"b_{ix + 1}", shape=[layer_size], dtype=tf.float32,
                                       initializer=w_init())
                self.weights.append(weight)
                self.biases.append(bias)
                previous_size = layer_size
            weight_final = tf.get_variable(f"W_{len(hidden_sizes) + 1}", shape=[previous_size, self.K],
                                           dtype=tf.float32, initializer=w_init())
            bias_final = tf.get_variable(f"b_{len(hidden_sizes) + 1}", shape=[self.K],
                                         dtype=tf.float32, initializer=w_init())

            self.weights.append(weight_final)
            self.biases.append(bias_final)

            if gpu_id is None:
                config = tf.ConfigProto(
                    device_count={'GPU': 0}
                )
            else:
                gpu_options = tf.GPUOptions(visible_device_list='{}'.format(gpu_id), allow_growth=True)
                config = tf.ConfigProto(gpu_options=gpu_options)

            session = tf.Session(config=config)
            self.session = session

            self.logits = None
            self.logits_gather = None
            self.loss = None
            self.optimizer = None
            self.train_op = None
            self.initializer = None