def call(self, inputs):
    x, a = inputs

    mode = ops.autodetect_mode(x, a)
    if mode == modes.SINGLE and K.is_sparse(a):
        output, attn_coef = self._call_single(x, a)
    else:
        if K.is_sparse(a):
            a = tf.sparse.to_dense(a)
        output, attn_coef = self._call_dense(x, a)

    if self.concat_heads:
        shape = tf.concat(
            (tf.shape(output)[:-2], [self.attn_heads * self.channels]), axis=0
        )
        output = tf.reshape(output, shape)
    else:
        output = tf.reduce_mean(output, axis=-2)

    if self.use_bias:
        output += self.bias

    output = self.activation(output)

    if self.return_attn_coef:
        return output, attn_coef
    else:
        return output

def call(self, inputs):
    X, A = inputs

    N = K.shape(A)[-1]
    # Check if the layer is operating in mixed or batch mode
    mode = ops.autodetect_mode(X, A)
    self.reduce_loss = mode in (modes.MIXED, modes.BATCH)

    # Get normalized adjacency
    if K.is_sparse(A):
        I_ = tf.sparse.eye(N, dtype=A.dtype)
        A_ = tf.sparse.add(A, I_)
    else:
        I_ = tf.eye(N, dtype=A.dtype)
        A_ = A + I_
    fltr = ops.normalize_A(A_)

    # Node embeddings
    Z = K.dot(X, self.kernel_emb)
    Z = ops.modal_dot(fltr, Z)
    if self.activation is not None:
        Z = self.activation(Z)

    # Compute cluster assignment matrix
    S = K.dot(X, self.kernel_pool)
    S = ops.modal_dot(fltr, S)
    S = activations.softmax(S, axis=-1)  # softmax applied row-wise

    # Link prediction loss
    S_gram = ops.modal_dot(S, S, transpose_b=True)
    if mode == modes.MIXED:
        A = tf.sparse.to_dense(A)[None, ...]
    if K.is_sparse(A):
        LP_loss = tf.sparse.add(A, -S_gram)  # A/tf.norm(A) - S_gram/tf.norm(S_gram)
    else:
        LP_loss = A - S_gram
    LP_loss = tf.norm(LP_loss, axis=(-1, -2))
    if self.reduce_loss:
        LP_loss = K.mean(LP_loss)
    self.add_loss(LP_loss)

    # Entropy loss
    entr = tf.negative(
        tf.reduce_sum(tf.multiply(S, K.log(S + K.epsilon())), axis=-1)
    )
    entr_loss = K.mean(entr, axis=-1)
    if self.reduce_loss:
        entr_loss = K.mean(entr_loss)
    self.add_loss(entr_loss)

    # Pooling
    X_pooled = ops.modal_dot(S, Z, transpose_a=True)
    A_pooled = ops.matmul_at_b_a(S, A)

    output = [X_pooled, A_pooled]

    if self.return_mask:
        output.append(S)

    return output

def dot(a, b):
    """
    Computes a @ b, for a, b of the same rank (both 2 or both 3).

    If the rank is 2, then the innermost dimension of `a` must match the
    outermost dimension of `b`.
    If the rank is 3, the first dimension of `a` and `b` must be equal and the
    function computes a batch matmul.

    Supports both dense and sparse multiplication (including sparse-sparse).

    :param a: Tensor or SparseTensor with rank 2 or 3.
    :param b: Tensor or SparseTensor with the same rank as `a`.
    :return: Tensor or SparseTensor with rank 2 or 3.
    """
    a_ndim = K.ndim(a)
    b_ndim = K.ndim(b)
    assert a_ndim == b_ndim, "Expected equal ranks, got {} and {}".format(
        a_ndim, b_ndim
    )
    a_is_sparse = K.is_sparse(a)
    b_is_sparse = K.is_sparse(b)

    # Handle cases: rank 2 sparse-dense, rank 2 dense-sparse
    # In these cases we can use the faster sparse-dense matmul of tf.sparse
    if a_ndim == 2:
        if a_is_sparse and not b_is_sparse:
            return tf.sparse.sparse_dense_matmul(a, b)
        if not a_is_sparse and b_is_sparse:
            return ops.transpose(
                tf.sparse.sparse_dense_matmul(ops.transpose(b), ops.transpose(a))
            )

    # Handle cases: rank 2 sparse-sparse, rank 3 sparse-dense,
    # rank 3 dense-sparse, rank 3 sparse-sparse
    # In these cases we can use the tfsp.CSRSparseMatrix implementation (slower,
    # but saves memory)
    if a_is_sparse:
        a = tfsp.CSRSparseMatrix(a)
    if b_is_sparse:
        b = tfsp.CSRSparseMatrix(b)
    if a_is_sparse or b_is_sparse:
        out = tfsp.matmul(a, b)
        if hasattr(out, "to_sparse_tensor"):
            return out.to_sparse_tensor()
        else:
            return out

    # Handle case: rank 2 dense-dense, rank 3 dense-dense
    # Here we use the standard dense operation
    return tf.matmul(a, b)

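# Usage sketch (not part of the original snippet; assumes the `dot` helper above
# is importable together with its `tf`/`K`/`ops`/`tfsp` dependencies). For a
# rank-2 sparse `a` and dense `b`, the call dispatches to the fast
# tf.sparse.sparse_dense_matmul path.
import numpy as np
import tensorflow as tf

a_sparse = tf.sparse.from_dense(tf.constant(np.eye(3, dtype=np.float32)))
b_dense = tf.random.normal((3, 4))

out = dot(a_sparse, b_dense)  # dense Tensor of shape (3, 4)
print(out.shape)
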
def __init__(self, model, generator):
    """
    Args:
        model (Keras model object): The Keras GAT model.
        generator (FullBatchSequence object): The generator from which we
            extract the features and adjacency matrix.
    """
    # The placeholders for features and adjacency matrix (model input):
    if not isinstance(generator, FullBatchSequence):
        raise TypeError(
            "The generator supplied has to be an object of FullBatchSequence."
        )
    self.model = model
    # Collect variables for IG
    self.deltas = []
    self.non_exist_edges = []
    for var in model.non_trainable_weights:
        if "ig_delta" in var.name:
            self.deltas.append(var)
        if "ig_non_exist_edge" in var.name:
            self.non_exist_edges.append(var)

    features_t, output_indices_t, adj_t = model.input
    # Placeholder for class prediction (model output):
    output = self.model.output
    self.A = generator.A_dense
    self.X = generator.features
    self.is_sparse = K.is_sparse(adj_t)

def modularity_loss(self, a, s, a_pool):
    if K.is_sparse(a):
        n_edges = tf.cast(len(a.values), dtype=s.dtype)

        degrees = tf.sparse.reduce_sum(a, axis=-1)
        degrees = tf.reshape(degrees, (-1, 1))
    else:
        n_edges = tf.cast(tf.math.count_nonzero(a, axis=(-2, -1)), dtype=s.dtype)
        degrees = tf.reduce_sum(a, axis=-1, keepdims=True)

    normalizer_left = tf.matmul(s, degrees, transpose_a=True)
    normalizer_right = tf.matmul(degrees, s, transpose_a=True)

    if K.ndim(s) == 3:
        normalizer = (
            ops.modal_dot(normalizer_left, normalizer_right)
            / 2
            / tf.reshape(n_edges, [tf.shape(n_edges)[0]] + [1] * 2)
        )
    else:
        normalizer = ops.modal_dot(normalizer_left, normalizer_right) / 2 / n_edges

    loss = -tf.linalg.trace(a_pool - normalizer) / 2 / n_edges
    return loss

def call(self, inputs):
    X = inputs[0]
    A = inputs[1]

    mode = ops.autodetect_mode(A, X)
    if mode == modes.SINGLE and K.is_sparse(A):
        output, attn_coef = self._call_single(X, A)
    else:
        output, attn_coef = self._call_dense(X, A)

    if self.concat_heads:
        shape = output.shape[:-2] + [self.attn_heads * self.channels]
        shape = [d if d is not None else -1 for d in shape]
        output = tf.reshape(output, shape)
    else:
        output = tf.reduce_mean(output, axis=-2)

    if self.use_bias:
        output += self.bias

    output = self.activation(output)

    if self.return_attn_coef:
        return output, attn_coef
    else:
        return output

def core_ops_dense(inputs, kernel, bias, activation, dtype, units):
    """Add a GPU-compatible core ops dense function.

    Adapted from the TensorFlow source code.
    """
    rank = inputs.shape.rank
    if rank is not None and rank > 2:
        # Broadcasting is required for the inputs.
        outputs = tf.tensordot(inputs, kernel, [[rank - 1], [0]])
        # Reshape the output back to the original ndim of the input.
        if not tf.executing_eagerly():
            shape = inputs.shape.as_list()
            output_shape = shape[:-1] + [units]
            outputs.set_shape(output_shape)
    else:
        inputs = tf.cast(inputs, dtype)
        if K.is_sparse(inputs):
            # Note: the public TF2 symbol is tf.sparse.sparse_dense_matmul.
            outputs = tf.sparse.sparse_dense_matmul(inputs, kernel)
        else:
            outputs = tf.linalg.matmul(inputs, kernel)
    if bias is not None:
        outputs = nn.bias_add(outputs, bias)
    if activation is not None:
        return activation(outputs)  # pylint: disable=not-callable
    return outputs

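# Usage sketch (not part of the original snippet; assumes `core_ops_dense` above
# and its `nn` dependency are importable). A plain dense layer step on a rank-2
# input: cast, matmul, bias add, activation.
import tensorflow as tf

x = tf.random.normal((8, 16))
w = tf.random.normal((16, 4))
b = tf.zeros((4,))

y = core_ops_dense(x, w, b, tf.nn.relu, tf.float32, units=4)
print(y.shape)  # (8, 4)
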
def call(self, adj):
    """
    The adjacency matrix preprocessing in TensorFlow.
    This function applies the matrix transformations on the adjacency matrix
    that are required by GCN. GCN requires that the input adjacency matrix
    has self-loops and is normalized.

    Args:
        adj (Numpy array): the adjacency matrix to transform.

    Returns:
        The tensor of the transformed adjacency matrix.
    """
    if K.is_sparse(adj):  # isinstance(adj, tf.SparseTensor):
        raise RuntimeError(
            "TensorFlow adjacency matrix normalization not implemented "
            "for sparse matrices."
        )
    else:
        # Add self-loops.
        adj = adj + tf.linalg.diag(
            tf.ones(adj.shape[0]) - tf.linalg.diag_part(adj)
        )
        # Normalization
        rowsum = tf.reduce_sum(adj, 1)
        d_mat_inv_sqrt = tf.linalg.diag(tf.math.rsqrt(rowsum))
        adj_normalized = tf.matmul(tf.matmul(d_mat_inv_sqrt, adj), d_mat_inv_sqrt)
        return adj_normalized

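# Illustration only (not from the original source): the dense branch above
# computes the symmetric GCN normalization D^{-1/2} (A + I) D^{-1/2}.
# A minimal standalone reproduction on a 2-node graph:
import tensorflow as tf

adj = tf.constant([[0.0, 1.0],
                   [1.0, 0.0]])
adj = adj + tf.linalg.diag(tf.ones(adj.shape[0]) - tf.linalg.diag_part(adj))
rowsum = tf.reduce_sum(adj, 1)                    # degrees including self-loops
d_inv_sqrt = tf.linalg.diag(tf.math.rsqrt(rowsum))
adj_norm = tf.matmul(tf.matmul(d_inv_sqrt, adj), d_inv_sqrt)
# adj_norm == [[0.5, 0.5], [0.5, 0.5]]
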
def call(self, inputs):
    """
    Applies the layer.

    Args:
        inputs (list): a list of 3 input tensors that includes
            propagated node features (size 1 x N x F),
            node features (size 1 x N x F),
            graph adjacency matrix (size N x N),
            where N is the number of nodes in the graph, and
            F is the dimensionality of node features.

    Returns:
        Keras Tensor that represents the output of the layer.
    """
    propagated_features, features, *As = inputs
    batch_dim, n_nodes, _ = K.int_shape(features)
    if batch_dim != 1:
        raise ValueError(
            "Currently full-batch methods only support a batch dimension of one"
        )

    # Propagate the node features
    A = As[0]
    if K.is_sparse(A):
        propagated_features = K.squeeze(propagated_features, 0)
        propagated_features = K.dot(A, propagated_features)
        propagated_features = K.expand_dims(propagated_features, 0)
    else:
        propagated_features = K.batch_dot(A, propagated_features)

    output = (1 - self.teleport_probability) * propagated_features
    output += self.teleport_probability * features

    return output

def call(self, inputs, training=False):
    if training and self.error_inject_phase in ['training', 'both']:
        print("TRAINING")
        self.inject_errors()
    elif not training and self.error_inject_phase in ['inference', 'both']:
        print("INFERENCE")
        self.inject_errors()

    rank = len(inputs.shape)
    if rank > 2:
        # Broadcasting is required for the inputs.
        outputs = standard_ops.tensordot(inputs, self.kernel, [[rank - 1], [0]])
        # Reshape the output back to the original ndim of the input.
        if not context.executing_eagerly():
            shape = inputs.shape.as_list()
            output_shape = shape[:-1] + [self.units]
            outputs.set_shape(output_shape)
    else:
        inputs = tf.cast(inputs, self._compute_dtype)
        if K.is_sparse(inputs):
            outputs = sparse_ops.sparse_tensor_dense_matmul(inputs, self.kernel)
        else:
            outputs = tf.matmul(inputs, self.kernel)
    if self.use_bias:
        outputs = tf.nn.bias_add(outputs, self.bias)
    if self.activation is not None:
        return self.activation(outputs)  # pylint: disable=not-callable
    return outputs

def call(self, inputs):
    """
    Overrides the Dense layer call in order to multiply `kernel` by the factor
    `w0` prior to the matmul. This preserves the distribution of the
    activations, while leaving gradients w.r.t. the input of the sine neuron
    unchanged.
    """
    rank = inputs.shape.rank
    if rank is not None and rank > 2:
        # Broadcasting is required for the inputs.
        # [W0 multiplication here !]
        outputs = tf.tensordot(inputs, self.scale * self.kernel, [[rank - 1], [0]])
        # Reshape the output back to the original ndim of the input.
        if not tf.executing_eagerly():
            shape = inputs.shape.as_list()
            output_shape = shape[:-1] + [self.units]
            outputs.set_shape(output_shape)
    else:
        inputs = tf.cast(inputs, self._compute_dtype)
        if K.is_sparse(inputs):
            # [W0 multiplication here !]
            outputs = tf.sparse.sparse_dense_matmul(inputs, self.scale * self.kernel)
        else:
            # [W0 multiplication here !]
            outputs = tf.matmul(inputs, self.scale * self.kernel)
    if self.use_bias:
        outputs = tf.nn.bias_add(outputs, self.bias)
    if self.activation is not None:
        return self.activation(outputs)  # pylint: disable=not-callable
    return outputs

def concatenate(tensors, axis=-1, name="concat"):
    """Concatenates a list of tensors alongside the specified axis.

    Args:
        tensors: list of tensors to concatenate.
        axis: concatenation axis.
        name: str, name for the operation.

    Returns:
        A tensor.

    Example:
        >>> a = tf.constant([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        >>> b = tf.constant([[10, 20, 30], [40, 50, 60], [70, 80, 90]])
        >>> tf.keras.backend.concatenate((a, b), axis=-1)
        <tf.Tensor: shape=(3, 6), dtype=int32, numpy=
        array([[ 1,  2,  3, 10, 20, 30],
               [ 4,  5,  6, 40, 50, 60],
               [ 7,  8,  9, 70, 80, 90]], dtype=int32)>
    """
    if axis < 0:
        rank = K.ndim(tensors[0])
        if rank:
            axis %= rank
        else:
            axis = 0

    if all(K.is_sparse(x) for x in tensors):
        return sparse_ops.sparse_concat(axis, tensors, name=name)
    elif all(isinstance(x, ragged_tensor.RaggedTensor) for x in tensors):
        return array_ops.concat(tensors, axis, name=name)
    else:
        return array_ops.concat([K.to_dense(x) for x in tensors], axis, name=name)

def select(self, x, a, i, leader_mask=None):
    # Cosine similarity
    if i is None:
        i = tf.zeros(self.n_nodes, dtype=tf.int32)
    cosine_similarity = sparse_cosine_similarity(x, self.n_nodes, leader_mask, i)

    # Shortest path regularization
    if self.shortest_path_reg:

        def shortest_path(a_):
            return sparse.csgraph.shortest_path(a_, directed=False)

        np_fn_input = tf.sparse.to_dense(a) if K.is_sparse(a) else a
        beta = 1 / tf.numpy_function(shortest_path, [np_fn_input], tf.float64)
        beta = tf.where(tf.math.is_inf(beta), tf.zeros_like(beta), beta)
        beta = tf.boolean_mask(beta, leader_mask, axis=1)
        beta = tf.cast(
            tf.ensure_shape(beta, cosine_similarity.shape), cosine_similarity.dtype
        )
    else:
        beta = 1.0

    s = tf.sparse.softmax(cosine_similarity)
    s = beta * tf.sparse.to_dense(s)

    # Leaders end up entirely in their own cluster
    kronecker_delta = tf.boolean_mask(
        tf.eye(self.n_nodes, dtype=s.dtype), leader_mask, axis=1
    )

    # Create clustering
    s = tf.where(leader_mask[:, None], kronecker_delta, s)

    return s

def call(self, inputs):
    if self.input_activation is not None:
        inputs = self.input_activation(inputs)
    rank = len(inputs.shape)
    plasticity = tf.multiply(self.kernel_p, self.hebb)
    if rank > 2:
        # Broadcasting is required for the inputs.
        outputs = standard_ops.tensordot(inputs, self.kernel, [[rank - 1], [0]])
        outputs2 = standard_ops.tensordot(inputs, plasticity, [[rank - 1], [0]])
        outputs = tf.add(outputs, outputs2)

        # Plasticity management
        inputs_1 = K.mean(tf.expand_dims(inputs, rank), axis=0)
        outputs_1 = K.mean(tf.expand_dims(outputs, rank - 1), axis=0)
        v = tf.multiply(inputs_1, outputs_1)
        while len(v.shape) > 2:
            v = K.mean(v, axis=0)
        self.hebb.assign((1 - self.eta) * self.hebb + self.eta * v)

        if not context.executing_eagerly():
            shape = inputs.shape.as_list()
            output_shape = shape[:-1] + [self.units]
            outputs.set_shape(output_shape)
    else:
        # Cast the inputs to self.dtype, which is the variable dtype. We do not
        # cast if `should_cast_variables` is True, as in that case the variable
        # will be automatically casted to inputs.dtype.
        if not self._mixed_precision_policy.should_cast_variables:
            inputs = math_ops.cast(inputs, self.dtype)
        if K.is_sparse(inputs):
            outputs = sparse_ops.sparse_tensor_dense_matmul(inputs, self.kernel)
            outputs2 = sparse_ops.sparse_tensor_dense_matmul(inputs, plasticity)
            outputs = tf.add(outputs, outputs2)
        else:
            outputs = gen_math_ops.mat_mul(inputs, self.kernel)
            outputs2 = gen_math_ops.mat_mul(inputs, plasticity)
            outputs = tf.add(outputs, outputs2)

        # Plasticity management
        inputs_1 = K.mean(tf.expand_dims(inputs, rank), axis=0)
        outputs_1 = K.mean(tf.expand_dims(outputs, rank - 1), axis=0)
        self.hebb.assign(
            (1 - self.eta) * self.hebb + self.eta * tf.multiply(inputs_1, outputs_1)
        )

    if self.use_bias:
        outputs = nn.bias_add(outputs, self.bias)

    # inputs_1 = K.mean(tf.expand_dims(inputs, rank), axis=0)
    # outputs_1 = K.mean(tf.expand_dims(outputs, rank - 1), axis=0)
    # self.hebb.assign((1 - self.eta) * self.hebb + self.eta * tf.multiply(inputs_1, outputs_1))
    if self.activation is not None:
        return self.activation(outputs)  # pylint: disable=not-callable
    return outputs

def laplacian(a):
    d = ops.degree_matrix(a, return_sparse_batch=True)
    if K.is_sparse(a):
        a = a.__mul__(-1)
    else:
        a = -a
    return tf.sparse.add(d, a)

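# Usage sketch (not part of the original snippet; assumes `laplacian` above and
# its `ops.degree_matrix` helper are available). The result is the combinatorial
# Laplacian L = D - A.
import tensorflow as tf

a = tf.constant([[0.0, 1.0],
                 [1.0, 0.0]])
L = laplacian(a)
# Expected: [[1, -1], [-1, 1]]
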
def link_prediction_loss(a, s):
    s_gram = ops.modal_dot(s, s, transpose_b=True)
    if K.is_sparse(a):
        lp_loss = tf.sparse.add(a, -s_gram)
    else:
        lp_loss = a - s_gram
    lp_loss = tf.norm(lp_loss, axis=(-1, -2))
    return lp_loss

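# Illustration only (not part of the original snippet; assumes the function
# above and `ops.modal_dot` are importable). The loss is the Frobenius norm
# ||A - S S^T||.
import tensorflow as tf

a = tf.constant([[0.0, 1.0],
                 [1.0, 0.0]])
s = tf.constant([[1.0, 0.0],
                 [0.0, 1.0]])  # each node in its own cluster, so S S^T = I
loss = link_prediction_loss(a, s)
# ||A - I||_F = 2.0
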
def transpose(A, perm=None, name=None):
    if K.is_sparse(A):
        transpose_op = tf.sparse.transpose
    else:
        transpose_op = tf.transpose

    if perm is None:
        perm = (1, 0)  # Make explicit so that shape will always be preserved

    return transpose_op(A, perm=perm, name=name)

def matrix_to_tensor(matrix):
    if any((tf.is_tensor(matrix), K.is_sparse(matrix), matrix is None)):
        return matrix
    elif sp.isspmatrix_csr(matrix) or sp.isspmatrix_csc(matrix):
        return tf.sparse.SparseTensor(*sparse_to_tuple(matrix))
    elif isinstance(matrix, (np.ndarray, list)):
        return tf.convert_to_tensor(matrix)
    else:
        raise TypeError(
            f"Invalid type `{type(matrix)}` of input data. Allowed data types: "
            f"Tensor, SparseTensor, np.ndarray, scipy.sparse.csr_matrix, "
            f"scipy.sparse.csc_matrix, None."
        )

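# Usage sketch (not part of the original snippet; assumes `matrix_to_tensor`
# and its `sparse_to_tuple` helper are importable).
import numpy as np
import scipy.sparse as sp

csr = sp.csr_matrix(np.eye(3, dtype=np.float32))
sparse_t = matrix_to_tensor(csr)            # -> tf.SparseTensor
dense_t = matrix_to_tensor(np.ones((2, 2))) # -> dense tf.Tensor
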
def single_mode_dot(A, B):
    """
    Dot product between two rank 2 matrices. Deals automatically with either A
    or B being sparse.
    :param A: rank 2 Tensor or SparseTensor.
    :param B: rank 2 Tensor or SparseTensor.
    :return: rank 2 Tensor or SparseTensor.
    """
    a_sparse = K.is_sparse(A)
    b_sparse = K.is_sparse(B)
    if a_sparse and b_sparse:
        raise ValueError('Sparse x Sparse matmul is not implemented yet.')
    elif a_sparse:
        output = tf.sparse_tensor_dense_matmul(A, B)
    elif b_sparse:
        output = transpose(
            tf.sparse_tensor_dense_matmul(transpose(B), transpose(A))
        )
    else:
        output = tf.matmul(A, B)

    return output

def degrees(A):
    """
    Computes the degrees of each node in A, dealing with sparse A and batch mode
    automatically.
    :param A: Tensor or SparseTensor with rank k = {2, 3}.
    :return: Tensor or SparseTensor of rank k - 1.
    """
    if K.is_sparse(A):
        D = tf.sparse.reduce_sum(A, axis=-1)
    else:
        D = tf.reduce_sum(A, axis=-1)

    return D

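# Usage sketch (not part of the original snippet): dense and sparse adjacencies
# yield the same degree vector.
import tensorflow as tf

a = tf.constant([[0.0, 1.0, 1.0],
                 [1.0, 0.0, 0.0],
                 [1.0, 0.0, 0.0]])
print(degrees(a))                        # [2. 1. 1.]
print(degrees(tf.sparse.from_dense(a)))  # [2. 1. 1.]
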
def call(self, inputs):
    if len(inputs) == 3:
        X, A, I = inputs
        self.data_mode = 'disjoint'
    else:
        X, A = inputs
        I = tf.zeros(tf.shape(X)[:1])
        self.data_mode = 'single'
    if K.ndim(I) == 2:
        I = I[:, 0]
    I = tf.cast(I, tf.int32)

    A_is_sparse = K.is_sparse(A)

    # Get mask
    y = self.compute_scores(X, A, I)
    N = K.shape(X)[-2]
    indices = ops.segment_top_k(y[:, 0], I, self.ratio, self.top_k_var)
    mask = tf.scatter_nd(tf.expand_dims(indices, 1), tf.ones_like(indices), (N,))

    # Multiply X and y to make layer differentiable
    features = X * self.gating_op(y)

    axis = 0 if len(K.int_shape(A)) == 2 else 1  # Cannot use negative axis in tf.boolean_mask

    # Reduce X
    X_pooled = tf.boolean_mask(features, mask, axis=axis)

    # Compute A^2
    if A_is_sparse:
        A_dense = tf.sparse.to_dense(A)
    else:
        A_dense = A
    A_squared = K.dot(A, A_dense)

    # Reduce A
    A_pooled = tf.boolean_mask(A_squared, mask, axis=axis)
    A_pooled = tf.boolean_mask(A_pooled, mask, axis=axis + 1)
    if A_is_sparse:
        A_pooled = ops.dense_to_sparse(A_pooled)

    output = [X_pooled, A_pooled]

    # Reduce I
    if self.data_mode == 'disjoint':
        I_pooled = tf.boolean_mask(I[:, None], mask)[:, 0]
        output.append(I_pooled)

    if self.return_mask:
        output.append(mask)

    return output

def call(self, inputs, mask=None):
    x, a, i = self.get_inputs(inputs)

    # Graph filter for GNNs
    if K.is_sparse(a):
        i_n = tf.sparse.eye(self.n_nodes, dtype=a.dtype)
        a_ = tf.sparse.add(a, i_n)
    else:
        i_n = tf.eye(self.n_nodes, dtype=a.dtype)
        a_ = a + i_n
    fltr = ops.normalize_A(a_)

    output = self.pool(x, a, i, fltr=fltr, mask=mask)
    return output

def reshape(a, shape=None, name=None):
    """
    Reshapes a according to shape, dealing automatically with sparsity.
    :param a: Tensor or SparseTensor.
    :param shape: new shape.
    :param name: name for the operation.
    :return: Tensor or SparseTensor.
    """
    if K.is_sparse(a):
        reshape_op = tf.sparse.reshape
    else:
        reshape_op = tf.reshape

    return reshape_op(a, shape=shape, name=name)

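# Usage sketch (not part of the original snippet): the same call works for
# dense and sparse inputs.
import tensorflow as tf

dense = tf.range(6, dtype=tf.float32)
print(reshape(dense, (2, 3)).shape)                 # (2, 3)

sparse = tf.sparse.from_dense(tf.eye(4))
print(reshape(sparse, (2, 8)).dense_shape.numpy())  # [2 8]
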
def call(self, inputs):
    x, adj = inputs
    if K.is_sparse(adj):
        # Convert the adjacency matrix to a dense matrix
        adj = tf.sparse.to_dense(adj)
    adj = tf.expand_dims(adj, axis=1)  # (N, 1, N)
    x = tf.expand_dims(x, axis=-1)  # (N, F, 1)
    h = adj * x  # (N, F, N)
    h = tf.transpose(h, perm=(2, 1, 0))
    h = tf.math.top_k(h, k=self.k, sorted=True).values
    h = tf.concat([x, h], axis=-1)
    h = tf.transpose(h, perm=(0, 2, 1))
    return h  # (N, k+1, F)

def is_tf_sparse_tensor(x):
    """Check whether `x` is a sparse Tensor.

    Check whether an object is a `tf.sparse.SparseTensor`.

    NOTE: This method is different from `scipy.sparse.issparse`, which checks
    whether `x` is a Scipy sparse matrix.

    Parameters:
        x: A python object to check.

    Returns:
        `True` iff `x` is a `tf.sparse.SparseTensor`.
    """
    return K.is_sparse(x)

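# Usage sketch (not part of the original snippet).
import numpy as np
import scipy.sparse as sp
import tensorflow as tf

print(is_tf_sparse_tensor(tf.sparse.from_dense(tf.eye(2))))  # True
print(is_tf_sparse_tensor(tf.eye(2)))                        # False
print(is_tf_sparse_tensor(sp.csr_matrix(np.eye(2))))         # False (Scipy, not TF)
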
def call(self, inputs):
    if len(inputs) == 3:
        X, A, I = inputs
        self.data_mode = "disjoint"
    else:
        X, A = inputs
        I = tf.zeros(tf.shape(X)[:1])
        self.data_mode = "single"
    if K.ndim(I) == 2:
        I = I[:, 0]
    I = tf.cast(I, tf.int32)

    A_is_sparse = K.is_sparse(A)

    # Get mask
    y = self.compute_scores(X, A, I)
    N = K.shape(X)[-2]
    indices = ops.segment_top_k(y[:, 0], I, self.ratio)
    indices = tf.sort(indices)  # required for ordered SparseTensors
    mask = ops.indices_to_mask(indices, N)

    # Multiply X and y to make layer differentiable
    features = X * self.gating_op(y)

    axis = (
        0 if len(K.int_shape(A)) == 2 else 1
    )  # Cannot use negative axis in tf.boolean_mask

    # Reduce X
    X_pooled = tf.gather(features, indices, axis=axis)

    # Reduce A
    if A_is_sparse:
        A_pooled, _ = ops.gather_sparse_square(A, indices, mask=mask)
    else:
        A_pooled = tf.gather(A, indices, axis=axis)
        A_pooled = tf.gather(A_pooled, indices, axis=axis + 1)

    output = [X_pooled, A_pooled]

    # Reduce I
    if self.data_mode == "disjoint":
        I_pooled = tf.gather(I, indices)
        output.append(I_pooled)

    if self.return_mask:
        output.append(mask)

    return output

def get_inputs(inputs):
    if len(inputs) == 3:
        x, a, e = inputs
        assert K.ndim(e) == 2, 'E must have rank 2'
    elif len(inputs) == 2:
        x, a = inputs
        e = None
    else:
        raise ValueError(
            'Expected 2 or 3 input tensors (X, A, E), got {}.'.format(len(inputs))
        )
    assert K.ndim(x) == 2, 'X must have rank 2'
    assert K.is_sparse(a), 'A must be a SparseTensor'
    assert K.ndim(a) == 2, 'A must have rank 2'

    return x, a, e

def get_inputs(inputs):
    if len(inputs) == 3:
        x, a, e = inputs
        assert K.ndim(e) in (2, 3), "E must have rank 2 or 3"
    elif len(inputs) == 2:
        x, a = inputs
        e = None
    else:
        raise ValueError(
            "Expected 2 or 3 input tensors (X, A, E), got {}.".format(len(inputs))
        )
    assert K.ndim(x) in (2, 3), "X must have rank 2 or 3"
    assert K.is_sparse(a), "A must be a SparseTensor"
    assert K.ndim(a) == 2, "A must have rank 2"

    return x, a, e

def get_inputs(inputs):
    if len(inputs) == 3:
        X, A, E = inputs
        assert K.ndim(E) == 2, 'E must have rank 2'
    elif len(inputs) == 2:
        X, A = inputs
        E = None
    else:
        raise ValueError(
            'Expected 2 or 3 input tensors (X, A, E), got {}.'.format(len(inputs))
        )
    assert K.ndim(X) == 2, 'X must have rank 2'
    assert K.is_sparse(A), 'A must be a SparseTensor'
    assert K.ndim(A) == 2, 'A must have rank 2'

    return X, A, E

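# Usage sketch (not part of the original snippet): unpacking layer inputs with
# the helper above, where A must be a rank-2 SparseTensor and E is optional.
import tensorflow as tf

x = tf.random.normal((5, 3))                # node features, rank 2
a = tf.sparse.from_dense(tf.eye(5))         # adjacency, rank-2 SparseTensor
X, A, E = get_inputs([x, a])                # E is None when no edge features
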
def transpose(a, perm=None, name=None):
    """
    Transposes a according to perm, dealing automatically with sparsity.
    :param a: Tensor or SparseTensor with rank k.
    :param perm: permutation indices of size k.
    :param name: name for the operation.
    :return: Tensor or SparseTensor with rank k.
    """
    if K.is_sparse(a):
        transpose_op = tf.sparse.transpose
    else:
        transpose_op = tf.transpose

    if perm is None:
        perm = (1, 0)  # Make explicit so that shape will always be preserved

    return transpose_op(a, perm=perm, name=name)

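# Usage sketch (not part of the original snippet): transposing dense and sparse
# tensors with the same helper.
import tensorflow as tf

dense = tf.constant([[1.0, 2.0], [3.0, 4.0]])
print(transpose(dense))                       # [[1. 3.] [2. 4.]]

sparse = tf.sparse.from_dense(dense)
print(tf.sparse.to_dense(transpose(sparse)))  # [[1. 3.] [2. 4.]]
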