def normalized_laplacian(x: tf.SparseTensor,
                         symmetric: bool = True,
                         shift: float = 0.0) -> tf.SparseTensor:
    d = tf.sparse.reduce_sum(x, axis=0)
    if symmetric:
        # L = (1 + shift) * I - D^{-1/2} A D^{-1/2}
        d = tf.math.rsqrt(d)
        row, col = tf.unstack(x.indices, axis=1)
        x = x.with_values(-x.values * tf.gather(d, row, axis=0) *
                          tf.gather(d, col, axis=0))
    else:
        # L = (1 + shift) * I - D^{-1} A
        x = x.with_values(-x.values / tf.gather(d, x.indices[:, 0], axis=0))
    return tf.sparse.add(
        tf.sparse.eye(x.dense_shape[0], dtype=x.dtype) * (1 + shift), x)
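# A minimal usage sketch for normalized_laplacian, assuming TF 2.x eager
# execution; the 3-node path-graph adjacency below is made up for
# illustration.
import tensorflow as tf

adj = tf.SparseTensor(indices=[[0, 1], [1, 0], [1, 2], [2, 1]],
                      values=tf.ones(4),
                      dense_shape=[3, 3])
adj = tf.sparse.reorder(adj)  # many sparse ops expect canonical ordering
lap = normalized_laplacian(adj)
print(tf.sparse.to_dense(lap))  # I - D^{-1/2} A D^{-1/2}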
def value(self) -> 'ArrayType':
    """Return the value in the original framework type.

    :return: the value as a numpy, scipy, tensorflow or pytorch array.
    """
    framework = self._pb_body.cls_name
    if self.is_sparse:
        if framework == 'scipy':
            idx, val, shape = self._get_raw_sparse_array()
            from scipy.sparse import coo_matrix

            x = coo_matrix((val, idx.T), shape=shape)
            sp_format = self._pb_body.parameters['sparse_format']
            if sp_format == 'bsr':
                return x.tobsr()
            elif sp_format == 'csc':
                return x.tocsc()
            elif sp_format == 'csr':
                return x.tocsr()
            elif sp_format == 'coo':
                return x
        elif framework == 'tensorflow':
            idx, val, shape = self._get_raw_sparse_array()
            from tensorflow import SparseTensor

            return SparseTensor(idx, val, shape)
        elif framework == 'torch':
            idx, val, shape = self._get_raw_sparse_array()
            from torch import sparse_coo_tensor

            return sparse_coo_tensor(idx, val, shape)
    else:
        if framework in {'numpy', 'torch', 'paddle', 'tensorflow'}:
            x = _get_dense_array(self._pb_body.dense)
            return _to_framework_array(x, framework)
def to_symmetric(x: tf.SparseTensor, half: bool = False) -> tf.SparseTensor:
    xt = tf.sparse.reorder(  # pylint: disable=no-value-for-parameter
        tf.sparse.transpose(x))
    x = tf.sparse.add(x, xt)
    if half:
        x = x.with_values(x.values / 2)
    return x
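# Hedged usage sketch for to_symmetric: symmetrise a directed adjacency
# matrix (the tiny example tensor is made up).
import tensorflow as tf

directed = tf.SparseTensor(indices=[[0, 1], [1, 2]],
                           values=[1.0, 1.0],
                           dense_shape=[3, 3])
sym = to_symmetric(directed)             # A + A^T
avg = to_symmetric(directed, half=True)  # (A + A^T) / 2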
def test_tf_sparse(idx_shape):
    import numpy as np
    import tensorflow as tf
    from tensorflow import SparseTensor
    from jina.types.ndarray.sparse.tensorflow import SparseNdArray

    a = SparseTensor(indices=idx_shape[0], values=[1, 2, 3],
                     dense_shape=idx_shape[1])
    b = SparseNdArray()
    b.value = a
    np.testing.assert_equal(
        tf.sparse.to_dense(b.value).numpy(),
        tf.sparse.to_dense(a).numpy())
def sparse_constructor(self, indices: 'np.ndarray', values: 'np.ndarray',
                       shape: List[int]) -> 'SparseTensor':
    """
    Sparse NdArray constructor for Tensorflow.

    :param indices: the indices of the sparse array
    :param values: the values of the sparse array
    :param shape: the shape of the sparse array
    :return: SparseTensor
    """
    return SparseTensor(indices, values, shape)
def setUp(self):
    self.keys = ['PassengerId', 'Survived', 'Pclass']
    tensor = SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2],
                          dense_shape=[3, 4])
    self.input = dict()
    for key in self.keys:
        self.input[key] = tensor
def read_system_matrix(dirname, outtype='tf', version=2, theta=None):
    """Read a sparse system matrix from disk.

    Reads in a pre-calculated system matrix from disk that models
    parallel-beam X-ray CT propagation. For more information on how the
    system matrix was calculated, see the GitLab project:
    radon.seas.wustl.edu/xray/xct-parallelbeam-matlab

    This system matrix corresponds to an input of a 256 x 256 image and an
    output of 120 views with NRAYS detectors at each view. The value of
    NRAYS depends on the version of the system matrix. For version 1,
    NRAYS = 367. For subsequent versions, NRAYS = 256.

    Arguments:
        dirname - directory name where files are located
        outtype - either 'tf' or 'scipy'; in the first case, the output
            will have type tf.SparseTensor, in the second, the output will
            have type scipy.sparse.csr_matrix (Default: 'tf')
        version - integer specifying which version of the system matrix to
            read in (Default: 2)
        theta - if given, read the version-3 matrix with this number of
            views instead of using `version` (Default: None)

    Returns:
        Either a tf.SparseTensor or scipy.sparse.csr_matrix with dimensions
        NDATA x NPIXELS. For version 1, NDATA = 367*120 while for version 2,
        NDATA = 256*120. For both versions, NPIXELS = 256*256.
    """
    if theta is None:
        if version == 1:
            fn_prefix = H_FILENAME_PREFIXv1
            ndata = NDATAv1
        else:
            fn_prefix = H_FILENAME_PREFIXv2
            ndata = NDATAv2
    else:
        fn_prefix = 'H' + str(theta) + 'v3_'
        ndata = NRAYSv2 * theta

    irows = np.fromfile(os.path.join(dirname, '%sirows.dat' % fn_prefix),
                        dtype=np.float32)
    icols = np.fromfile(os.path.join(dirname, '%sicols.dat' % fn_prefix),
                        dtype=np.float32)
    vals = np.fromfile(os.path.join(dirname, '%svals.dat' % fn_prefix),
                       dtype=np.float32)

    if outtype == 'tf':
        from tensorflow import SparseTensor

        indices = np.zeros((int(vals.shape[0]), 2), dtype=np.int64)
        # on-disk indices are 1-based (MATLAB), hence the -1
        indices[:, 0] = irows.astype(np.int64) - 1
        indices[:, 1] = icols.astype(np.int64) - 1
        return SparseTensor(indices, vals, dense_shape=(ndata, NPIXELS))

    from scipy.sparse import csr_matrix

    return csr_matrix(
        (vals, (irows.astype(np.int64) - 1, icols.astype(np.int64) - 1)),
        shape=(ndata, NPIXELS))
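# Hedged usage sketch for read_system_matrix; the directory path is
# hypothetical and the matrix files must already exist on disk.
import numpy as np

H = read_system_matrix('/path/to/sysmat', outtype='scipy')
image = np.zeros((256, 256), dtype=np.float32)
sinogram = H @ image.reshape(-1)  # forward projection, shape (NDATA,)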
def normalize_sparse(A: tf.SparseTensor, symmetric: bool = True):
    row_sum = tf.sparse.reduce_sum(A, axis=1)
    tf.debugging.assert_non_negative(row_sum)
    i, j = tf.unstack(A.indices, axis=-1)
    if symmetric:
        # D^{-1/2} A D^{-1/2}, leaving zero-degree rows untouched
        d_vals = tf.math.rsqrt(row_sum)
        d_vals = tf.where(row_sum == 0, tf.ones_like(d_vals), d_vals)
        values = A.values * tf.gather(d_vals, i, axis=0) * tf.gather(
            d_vals, j, axis=0)
    else:
        # D^{-1} A
        d_vals = tf.math.reciprocal(row_sum)
        d_vals = tf.where(row_sum == 0, tf.ones_like(d_vals), d_vals)
        values = A.values * tf.gather(d_vals, i, axis=0)
    return A.with_values(values)
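# Minimal sketch of normalize_sparse on a toy adjacency matrix (made up),
# as used e.g. for GCN-style propagation; assumes TF 2.x eager execution.
import tensorflow as tf

A = tf.SparseTensor(indices=[[0, 1], [1, 0], [1, 1]],
                    values=[1.0, 1.0, 1.0],
                    dense_shape=[2, 2])
A_hat = normalize_sparse(A)  # D^{-1/2} A D^{-1/2}
print(tf.sparse.to_dense(A_hat))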
def build_rating_sparse_tensor(ratings_df, user_label, item_label,
                               rating_label, users_len, items_len):
    """Build the ratings matrix A to be predicted, as a compact sparse
    representation of an otherwise huge dense matrix.

    Args:
        ratings_df: a pd.DataFrame with user, item and rating columns,
            named by `user_label`, `item_label` and `rating_label`.
    Returns:
        a tf.SparseTensor representing the ratings matrix.
    """
    indices = ratings_df[[user_label, item_label]].values
    values = ratings_df[rating_label].values
    return SparseTensor(indices=indices,
                        values=values,
                        dense_shape=[users_len, items_len])
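# Hedged usage sketch for build_rating_sparse_tensor with a made-up
# three-rating DataFrame.
import pandas as pd

ratings = pd.DataFrame({'user_id': [0, 0, 2],
                        'item_id': [1, 3, 0],
                        'rating': [4.0, 2.5, 5.0]})
A = build_rating_sparse_tensor(ratings, 'user_id', 'item_id', 'rating',
                               users_len=3, items_len=4)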
def tf_groupby(time_series: ItemArray, grouping_matrix: ItemVector):
    import tensorflow as tf
    from tensorflow import constant, unique, size, SparseTensor

    time_series = constant(time_series, tf.dtypes.float32)
    grouping = constant(grouping_matrix)
    elements, _ = unique(grouping)
    # indicator lookup: row g has ones at the positions belonging to group g
    grouping_lookup = SparseTensor(
        indices=[[v, i] for i, v in enumerate(grouping)],
        values=tf.ones(size(grouping)),
        dense_shape=[size(elements), size(grouping)],
    )
    output = tf.sparse.sparse_dense_matmul(sp_a=grouping_lookup,
                                           b=time_series)
    return output
def VFE_preprocessing(points, xSize, ySize, zSize, sampleSize, maxVoxelX,
                      maxVoxelY, maxVoxelZ):
    clusteredPoints = {}
    # Iterate through points and add them to voxels. Expecting n to be
    # around 200,000, which could be slow; average time on a local machine
    # is about ~15 sec.
    for idx, point in enumerate(points):
        key = get_voxel(point, xSize, ySize, zSize)
        if -maxVoxelX < key[0] and key[0] < maxVoxelX \
                and -maxVoxelY < key[1] and key[1] < maxVoxelY \
                and 0 < key[2] and key[2] < maxVoxelZ:
            # shift x/y so keys are non-negative
            fixedKey = (key[0] + maxVoxelX, key[1] + maxVoxelY, key[2])
            if fixedKey in clusteredPoints:
                clusteredPoints[fixedKey].append(idx)
            else:
                clusteredPoints[fixedKey] = [idx]
    # Sample points and fill the rest of the voxel if not full
    appendedPoints = {}
    for voxel in clusteredPoints:
        # sample points, then find the centroid
        s = sampleSize if len(clusteredPoints[voxel]) > sampleSize else len(
            clusteredPoints[voxel])
        sampleIdx = np.random.choice(clusteredPoints[voxel], size=s,
                                     replace=False)
        # get points for this voxel
        currPoints = points[sampleIdx]
        centroid = np.mean(currPoints, axis=0)
        # subtract the constant x, y, z values of the centroid from each
        # column; use 0:1 slices to keep 2D arrays
        centroidX = currPoints[:, 0:1] - centroid[0]
        centroidY = currPoints[:, 1:2] - centroid[1]
        centroidZ = currPoints[:, 2:3] - centroid[2]
        concat = np.hstack((currPoints, centroidX, centroidY, centroidZ))
        # zero-pad up to sampleSize points per voxel
        buffer = np.vstack((concat, np.zeros((sampleSize - s, 6))))
        appendedPoints[voxel] = buffer
    indices = []
    values = []
    for voxel in appendedPoints:
        for i in range(len(appendedPoints[voxel])):
            for j in range(len(appendedPoints[voxel][i])):
                indices.append((voxel[2], ) + voxel[:2] + (i, j))
                values.append(appendedPoints[voxel][i][j])
    # return as z, x, y
    return SparseTensor(
        indices=indices,
        values=values,
        dense_shape=[maxVoxelZ, maxVoxelX * 2, maxVoxelY * 2, sampleSize, 6])
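# Hedged usage sketch for VFE_preprocessing on a random point cloud; it
# assumes the get_voxel helper referenced above is defined elsewhere, and
# all parameter values below are purely illustrative.
import numpy as np

points = np.random.uniform(-40.0, 40.0, size=(1000, 3))
voxels = VFE_preprocessing(points, xSize=0.4, ySize=0.4, zSize=0.8,
                           sampleSize=35, maxVoxelX=100, maxVoxelY=100,
                           maxVoxelZ=10)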
def multi_attention_v0(features: tf.Tensor, attention: tf.Tensor,
                       adjacency: tf.SparseTensor):
    """
    Implementation using unstack / stack / sparse_dense_matmul.

    Args:
        features: [Ni, H, F]
        attention: [E, H]
        adjacency: [No, Ni], E non-zero entries.

    Returns:
        [No, H, F] features.
    """
    features = [
        tf.sparse.sparse_dense_matmul(adjacency.with_values(attn), f)
        for attn, f in zip(tf.unstack(attention, axis=1),
                           tf.unstack(features, axis=1))
    ]
    return tf.stack(features, axis=1)
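# Hedged shape check for multi_attention_v0 with made-up sizes
# (Ni = No = 4 nodes, H = 2 heads, F = 3 features, E = 5 edges); the
# attention rows are assumed to follow the adjacency's edge ordering.
import tensorflow as tf

adj = tf.sparse.reorder(tf.SparseTensor(
    indices=[[0, 1], [1, 0], [1, 2], [2, 3], [3, 3]],
    values=tf.ones(5),
    dense_shape=[4, 4]))
features = tf.random.normal([4, 2, 3])
attention = tf.random.normal([5, 2])  # one weight per edge per head
out = multi_attention_v0(features, attention, adj)
print(out.shape)  # (4, 2, 3)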
def __call__(
        self,
        adj_matrices: tf.SparseTensor,
        node_features: tf.Tensor,
        graph_sizes: tf.Tensor,
        mode: tf.estimator.ModeKeys = tf.estimator.ModeKeys.TRAIN
) -> tf.Tensor:
    if not self.built:
        self.build(node_features.shape[2].value,
                   adj_matrices.get_shape()[1].value)

    # Pad features if needed
    if self.initial_node_features_size < self.node_features_size:
        pad_size = self.node_features_size - self.initial_node_features_size
        padding = tf.zeros(
            tf.concat([tf.shape(node_features)[:2], (pad_size, )], axis=0),
            dtype=tf.float32)
        node_features = tf.concat((node_features, padding), axis=2)

    return super().__call__(adj_matrices, node_features, graph_sizes, mode)
def sparse_negate(x: tf.SparseTensor):
    return x.with_values(-x.values)
def __call__(self,
             adj_matrices: tf.SparseTensor,
             node_features: tf.Tensor,  # Shape: [ batch_size, V, D ]
             graph_sizes: tf.Tensor,
             primary_paths: tf.Tensor,
             primary_path_lengths: tf.Tensor,
             mode: tf.estimator.ModeKeys = tf.estimator.ModeKeys.TRAIN
             ) -> tf.Tensor:
    if not self.built:
        self.build(node_features.shape[2].value,
                   adj_matrices.get_shape()[1].value)

    # gather representations for the nodes on the primary path and do
    # decoding on this path
    primary_path_features = batch_gather(node_features, primary_paths)
    if self.encoder_type == "bidirectional_rnn":
        rnn_path_representations, rnn_state = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=self.fwd_cell,
            cell_bw=self.bwd_cell,
            inputs=primary_path_features,
            sequence_length=primary_path_lengths,
            dtype=tf.float32,
            swap_memory=True)
        rnn_path_representations = tf.concat(rnn_path_representations,
                                             axis=-1)
        # concat fwd and bwd representations in all substructures of the state
        f_rnn_state_fwd = tf.contrib.framework.nest.flatten(rnn_state[0])
        f_rnn_state_bwd = tf.contrib.framework.nest.flatten(rnn_state[1])
        f_rnn_state = [tf.concat([t1, t2], axis=-1)
                       for t1, t2 in zip(f_rnn_state_fwd, f_rnn_state_bwd)]
        rnn_state = tf.contrib.framework.nest.pack_sequence_as(
            rnn_state[0], f_rnn_state)
    elif self.encoder_type == "rnn":
        rnn_path_representations, rnn_state = tf.nn.dynamic_rnn(
            cell=self.rnn_cell,
            inputs=primary_path_features,
            sequence_length=primary_path_lengths,
            dtype=tf.float32,
            swap_memory=True)

    batch_size = tf.shape(node_features, out_type=tf.int64)[0]
    max_num_nodes = tf.shape(node_features, out_type=tf.int64)[1]

    # shift indices by 1 and mask padding indices to zero; this ensures
    # that scatter_nd won't use a padding rnn representation over the
    # actual representation for a node with the same index as the padding
    # value, by forcing scatter_nd to write padding representations into
    # "dummy" vectors
    shifted_paths = primary_paths + 1
    shifted_paths = shifted_paths * tf.sequence_mask(primary_path_lengths,
                                                     dtype=tf.int64)
    rnn_representations = tf.scatter_nd(
        indices=tf.reshape(stack_indices(shifted_paths, axis=0), (-1, 2)),
        updates=tf.reshape(rnn_path_representations, (-1, self.num_units)),
        shape=tf.stack([batch_size, max_num_nodes + 1, self.num_units],
                       axis=0))
    # remove dummy vectors
    rnn_representations = rnn_representations[:, 1:, :]

    if self.ignore_graph_encoder:
        return rnn_representations, rnn_state

    node_representations, graph_state = self.base_graph_encoder(
        adj_matrices=adj_matrices,
        node_features=self.merge_layer(
            tf.concat([rnn_representations, node_features], axis=-1)),
        graph_sizes=graph_sizes,
        mode=mode)

    output = self.output_map(
        tf.concat([rnn_representations, node_representations], axis=-1))

    # flatten states (i.e. LSTM/multi-layer tuples) and calculate state size
    flatten_rnn_state_l = tf.contrib.framework.nest.flatten(rnn_state)
    flatten_rnn_state = tf.concat(flatten_rnn_state_l, axis=1)
    state_sizes = []
    for state in flatten_rnn_state_l:
        state_sizes.append(state.get_shape().as_list()[-1])
    total_state_size = sum(state_sizes)

    # concat graph state to this and linearly map back to the flattened size
    self.state_map = tf.layers.Dense(name="state_map",
                                     units=total_state_size,
                                     use_bias=False,
                                     kernel_initializer=eye_glorot)
    flatten_state = self.state_map(
        tf.concat([flatten_rnn_state, graph_state], axis=-1))
    # deflatten
    flatten_state = tf.split(flatten_state, state_sizes, axis=1)
    state = tf.contrib.framework.nest.pack_sequence_as(rnn_state,
                                                       flatten_state)

    return output, state
def sparse_constructor(self, indices: 'np.ndarray', values: 'np.ndarray',
                       shape: List[int]) -> 'SparseTensor':
    return SparseTensor(indices, values, shape)
def tf_sparse_tensor():
    from tensorflow import SparseTensor

    return SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2],
                        dense_shape=[3, 4])
def __call__(
        self,
        adj_matrices: tf.SparseTensor,
        node_features: tf.Tensor,  # Shape: [ batch_size, V, D ]
        graph_sizes: tf.Tensor,
        mode: tf.estimator.ModeKeys = tf.estimator.ModeKeys.TRAIN
) -> tf.Tensor:
    """
    Encode graphs given by a (sparse) adjacency matrix and their initial
    node features, returning the encoding of all graph nodes.

    Args:
        adj_matrices: SparseTensor of dense shape
            [BatchSize, NumEdgeTypes, MaxNumNodes, MaxNumNodes]
            representing edges in the graphs. adj_matrices[g, e, v, u] == 1
            means that in graph g there is an edge of type e between v and u.
        node_features: Tensor of shape
            [BatchSize, MaxNumNodes, NodeFeatureDimension] representing
            initial node features. node_features[g, v, :] are the features
            of node v in graph g.
        graph_sizes: Tensor of shape [BatchSize] with the number of used
            nodes in the batched and padded graphs. graph_sizes[g] is the
            number of nodes in graph g.
        mode: Flag indicating run mode. [Unused]

    Returns:
        Tensor of shape [BatchSize, MaxNumNodes, NodeFeatureDimension].
        Representations for padding nodes will be zero vectors.
    """
    if not self.built:
        self.build(node_features_size=node_features.shape[2].value,
                   num_edge_types=adj_matrices.get_shape()[1].value)

    if self.create_bwd_edges:
        adj_matrices = self._create_backward_edges(adj_matrices)

    # We only care about the edge indices, as adj_matrices is only an
    # indicator matrix with values 1 or not-present (i.e., an adjacency
    # list):
    # Shape: [ num of edges (not edge types) ~ E, 4 ]
    adj_list = tf.cast(adj_matrices.indices, tf.int32)

    max_num_vertices = tf.shape(node_features, out_type=tf.int32)[1]
    total_edges = tf.shape(adj_list, out_type=tf.int32)[0]

    # Calculate offsets for flattening the adj matrices, as we are merging
    # all graphs into one big graph. Nodes in the first graph are
    # range(0, MaxNumNodes) and edges are shifted by [0, 0], nodes in the
    # second graph are range(MaxNumNodes, 2*MaxNumNodes) and edges are
    # shifted by [MaxNumNodes, MaxNumNodes], etc.
    graph_ids_per_edge = adj_list[:, 0]
    node_id_offsets_per_edge = tf.expand_dims(graph_ids_per_edge,
                                              axis=-1) * max_num_vertices
    edge_shifts_per_edge = tf.tile(node_id_offsets_per_edge,
                                   multiples=(1, 2))
    offsets_per_edge = tf.concat(
        [
            # we don't need to shift the edge type
            tf.zeros(shape=(total_edges, 1), dtype=tf.int32),
            edge_shifts_per_edge
        ],
        axis=1)

    # Flatten both adj matrices and node features. For the adjacency list,
    # we strip out the graph id and instead shift the node IDs in edges.
    flattened_adj_list = offsets_per_edge + adj_list[:, 1:]
    flattened_node_features = tf.reshape(
        node_features, shape=(-1, self.node_features_size))

    # propagate on this big graph and unflatten representations
    flattened_node_repr = self._propagate(flattened_adj_list,
                                          flattened_node_features, mode)
    node_representations = tf.reshape(
        flattened_node_repr,
        shape=(-1, max_num_vertices, flattened_node_repr.shape[-1]))

    # mask for padding nodes
    graph_mask = tf.expand_dims(
        tf.sequence_mask(graph_sizes, dtype=tf.float32), -1)

    if self.gated_state:
        gate_layer = tf.layers.Dense(1,
                                     activation=tf.nn.sigmoid,
                                     name="node_gate_layer")
        output_layer = tf.layers.Dense(node_representations.shape[-1],
                                       name="node_output_layer")
        # calculate weighted, node-level outputs
        node_all_repr = tf.concat([node_features, node_representations],
                                  axis=-1)
        graph_state = gate_layer(node_all_repr) * output_layer(
            node_representations)
        graph_state = tf.reduce_sum(graph_state * graph_mask, axis=1)
    else:
        graph_state = tf.reduce_sum(node_representations * graph_mask,
                                    axis=1)
        graph_state /= tf.cast(tf.expand_dims(graph_sizes, 1), tf.float32)

    return node_representations, graph_state
import numpy as np
import tensorflow as tf

var = tf.Variable(3)
var = tf.Variable(3, dtype=tf.int32)
# Use the variable in the graph like any Tensor.
# y = tf.matmul(w, ...another variable or tensor...)
#
# The overloaded operators are available too.
# z = tf.sigmoid(w + y)
#
# Assign a new value to the variable with `assign()` or a related method.
# w.assign(w + 1.0)
# w.assign_add(1.0)

## placeholder
x = tf.placeholder(tf.float32, shape=(1024, 1024))
y = tf.matmul(x, x)

with tf.Session() as sess:
    print(sess.run(y))  # ERROR: will fail because x was not fed.

    rand_array = np.random.rand(1024, 1024)
    print(sess.run(y, feed_dict={x: rand_array}))  # Will succeed.

## sparse tensor (matrix)
# https://github.com/tensorflow/docs/blob/r1.4/site/en/api_docs/api_docs/python/tf/SparseTensor.md
st = tf.SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2],
                     dense_shape=[3, 4])
print(st)
def laplacian(x: tf.SparseTensor) -> tf.SparseTensor:
    # L = D - A, with the degree matrix D built via stfu.diag
    d = tf.sparse.reduce_sum(x, axis=0)
    return tf.sparse.add(stfu.diag(d), x.with_values(-x.values))
def convert_sparse_matrix_to_sparse_tensor(X):
    coo = X.tocoo()
    # stack (row, col) pairs into an [nnz, 2] index array
    indices = np.mat([coo.row, coo.col]).transpose()
    return SparseTensor(indices, coo.data, coo.shape)
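# Hedged usage sketch for convert_sparse_matrix_to_sparse_tensor with a
# small random scipy matrix.
import numpy as np
from scipy.sparse import random as sparse_random

X = sparse_random(5, 4, density=0.3, format='csr', dtype=np.float32)
st = convert_sparse_matrix_to_sparse_tensor(X)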
def negative(st: tf.SparseTensor) -> tf.SparseTensor:
    return st.with_values(-st.values)