def _to_dense(self):
  num_cols = 0
  rows = []
  broadcasted_blocks = [operator.to_dense() for operator in self.operators]
  broadcasted_blocks = linear_operator_util.broadcast_matrix_batch_dims(
      broadcasted_blocks)
  for block in broadcasted_blocks:
    batch_row_shape = array_ops.shape(block)[:-1]

    # Pad each block with zeros on the left (columns already emitted) and on
    # the right (columns still to come), so each padded block spans the full
    # domain dimension.
    zeros_to_pad_before_shape = array_ops.concat(
        [batch_row_shape, [num_cols]], axis=-1)
    zeros_to_pad_before = array_ops.zeros(
        shape=zeros_to_pad_before_shape, dtype=block.dtype)
    num_cols += array_ops.shape(block)[-1]
    zeros_to_pad_after_shape = array_ops.concat(
        [batch_row_shape,
         [self.domain_dimension_tensor() - num_cols]], axis=-1)
    zeros_to_pad_after = array_ops.zeros(
        shape=zeros_to_pad_after_shape, dtype=block.dtype)

    rows.append(array_ops.concat(
        [zeros_to_pad_before, block, zeros_to_pad_after], axis=-1))

  mat = array_ops.concat(rows, axis=-2)
  tensorshape_util.set_shape(mat, tensor_shape.TensorShape(self.shape))
  return mat
def _matmul(self, x, adjoint=False, adjoint_arg=False):
  # Given a Toeplitz matrix, we can embed it in a circulant matrix to perform
  # efficient matrix multiplications. Given a Toeplitz matrix with first row
  # [t_0, t_1, ..., t_{n-1}] and first column [t_0, t_{-1}, ..., t_{-(n-1)}],
  # let C be the circulant matrix with first column [t_0, t_{-1}, ...,
  # t_{-(n-1)}, 0, t_{n-1}, ..., t_1]. Also append `n` zeros to the input
  # vector `x`, to make it a vector of length `2n` (call it y). It can be
  # shown that the first n entries of `Cy` equal the Toeplitz matrix
  # multiplication. See:
  # http://math.mit.edu/icg/resources/teaching/18.085-spring2015/toeplitz.pdf
  # for more details.
  x = linalg.adjoint(x) if adjoint_arg else x
  expanded_x = array_ops.concat([x, array_ops.zeros_like(x)], axis=-2)
  col = ops.convert_to_tensor(self.col)
  row = ops.convert_to_tensor(self.row)
  circulant_col = array_ops.concat(
      [col,
       array_ops.zeros_like(col[..., 0:1]),
       array_ops.reverse(row[..., 1:], axis=[-1])], axis=-1)
  circulant = linear_operator_circulant.LinearOperatorCirculant(
      fft_ops.fft(_to_complex(circulant_col)),
      input_output_dtype=row.dtype)
  result = circulant.matmul(expanded_x, adjoint=adjoint, adjoint_arg=False)

  shape = self._shape_tensor(row=row, col=col)
  return _ops.cast(
      result[..., :self._domain_dimension_tensor(shape=shape), :],
      self.dtype)
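# Example (illustrative, not part of the operator source): a NumPy check of
# the circulant embedding used in `_matmul` above. All names are local to
# this sketch.
import numpy as np

n = 4
first_col = np.array([1., 2., 3., 4.])  # [t_0, t_{-1}, t_{-2}, t_{-3}]
first_row = np.array([1., 5., 6., 7.])  # [t_0, t_1, t_2, t_3]

# Dense Toeplitz matrix: T[i, j] = t_{j - i}.
toeplitz = np.array([[first_col[i - j] if i >= j else first_row[j - i]
                      for j in range(n)] for i in range(n)])

# Circulant first column [t_0, t_{-1}, ..., t_{-(n-1)}, 0, t_{n-1}, ..., t_1]
# of size 2n, and the zero-padded input of length 2n.
circ_col = np.concatenate([first_col, [0.], first_row[:0:-1]])
x = np.random.randn(n)
y = np.concatenate([x, np.zeros(n)])

# A circulant matrix diagonalizes under the DFT, so C @ y is an FFT product.
cy = np.fft.ifft(np.fft.fft(circ_col) * np.fft.fft(y))
np.testing.assert_allclose(cy[:n].real, toeplitz @ x, atol=1e-10)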
def _matmul(self, x, adjoint=False, adjoint_arg=False):
  if self._assert_proper_shapes:
    x = linalg.adjoint(x) if adjoint_arg else x
    aps = linear_operator_util.assert_compatible_matrix_dimensions(self, x)
    x = distribution_util.with_dependencies([aps], x)
  if self.is_square:
    # Note that adjoint has no effect since this matrix is self-adjoint.
    if adjoint_arg:
      output_shape = array_ops.concat([
          array_ops.shape(x)[:-2],
          [array_ops.shape(x)[-1], array_ops.shape(x)[-2]]], axis=0)
    else:
      output_shape = array_ops.shape(x)

    return self._possibly_broadcast_batch_shape(
        array_ops.zeros(shape=output_shape, dtype=x.dtype))

  x_shape = array_ops.shape(x)
  n = self._num_columns if adjoint else self._num_rows
  m = x_shape[-2] if adjoint_arg else x_shape[-1]

  output_shape = array_ops.concat([x_shape[:-2], [n, m]], axis=0)

  zeros = array_ops.zeros(shape=output_shape, dtype=x.dtype)
  return self._possibly_broadcast_batch_shape(zeros)
def _eigvals(self):
  # A Householder reflection has the eigenvalue +1 with multiplicity (n - 1),
  # and a single eigenvalue -1 (along the reflection axis).
  result_shape = array_ops.shape(self.reflection_axis)
  n = result_shape[-1]
  ones_shape = array_ops.concat([result_shape[:-1], [n - 1]], axis=-1)
  neg_shape = array_ops.concat([result_shape[:-1], [1]], axis=-1)

  eigvals = array_ops.ones(shape=ones_shape, dtype=self.dtype)
  eigvals = array_ops.concat(
      [-array_ops.ones(shape=neg_shape, dtype=self.dtype), eigvals], axis=-1)
  return eigvals
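# Example (illustrative, not part of the operator source): a NumPy check
# that a Householder reflection I - 2 v v^T (with unit `v`) has eigenvalues
# [-1, 1, ..., 1].
import numpy as np

v = np.random.randn(5)
v /= np.linalg.norm(v)
householder = np.eye(5) - 2.0 * np.outer(v, v)

eigs = np.sort(np.linalg.eigvalsh(householder))
np.testing.assert_allclose(eigs, [-1., 1., 1., 1., 1.], atol=1e-12)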
def _rotate_last_dim(x, rotate_right=False):
  """Cyclically rotate the axes of `x`: last axis to the front (rotate right),
  or first axis to the end (rotate left)."""
  ndims = array_ops.rank(x)
  if rotate_right:
    transpose_perm = array_ops.concat(
        [[ndims - 1], array_ops.range(0, ndims - 1)], axis=0)
  else:
    transpose_perm = array_ops.concat(
        [array_ops.range(1, ndims), [0]], axis=0)
  return array_ops.transpose(x, transpose_perm)
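# Example (illustrative): the permutations built above, shown with NumPy.
import numpy as np

x = np.zeros((2, 3, 4))
print(np.transpose(x, [2, 0, 1]).shape)  # rotate right: (4, 2, 3)
print(np.transpose(x, [1, 2, 0]).shape)  # rotate left:  (3, 4, 2)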
def _matmul(self, x, adjoint=False, adjoint_arg=False):
  arg_dim = -1 if adjoint_arg else -2
  block_dimensions = (self._block_range_dimensions() if adjoint
                      else self._block_domain_dimensions())
  blockwise_arg = linear_operator_util.arg_is_blockwise(
      block_dimensions, x, arg_dim)
  if blockwise_arg:
    split_x = x
  else:
    split_dim = -1 if adjoint_arg else -2
    # Split `x` into blocks along its rows (columns, if `adjoint_arg`).
    split_x = linear_operator_util.split_arg_into_blocks(
        self._block_domain_dimensions(),
        self._block_domain_dimension_tensors,
        x, axis=split_dim)

  result_list = []
  for index, operator in enumerate(self.operators):
    result_list += [operator.matmul(
        split_x[index], adjoint=adjoint, adjoint_arg=adjoint_arg)]

  if blockwise_arg:
    return result_list

  result_list = linear_operator_util.broadcast_matrix_batch_dims(result_list)
  return array_ops.concat(result_list, axis=-2)
def _to_dense(self):
  row = ops.convert_to_tensor(self.row)
  col = ops.convert_to_tensor(self.col)
  total_shape = array_ops.broadcast_dynamic_shape(
      array_ops.shape(row), array_ops.shape(col))
  n = array_ops.shape(row)[-1]
  row = _ops.broadcast_to(row, total_shape)
  col = _ops.broadcast_to(col, total_shape)

  # We concatenate the column in reverse order to the row.
  # This gives us 2 * n - 1 elements.
  elements = array_ops.concat(
      [array_ops.reverse(col, axis=[-1]), row[..., 1:]], axis=-1)
  # Given the above vector, the i-th row of the Toeplitz matrix
  # is the last n elements of the above vector shifted i right
  # (hence the first row is just the row vector provided, and
  # the first element of each row will belong to the column vector).
  # We construct this set of indices below.
  indices = math_ops.mod(
      # How much to shift right. This corresponds to `i`.
      math_ops.range(0, n) +
      # Specifies the last `n` indices.
      math_ops.range(n - 1, -1, -1)[..., _ops.newaxis],
      # Mod out by the total number of elements to ensure the index is
      # non-negative (for tf.gather) and < 2 * n - 1.
      2 * n - 1)
  return array_ops.gather(elements, indices, axis=-1)
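# Example (illustrative, not part of the operator source): the gather-index
# construction above, in NumPy. Names are local to this sketch.
import numpy as np

col = np.array([1., 2., 3., 4.])  # first column [t_0, t_{-1}, t_{-2}, t_{-3}]
row = np.array([1., 5., 6., 7.])  # first row    [t_0, t_1, t_2, t_3]
n = row.shape[-1]

elements = np.concatenate([col[::-1], row[1:]])  # 2 * n - 1 entries
indices = (np.arange(n) +
           np.arange(n - 1, -1, -1)[:, np.newaxis]) % (2 * n - 1)
print(elements[indices])
# [[1. 5. 6. 7.]
#  [2. 1. 5. 6.]
#  [3. 2. 1. 5.]
#  [4. 3. 2. 1.]]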
def _unblockify_then_matricize(self, vec):
  """Flatten the block dimensions then reshape to a batch matrix."""
  # Suppose
  #   vec.shape = [v0, v1, v2, v3],
  #   self.block_depth = 2.
  # Then
  #   leading shape = [v0, v1]
  #   block shape = [v2, v3].
  # We will reshape vec to
  #   [v1, v2*v3, v0].

  # Un-blockify: Flatten block dimensions. Reshape
  #   [v0, v1, v2, v3] --> [v0, v1, v2*v3].
  if tensor_shape.TensorShape(vec.shape).is_fully_defined():
    # vec_shape = [v0, v1, v2, v3]
    vec_shape = tensor_shape.TensorShape(vec.shape).as_list()
    # vec_leading_shape = [v0, v1]
    vec_leading_shape = vec_shape[:-self.block_depth]
    # vec_block_shape = [v2, v3]
    vec_block_shape = vec_shape[-self.block_depth:]
    # flat_shape = [v0, v1, v2*v3]
    flat_shape = vec_leading_shape + [np.prod(vec_block_shape)]
  else:
    vec_shape = array_ops.shape(vec)
    vec_leading_shape = vec_shape[:-self.block_depth]
    vec_block_shape = vec_shape[-self.block_depth:]
    flat_shape = array_ops.concat(
        (vec_leading_shape, [math_ops.reduce_prod(vec_block_shape)]), 0)
  vec_flat = array_ops.reshape(vec, flat_shape)

  # Matricize: Reshape to batch matrix.
  #   [v0, v1, v2*v3] --> [v1, v2*v3, v0],
  # representing a shape [v1] batch of [v2*v3, v0] matrices.
  matrix = distribution_util.rotate_transpose(vec_flat, shift=-1)
  return matrix
def _vectorize_then_blockify(self, matrix):
  """Shape batch matrix to batch vector, then blockify trailing dimensions."""
  # Suppose
  #   matrix.shape = [m0, m1, m2, m3],
  # and matrix is a matrix because the final two dimensions are matrix dims.
  #   self.block_depth = 2,
  #   self.block_shape = [b0, b1]  (note b0 * b1 = m2).
  # We will reshape matrix to
  #   [m3, m0, m1, b0, b1].

  # Vectorize: Reshape to batch vector.
  #   [m0, m1, m2, m3] --> [m3, m0, m1, m2]
  # This is called "vectorize" because we have taken the final two matrix dims
  # and turned this into a size m3 batch of vectors.
  vec = distribution_util.rotate_transpose(matrix, shift=1)

  # Blockify: Blockify trailing dimensions.
  #   [m3, m0, m1, m2] --> [m3, m0, m1, b0, b1]
  if (tensor_shape.TensorShape(vec.shape).is_fully_defined() and
      self.block_shape.is_fully_defined()):
    # vec_leading_shape = [m3, m0, m1],
    # the parts of vec that will not be blockified.
    vec_leading_shape = tensor_shape.TensorShape(vec.shape)[:-1]
    final_shape = vec_leading_shape.concatenate(self.block_shape)
  else:
    vec_leading_shape = array_ops.shape(vec)[:-1]
    final_shape = array_ops.concat(
        (vec_leading_shape, self.block_shape_tensor()), 0)
  return array_ops.reshape(vec, final_shape)
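# Example (illustrative): the shape round trip of the two helpers above,
# sketched with NumPy. `np.moveaxis(x, -1, 0)` plays the role of
# `rotate_transpose(x, shift=1)`, and `np.moveaxis(x, 0, -1)` of `shift=-1`.
import numpy as np

m0, m1, b0, b1, m3 = 2, 3, 4, 5, 6
matrix = np.zeros((m0, m1, b0 * b1, m3))

# _vectorize_then_blockify: [m0, m1, m2, m3] --> [m3, m0, m1, b0, b1].
vec = np.moveaxis(matrix, -1, 0).reshape((m3, m0, m1, b0, b1))
print(vec.shape)  # (6, 2, 3, 4, 5)

# _unblockify_then_matricize: [m3, m0, m1, b0, b1] --> [m0, m1, b0*b1, m3].
back = np.moveaxis(vec.reshape((m3, m0, m1, b0 * b1)), 0, -1)
print(back.shape)  # (2, 3, 20, 6)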
def _unvec_by(y, num_col):
  """Unstack vector to form a matrix, with a specified number of columns."""
  return _linalg.matrix_transpose(
      array_ops.reshape(
          y,
          array_ops.concat([array_ops.shape(y)[:-1], [num_col, -1]], axis=0)))
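# Example (illustrative): `_unvec_by` undoes column-major vectorization.
import numpy as np

mat = np.arange(6.).reshape(2, 3)
y = mat.T.reshape(-1)          # vec(mat): stack columns, [0, 3, 1, 4, 2, 5]
unvec = y.reshape(3, 2).T      # reshape to [num_col, -1], then transpose
np.testing.assert_array_equal(unvec, mat)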
def _to_dense(self):
  product = self.operators[0].to_dense()
  for operator in self.operators[1:]:
    # Product has shape [B, R1, 1, C1, 1].
    product = product[..., :, _ops.newaxis, :, _ops.newaxis]
    # Operator has shape [B, 1, R2, 1, C2].
    op_to_mul = operator.to_dense()[..., _ops.newaxis, :, _ops.newaxis, :]
    # This is now [B, R1, R2, C1, C2].
    product = product * op_to_mul
    # Now merge together dimensions to get [B, R1 * R2, C1 * C2].
    product = array_ops.reshape(
        product,
        shape=array_ops.concat(
            [array_ops.shape(product)[:-4],
             [array_ops.shape(product)[-4] * array_ops.shape(product)[-3],
              array_ops.shape(product)[-2] * array_ops.shape(product)[-1]]
            ], axis=0))
  tensorshape_util.set_shape(product, tensor_shape.TensorShape(self.shape))
  return product
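# Example (illustrative, not part of the operator source): a NumPy check that
# the broadcast-multiply-reshape above reproduces the Kronecker product.
import numpy as np

a = np.random.randn(2, 3)
b = np.random.randn(4, 5)

product = a[:, np.newaxis, :, np.newaxis] * b[np.newaxis, :, np.newaxis, :]
dense = product.reshape(2 * 4, 3 * 5)
np.testing.assert_allclose(dense, np.kron(a, b))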
def _shape_tensor(self, row=None, col=None):
  row = self.row if row is None else row
  col = self.col if col is None else col
  v_shape = array_ops.broadcast_dynamic_shape(
      array_ops.shape(row), array_ops.shape(col))
  k = v_shape[-1]
  return array_ops.concat((v_shape, [k]), 0)
def _possibly_broadcast_batch_shape(self, x):
  """Return 'x', possibly after broadcasting the leading dimensions."""
  # If we have no batch shape, our batch shape broadcasts with everything!
  if self._batch_shape_arg is None:
    return x

  # Static attempt:
  #   If we determine that no broadcast is necessary, pass x through.
  #   If we need a broadcast, add to an array of zeros.
  #
  # special_shape is the shape that, when broadcast with x's shape, will give
  # the correct broadcast_shape. Note that we have already verified that the
  # second-to-last dimension of `self.shape` matches x's shape in
  # assert_compatible_matrix_dimensions. Also, the final dimension of `x` can
  # have any shape. Therefore, the final two dimensions of special_shape
  # are 1's.
  special_shape = self.batch_shape.concatenate([1, 1])
  bshape = _ops.broadcast_static_shape(
      tensor_shape.TensorShape(x.shape), special_shape)
  if special_shape.is_fully_defined():
    # bshape.is_fully_defined iff special_shape.is_fully_defined.
    if bshape == tensor_shape.TensorShape(x.shape):
      return x
    # Use the built-in broadcasting of addition.
    zeros = array_ops.zeros(shape=special_shape, dtype=self.dtype)
    return x + zeros

  # Dynamic broadcast:
  #   Always add to an array of zeros, rather than using a "cond", since a
  #   cond would require copying data from GPU --> CPU.
  special_shape = array_ops.concat((self.batch_shape_tensor(), [1, 1]), 0)
  zeros = array_ops.zeros(shape=special_shape, dtype=self.dtype)
  return x + zeros
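# Example (illustrative): the broadcast-by-adding-zeros trick used above.
# Adding zeros of shape batch_shape + [1, 1] broadcasts `x` up to the batch
# shape without any data-dependent control flow.
import numpy as np

batch_shape = (3, 2)
x = np.random.randn(5, 4)                 # no batch dimensions
broadcasted = x + np.zeros(batch_shape + (1, 1))
print(broadcasted.shape)                  # (3, 2, 5, 4)
np.testing.assert_allclose(broadcasted[1, 0], x)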
def _shape_tensor(self):
  matrix_shape = array_ops.stack((self._num_rows, self._num_rows), axis=0)
  if self._batch_shape_arg is None:
    return matrix_shape
  return array_ops.concat((self._batch_shape_arg, matrix_shape), 0)
def _diag_part(self):
  diag_list = []
  for operator in self.operators:
    # Extend the axis for broadcasting.
    diag_list += [operator.diag_part()[..., _ops.newaxis]]
  diag_list = linear_operator_util.broadcast_matrix_batch_dims(diag_list)
  diagonal = array_ops.concat(diag_list, axis=-2)
  return array_ops.squeeze(diagonal, axis=-1)
def _shape_tensor(self):
  # See `self.shape` for an explanation of the steps.
  s_shape = array_ops.shape(self._spectrum)
  batch_shape = s_shape[:-self.block_depth]
  trailing_dims = s_shape[-self.block_depth:]
  n = math_ops.reduce_prod(trailing_dims)
  n_x_n = [n, n]
  return array_ops.concat((batch_shape, n_x_n), 0)
def _eigvals(self):
  eig_list = []
  for operator in self.operators:
    # Extend the axis for broadcasting.
    eig_list += [operator.eigvals()[..., _ops.newaxis]]
  eig_list = linear_operator_util.broadcast_matrix_batch_dims(eig_list)
  eigs = array_ops.concat(eig_list, axis=-2)
  return array_ops.squeeze(eigs, axis=-1)
def _shape_tensor(self):
  batch_shape = array_ops.broadcast_dynamic_shape(
      self.base_operator.batch_shape_tensor(),
      array_ops.shape(self.u)[:-2])
  batch_shape = array_ops.broadcast_dynamic_shape(
      batch_shape,
      array_ops.shape(self.v)[:-2])
  return array_ops.concat(
      [batch_shape, self.base_operator.shape_tensor()[-2:]], axis=0)
def _diag_part(self):
  diag_list = []
  for op in self._diagonal_operators:
    # Extend the axis, since `broadcast_matrix_batch_dims` treats all but the
    # final two dimensions as batch dimensions.
    diag_list.append(op.diag_part()[..., _ops.newaxis])
  diag_list = linear_operator_util.broadcast_matrix_batch_dims(diag_list)
  diagonal = array_ops.concat(diag_list, axis=-2)
  return array_ops.squeeze(diagonal, axis=-1)
def _shape_tensor(self, spectrum=None):
  spectrum = self.spectrum if spectrum is None else spectrum
  # See `self.shape` for an explanation of the steps.
  s_shape = array_ops.shape(spectrum)
  batch_shape = s_shape[:-self.block_depth]
  trailing_dims = s_shape[-self.block_depth:]
  n = math_ops.reduce_prod(trailing_dims)
  n_x_n = [n, n]
  return array_ops.concat((batch_shape, n_x_n), 0)
def reshape_inv(y):
  # Expand the extra dims hanging off the end, "b_extra_sh".
  # Note we use y_sh[:-1] + [b_main_sh[-1]] rather than b_main_sh, because y
  # could have different batch dims than a and b, because of broadcasting.
  y_extra_shape = array_ops.concat(
      (array_ops.shape(y)[:-1], [b_main_sh[-1]], b_extra_sh), 0)
  y_extra_on_end = array_ops.reshape(y, y_extra_shape)
  inverse_perm = np.argsort(perm)
  return array_ops.transpose(y_extra_on_end, perm=inverse_perm)
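# Example (illustrative): why `np.argsort(perm)` inverts a permutation.
# `argsort` returns, for each position, the index holding that value, which
# is exactly the inverse axis permutation.
import numpy as np

perm = np.array([2, 0, 3, 1])
inverse_perm = np.argsort(perm)           # [1, 3, 0, 2]

x = np.random.randn(2, 3, 4, 5)
round_trip = np.transpose(np.transpose(x, perm), inverse_perm)
np.testing.assert_array_equal(round_trip, x)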
def _broadcast_batch_dims(self, x, spectrum):
  """Broadcast batch dims of batch matrix `x` and spectrum."""
  # spectrum.shape = batch_shape + block_shape
  # First make spectrum a batch matrix with
  #   spectrum.shape = batch_shape + [prod(block_shape), 1].
  spec_mat = array_ops.reshape(
      spectrum,
      array_ops.concat((self.batch_shape_tensor(), [-1, 1]), axis=0))
  # Second, broadcast, possibly requiring the addition of an array of zeros.
  x, spec_mat = linear_operator_util.broadcast_matrix_batch_dims(
      (x, spec_mat))
  # Third, put the block shape back into spectrum.
  batch_shape = array_ops.shape(x)[:-2]
  spectrum = array_ops.reshape(
      spec_mat,
      array_ops.concat((batch_shape, self.block_shape_tensor()), axis=0))
  return x, spectrum
def _ones_diag(self):
  """Returns the diagonal of this operator as all ones."""
  if tensor_shape.TensorShape(self.shape).is_fully_defined():
    d_shape = self.batch_shape.concatenate([self._min_matrix_dim()])
  else:
    d_shape = array_ops.concat(
        [self.batch_shape_tensor(), [self._min_matrix_dim_tensor()]], axis=0)
  return array_ops.ones(shape=d_shape, dtype=self.dtype)
def _diag_part(self):
  if not all(operator.is_square for operator in self.operators):
    raise NotImplementedError(
        "`diag_part` not implemented for an operator whose blocks are not "
        "square.")
  diag_list = []
  for operator in self.operators:
    # Extend the axis for broadcasting.
    diag_list = diag_list + [operator.diag_part()[..., _ops.newaxis]]
  diag_list = linear_operator_util.broadcast_matrix_batch_dims(diag_list)
  diagonal = array_ops.concat(diag_list, axis=-2)
  return array_ops.squeeze(diagonal, axis=-1)
def _eigvals(self):
  if not all(operator.is_square for operator in self.operators):
    raise NotImplementedError(
        "`eigvals` not implemented for an operator whose blocks are not "
        "square.")
  eig_list = []
  for operator in self.operators:
    # Extend the axis for broadcasting.
    eig_list = eig_list + [operator.eigvals()[..., _ops.newaxis]]
  eig_list = linear_operator_util.broadcast_matrix_batch_dims(eig_list)
  eigs = array_ops.concat(eig_list, axis=-2)
  return array_ops.squeeze(eigs, axis=-1)
def _to_dense(self):
  num_cols = 0
  dense_rows = []
  flat_broadcast_operators = linear_operator_util.broadcast_matrix_batch_dims(
      [op.to_dense() for row in self.operators for op in row])  # pylint: disable=g-complex-comprehension
  # Row i of the lower-triangular structure holds i + 1 blocks, so the flat
  # list is sliced at consecutive triangular numbers.
  broadcast_operators = [
      flat_broadcast_operators[i * (i + 1) // 2:(i + 1) * (i + 2) // 2]
      for i in range(len(self.operators))]
  for row_blocks in broadcast_operators:
    batch_row_shape = array_ops.shape(row_blocks[0])[:-1]
    num_cols += array_ops.shape(row_blocks[-1])[-1]
    zeros_to_pad_after_shape = array_ops.concat(
        [batch_row_shape,
         [self.domain_dimension_tensor() - num_cols]], axis=-1)
    zeros_to_pad_after = array_ops.zeros(
        shape=zeros_to_pad_after_shape, dtype=self.dtype)

    row_blocks.append(zeros_to_pad_after)
    dense_rows.append(array_ops.concat(row_blocks, axis=-1))

  mat = array_ops.concat(dense_rows, axis=-2)
  tensorshape_util.set_shape(mat, tensor_shape.TensorShape(self.shape))
  return mat
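# Example (illustrative): the triangular-number slicing above recovers the
# rows of a lower-triangular block structure from a flat list.
flat = ["b00", "b10", "b11", "b20", "b21", "b22"]
rows = [flat[i * (i + 1) // 2:(i + 1) * (i + 2) // 2] for i in range(3)]
print(rows)  # [['b00'], ['b10', 'b11'], ['b20', 'b21', 'b22']]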
def _matmul(self, x, adjoint=False, adjoint_arg=False):
  split_dim = -1 if adjoint_arg else -2
  # Split `x` into blocks along its rows (columns, if `adjoint_arg`).
  split_x = self._split_input_into_blocks(x, axis=split_dim)

  result_list = []
  for index, operator in enumerate(self.operators):
    result_list += [operator.matmul(
        split_x[index], adjoint=adjoint, adjoint_arg=adjoint_arg)]
  result_list = linear_operator_util.broadcast_matrix_batch_dims(result_list)
  return array_ops.concat(result_list, axis=-2)
def _diag_part(self):
  diag_part = self.operators[0].diag_part()
  for operator in self.operators[1:]:
    diag_part = diag_part[..., :, array_ops.newaxis]
    op_diag_part = operator.diag_part()[..., array_ops.newaxis, :]
    diag_part *= op_diag_part
    diag_part = array_ops.reshape(
        diag_part,
        shape=array_ops.concat(
            [array_ops.shape(diag_part)[:-2], [-1]], axis=0))
  if self.range_dimension > self.domain_dimension:
    diag_dimension = self.domain_dimension
  else:
    diag_dimension = self.range_dimension
  diag_part.set_shape(self.batch_shape.concatenate(diag_dimension))
  return diag_part
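# Example (illustrative, not part of the operator source): a NumPy check of
# the identity used above; for square A and B, diag(kron(A, B)) equals the
# flattened outer product of diag(A) and diag(B).
import numpy as np

a = np.random.randn(3, 3)
b = np.random.randn(4, 4)
np.testing.assert_allclose(
    np.diag(np.kron(a, b)),
    (np.diag(a)[:, np.newaxis] * np.diag(b)[np.newaxis, :]).reshape(-1))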
def _solve(self, rhs, adjoint=False, adjoint_arg=False):
  split_dim = -1 if adjoint_arg else -2
  # Split `rhs` into blocks along its rows (columns, if `adjoint_arg`).
  split_rhs = self._split_input_into_blocks(rhs, axis=split_dim)

  solution_list = []
  for index, operator in enumerate(self.operators):
    solution_list += [operator.solve(
        split_rhs[index], adjoint=adjoint, adjoint_arg=adjoint_arg)]
  solution_list = linear_operator_util.broadcast_matrix_batch_dims(
      solution_list)
  return array_ops.concat(solution_list, axis=-2)
def _shape_tensor(self):
  # Avoid messy broadcasting if possible.
  if tensor_shape.TensorShape(self.shape).is_fully_defined():
    return ops.convert_to_tensor(
        tensor_shape.TensorShape(self.shape).as_list(),
        dtype=dtypes.int32,
        name="shape")

  domain_dimension = sum(self._block_domain_dimension_tensors())
  range_dimension = sum(self._block_range_dimension_tensors())
  matrix_shape = array_ops.stack([domain_dimension, range_dimension])

  # Dummy Tensor of zeros. Will never be materialized.
  zeros = array_ops.zeros(shape=self.operators[0].batch_shape_tensor())
  for operator in self.operators[1:]:
    zeros = zeros + array_ops.zeros(shape=operator.batch_shape_tensor())
  batch_shape = array_ops.shape(zeros)

  return array_ops.concat((batch_shape, matrix_shape), 0)
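# Example (illustrative): the zeros trick above computes the broadcast of
# several batch shapes by actually broadcasting (tiny) arrays, in NumPy:
import numpy as np

batch_shapes = [(3, 1), (1, 4), (4,)]
zeros = np.zeros(batch_shapes[0])
for s in batch_shapes[1:]:
  zeros = zeros + np.zeros(s)
print(zeros.shape)  # (3, 4)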