def _GetVal(use_xla): with self.cached_session(): t0 = array_ops.placeholder(np.float32, shape=input_sizes) t1 = constant_op.constant(filter_sizes, shape=[len(filter_sizes)]) t2 = array_ops.placeholder(np.float32, shape=output_sizes) native_t0 = t0 native_t2 = t2 strides = [1, stride, stride, 1] if use_xla: if data_format == "NCHW": # Transpose from NWHC input to NCHW # Ex. [4, 5, 5, 48] to [4, 48, 5, 5] native_t0 = array_ops.transpose(t0, [0, 3, 1, 2]) native_t2 = array_ops.transpose(t2, [0, 3, 1, 2]) strides = [1, 1, stride, stride] with self.test_scope(): backprop = nn_ops.depthwise_conv2d_native_backprop_filter( native_t0, t1, native_t2, strides=strides, padding=padding, data_format=data_format) else: # For CPU, the format NCHW is not supported. Therefore we always use # NHWC here. backprop = nn_ops.depthwise_conv2d_native_backprop_filter( native_t0, t1, native_t2, strides=strides, padding=padding) ret = backprop.eval({t0: x0, t2: x2}) self.assertShapeEqual(ret, backprop) return ret
def reduce_to_final(images, num_filters_out, nhidden=None, scope=None): """Reduce an image to a final state by running two LSTMs. Args: images: (num_images, height, width, depth) tensor num_filters_out: output layer depth nhidden: hidden layer depth (defaults to num_filters_out) scope: optional scope name Returns: A (num_images, num_filters_out) batch. """ with variable_scope.variable_scope(scope, "ReduceToFinal", [images]): nhidden = nhidden or num_filters_out batch_size, height, width, depth = _shape(images) transposed = array_ops.transpose(images, [1, 0, 2, 3]) reshaped = array_ops.reshape(transposed, [height, batch_size * width, depth]) with variable_scope.variable_scope("reduce1"): reduced = lstm1d.sequence_to_final(reshaped, nhidden) transposed_hidden = array_ops.reshape(reduced, [batch_size, width, nhidden]) hidden = array_ops.transpose(transposed_hidden, [1, 0, 2]) with variable_scope.variable_scope("reduce2"): output = lstm1d.sequence_to_final(hidden, num_filters_out) return output
def rot90(image, k=1): """Rotate an image counter-clockwise by 90 degrees. Args: image: A 3-D tensor of shape `[height, width, channels].` k: Number of times the image is rotated by 90 degrees. Returns: A rotated 3-D tensor of the same type and shape as `image`. """ image = ops.convert_to_tensor(image, name='image') _Check3DImage(image, require_static=False) k %= 4 if k == 0: return image elif k == 1: return array_ops.transpose( array_ops.reverse(image, [False, True, False]), [1, 0, 2], name='rot90') elif k == 2: return array_ops.reverse(image, [True, True, False], name='rot90') elif k == 3: return array_ops.reverse( array_ops.transpose(image, [1, 0, 2], name='rot90'), [False, True, False])
def call(self, inputs): if self.data_format == 'channels_first': # Reshape to channels last inputs = array_ops.transpose(inputs, (0, 2, 3, 1)) # Apply the actual ops. outputs = nn.separable_conv2d( inputs, self.depthwise_kernel, self.pointwise_kernel, strides=(1,) + self.strides + (1,), padding=self.padding.upper(), rate=self.dilation_rate) if self.data_format == 'channels_first': # Reshape to channels first outputs = array_ops.transpose(outputs, (0, 3, 1, 2)) if self.bias: outputs = nn.bias_add( outputs, self.bias, data_format=utils.convert_data_format(self.data_format, ndim=4)) if self.activation is not None: return self.activation(outputs) return outputs
def _state_to_olabel_unique(labels, num_labels, states, unique): """Sum state log probs to ilabel log probs using unique label indices.""" num_label_states = _get_dim(labels, 1) + 1 label_states = states[:, :, 1:num_label_states] blank_states = states[:, :, num_label_states:] unique_y, unique_idx = unique mul_reduce = _sum_states(unique_idx, label_states) num_frames = states.shape[0] batch_size = states.shape[1] num_states = num_label_states - 1 batch_state_major = array_ops.transpose(mul_reduce, perm=[1, 2, 0]) batch_state_major = array_ops.reshape( batch_state_major, [batch_size * num_states, num_frames]) batch_offset = math_ops.range(batch_size, dtype=unique_y.dtype) * num_labels indices = unique_y + array_ops.expand_dims(batch_offset, axis=-1) indices = array_ops.reshape(indices, [-1, 1]) scatter = array_ops.scatter_nd( indices=indices, updates=batch_state_major, shape=[batch_size * num_labels, num_frames]) scatter = array_ops.reshape(scatter, [batch_size, num_labels, num_frames]) scatter = array_ops.where( math_ops.equal(scatter, 0.0), array_ops.fill(array_ops.shape(scatter), math_ops.log(0.0)), scatter) label_olabels = array_ops.transpose(scatter, [2, 0, 1]) label_olabels = label_olabels[:, :, 1:] blank_olabels = math_ops.reduce_logsumexp( blank_states, axis=2, keepdims=True) return array_ops.concat([blank_olabels, label_olabels], axis=-1)
def frames(signal, frame_length, frame_step, name=None): """Frame a signal into overlapping frames. May be used in front of spectral functions. For example: ```python pcm = tf.placeholder(tf.float32, [None, 9152]) frames = tf.contrib.signal.frames(pcm, 512, 180) magspec = tf.abs(tf.spectral.rfft(frames, [512])) image = tf.expand_dims(magspec, 3) ``` Args: signal: A `Tensor` of shape `[batch_size, signal_length]`. frame_length: An `int32` or `int64` `Tensor`. The length of each frame. frame_step: An `int32` or `int64` `Tensor`. The step between frames. name: A name for the operation (optional). Returns: A `Tensor` of frames with shape `[batch_size, num_frames, frame_length]`. Raises: ValueError: if signal does not have rank 2. """ with ops.name_scope(name, "frames", [signal, frame_length, frame_step]): signal = ops.convert_to_tensor(signal, name="signal") frame_length = ops.convert_to_tensor(frame_length, name="frame_length") frame_step = ops.convert_to_tensor(frame_step, name="frame_step") signal_rank = signal.shape.ndims if signal_rank != 2: raise ValueError("expected signal to have rank 2 but was " + signal_rank) signal_length = array_ops.shape(signal)[1] num_frames = math_ops.ceil((signal_length - frame_length) / frame_step) num_frames = 1 + math_ops.cast(num_frames, dtypes.int32) pad_length = (num_frames - 1) * frame_step + frame_length pad_signal = array_ops.pad(signal, [[0, 0], [0, pad_length - signal_length]]) indices_frame = array_ops.expand_dims(math_ops.range(frame_length), 0) indices_frames = array_ops.tile(indices_frame, [num_frames, 1]) indices_step = array_ops.expand_dims( math_ops.range(num_frames) * frame_step, 1) indices_steps = array_ops.tile(indices_step, [1, frame_length]) indices = indices_frames + indices_steps # TODO(androbin): remove `transpose` when `gather` gets `axis` support pad_signal = array_ops.transpose(pad_signal) signal_frames = array_ops.gather(pad_signal, indices) signal_frames = array_ops.transpose(signal_frames, perm=[2, 0, 1]) return signal_frames
def _SparseMatMul(t1, t2, transpose_a=False, transpose_b=False): """Helper function to create SparseMatMul op.""" assert t1 in is_sparse and t2 in is_sparse t1_sparse = is_sparse[t1] t2_sparse = is_sparse[t2] if not t1_sparse and not t2_sparse: return math_ops.matmul(t1, t2, transpose_a=transpose_a, transpose_b=transpose_b) transpose_out = False if not t1_sparse: transpose_out = True t1, t2 = t2, t1 t1_sparse, t2_sparse = t2_sparse, t1_sparse assert t1_sparse transpose_a, transpose_b = not transpose_b, not transpose_a if transpose_b: t2 = array_ops.transpose(t2) transpose_b = False m = math_ops.matmul(t1, t2, transpose_a=transpose_a, transpose_b=transpose_b, a_is_sparse=t1_sparse, b_is_sparse=t2_sparse) if transpose_out: m = array_ops.transpose(m) return m
def separable_lstm(images, num_filters_out, kernel_size=None, nhidden=None, scope=None): """Run bidirectional LSTMs first horizontally then vertically. Args: images: (num_images, height, width, depth) tensor num_filters_out: output layer depth kernel_size: A list of length 2 holding the [kernel_height, kernel_width] of of the pooling. Can be an int if both values are the same. Set to None for not using blocks nhidden: hidden layer depth scope: optional scope name Returns: (num_images, height/kernel_height, width/kernel_width, num_filters_out) tensor """ with variable_scope.variable_scope(scope, "SeparableLstm", [images]): if nhidden is None: nhidden = num_filters_out if kernel_size is not None: images = get_blocks(images, kernel_size) hidden = horizontal_lstm(images, nhidden) with variable_scope.variable_scope("vertical"): transposed = array_ops.transpose(hidden, [0, 2, 1, 3]) output_transposed = horizontal_lstm(transposed, num_filters_out) output = array_ops.transpose(output_transposed, [0, 2, 1, 3]) return output
def _sample_n(self, n, seed): batch_shape = self.batch_shape_tensor() event_shape = self.event_shape_tensor() batch_ndims = array_ops.shape(batch_shape)[0] ndims = batch_ndims + 3 # sample_ndims=1, event_ndims=2 shape = array_ops.concat([[n], batch_shape, event_shape], 0) # Complexity: O(nbk**2) x = random_ops.random_normal(shape=shape, mean=0., stddev=1., dtype=self.dtype, seed=seed) # Complexity: O(nbk) # This parametrization is equivalent to Chi2, i.e., # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2) expanded_df = self.df * array_ops.ones( self.scale_operator.batch_shape_tensor(), dtype=self.df.dtype.base_dtype) g = random_ops.random_gamma(shape=[n], alpha=self._multi_gamma_sequence( 0.5 * expanded_df, self.dimension), beta=0.5, dtype=self.dtype, seed=distribution_util.gen_new_seed( seed, "wishart")) # Complexity: O(nbk**2) x = array_ops.matrix_band_part(x, -1, 0) # Tri-lower. # Complexity: O(nbk) x = array_ops.matrix_set_diag(x, math_ops.sqrt(g)) # Make batch-op ready. # Complexity: O(nbk**2) perm = array_ops.concat([math_ops.range(1, ndims), [0]], 0) x = array_ops.transpose(x, perm) shape = array_ops.concat([batch_shape, [event_shape[0]], [-1]], 0) x = array_ops.reshape(x, shape) # Complexity: O(nbM) where M is the complexity of the operator solving a # vector system. E.g., for LinearOperatorDiag, each matmul is O(k**2), so # this complexity is O(nbk**2). For LinearOperatorLowerTriangular, # each matmul is O(k^3) so this step has complexity O(nbk^3). x = self.scale_operator.matmul(x) # Undo make batch-op ready. # Complexity: O(nbk**2) shape = array_ops.concat([batch_shape, event_shape, [n]], 0) x = array_ops.reshape(x, shape) perm = array_ops.concat([[ndims - 1], math_ops.range(0, ndims - 1)], 0) x = array_ops.transpose(x, perm) if not self.cholesky_input_output_matrices: # Complexity: O(nbk^3) x = math_ops.matmul(x, x, adjoint_b=True) return x
def cudnn_lstm(inputs, input_h, input_c, kernel, recurrent_kernel, bias, units): inputs = array_ops.transpose(inputs, perm=(1, 0, 2)) input_h = array_ops.expand_dims(input_h, axis=0) input_c = array_ops.expand_dims(input_c, axis=0) params = _canonical_to_params( weights=[ kernel[:, :units], kernel[:, units:units * 2], kernel[:, units * 2:units * 3], kernel[:, units * 3:], recurrent_kernel[:, :units], recurrent_kernel[:, units:units * 2], recurrent_kernel[:, units * 2:units * 3], recurrent_kernel[:, units * 3:], ], biases=[ bias[:units], bias[units:units * 2], bias[units * 2:units * 3], bias[units * 3:units * 4], bias[units * 4:units * 5], bias[units * 5:units * 6], bias[units * 6:units * 7], bias[units * 7:], ], shape=constant_op.constant([-1])) outputs, h, c, _ = gen_cudnn_rnn_ops.cudnn_rnn( inputs, input_h=input_h, input_c=input_c, params=params) outputs = array_ops.transpose(outputs, perm=[1, 0, 2]) h = h[0] c = c[0] return outputs, [h, c], constant_op.constant( 'cudnn', dtype=dtypes.string, name='runtime')
def _ExtractImagePatchesGrad(op, grad): batch_size, rows_in, cols_in, channels = [ dim.value for dim in op.inputs[0].shape.dims ] input_bhwc = array_ops.shape(op.inputs[0]) batch_size = input_bhwc[0] channels = input_bhwc[3] # Create indices matrix for input tensor. # Note that 0 is preserved for padding location, # so indices for input start from 1 to 1 + rows_in * cols_in. input_indices_num = 1 + rows_in * cols_in input_idx = array_ops.reshape(math_ops.range(1, input_indices_num, dtype=ops.dtypes.int64), (1, rows_in, cols_in, 1)) input_idx_patched = gen_array_ops.extract_image_patches( input_idx, op.get_attr("ksizes"), op.get_attr("strides"), op.get_attr("rates"), op.get_attr("padding")) # Create indices matrix for output tensor. _, rows_out, cols_out, _ = [dim.value for dim in op.outputs[0].shape.dims] _, ksize_r, ksize_c, _ = op.get_attr("ksizes") # Indices for output start from 0. output_indices_num = rows_out * cols_out * ksize_r * ksize_c output_idx = array_ops.reshape(math_ops.range(output_indices_num, dtype=ops.dtypes.int64), (1, rows_out, cols_out, ksize_r * ksize_c)) # Construct mapping table for indices: (input -> output). idx_matrix = array_ops.concat( [array_ops.expand_dims(input_idx_patched, axis=-1), array_ops.expand_dims(output_idx, axis=-1)], axis=-1) idx_map = array_ops.reshape(idx_matrix, (-1, 2)) sp_shape = (input_indices_num, output_indices_num) sp_mat_full = sparse_tensor.SparseTensor( idx_map, array_ops.ones([output_indices_num], dtype=grad.dtype), sp_shape) # Remove all padding locations [0, :]. sp_mat = sparse_ops.sparse_slice(sp_mat_full, (1, 0), (input_indices_num - 1, output_indices_num)) grad_expanded = array_ops.transpose( array_ops.reshape( grad, (batch_size, rows_out, cols_out, ksize_r, ksize_c, channels)), (1, 2, 3, 4, 0, 5)) grad_flat = array_ops.reshape(grad_expanded, (-1, batch_size * channels)) jac = sparse_ops.sparse_tensor_dense_matmul(sp_mat, grad_flat) grad_out = array_ops.reshape(jac, (rows_in, cols_in, batch_size, channels)) grad_out = array_ops.transpose(grad_out, (2, 0, 1, 3)) return [grad_out]
def make_inverse_update_ops(self): """Create and return update ops corresponding to registered computations.""" ops = [] num_inverses = len(self._inverses_by_damping) matrix_power_registered = bool(self._matpower_by_exp_and_damping) use_eig = ( self._eigendecomp or matrix_power_registered or num_inverses >= EIGENVALUE_DECOMPOSITION_THRESHOLD) if use_eig: self.register_eigendecomp() # ensures self._eigendecomp is set eigenvalues, eigenvectors = self._eigendecomp # pylint: disable=unpacking-non-sequence for damping, inv in self._inverses_by_damping.items(): ops.append( inv.assign( math_ops.matmul(eigenvectors / (eigenvalues + damping), array_ops.transpose(eigenvectors)))) for (exp, damping), matpower in self._matpower_by_exp_and_damping.items(): ops.append( matpower.assign( math_ops.matmul(eigenvectors * (eigenvalues + damping)**exp, array_ops.transpose(eigenvectors)))) # These ops share computation and should be run on a single device. ops = [control_flow_ops.group(*ops)] else: for damping, inv in self._inverses_by_damping.items(): ops.append(inv.assign(utils.posdef_inv(self._cov, damping))) return ops
def make_inverse_update_ops(self): """Create and return update ops corresponding to registered computations.""" ops = super(InverseProvidingFactor, self).make_inverse_update_ops() num_inverses = len(self._inverses_by_damping) matrix_power_registered = bool(self._matpower_by_exp_and_damping) use_eig = (self._eigendecomp or matrix_power_registered or num_inverses >= EIGENVALUE_DECOMPOSITION_THRESHOLD) if use_eig: self.register_eigendecomp() # ensures self._eigendecomp is set eigenvalues, eigenvectors = self._eigendecomp # pylint: disable=unpacking-non-sequence # The matrix self._cov is positive semidefinite by construction, but the # numerical eigenvalues could be negative due to numerical errors, so here # we clip them to be at least EIGENVALUE_CLIPPING_THRESHOLD. clipped_eigenvalues = math_ops.maximum(eigenvalues, EIGENVALUE_CLIPPING_THRESHOLD) for damping, inv in self._inverses_by_damping.items(): ops.append( inv.assign( math_ops.matmul(eigenvectors / (clipped_eigenvalues + damping), array_ops.transpose(eigenvectors)))) for (exp, damping), matpower in self._matpower_by_exp_and_damping.items(): ops.append( matpower.assign( math_ops.matmul(eigenvectors * (clipped_eigenvalues + damping)** exp, array_ops.transpose(eigenvectors)))) else: for damping, inv in self._inverses_by_damping.items(): ops.append(inv.assign(utils.posdef_inv(self._cov, damping))) return ops
def functional_rnn(cell, inputs, sequence_length=None, initial_state=None, dtype=None, time_major=False, scope=None, use_tpu=False): """Same interface as `tf.nn.dynamic_rnn`.""" with variable_scope.variable_scope(scope or 'rnn'): if not time_major: inputs = nest.map_structure( lambda t: array_ops.transpose(t, [1, 0, 2]), inputs) inputs_flat = nest.flatten(inputs) batch_size = array_ops.shape(inputs_flat[0])[1] if initial_state is None: initial_state = cell.zero_state(batch_size, dtype) func_cell = _FunctionalRnnCell(cell, inputs, initial_state) if sequence_length is not None: max_length = math_ops.reduce_max(sequence_length) else: max_length = None extended_acc_state, extended_final_state = recurrent.Recurrent( theta=func_cell.theta, state0=func_cell.extended_initial_state, inputs=inputs, cell_fn=func_cell.cell_step, max_input_length=max_length, use_tpu=use_tpu) tf_output, tf_state = _PostProcessOutput( extended_acc_state, extended_final_state, func_cell, inputs_flat[0].shape[0], sequence_length) if time_major: tf_output = array_ops.transpose(tf_output, [1, 0, 2]) return tf_output, tf_state
def runFiniteDifferences(self, shapes, dtypes=(dtypes_lib.float32, dtypes_lib.float64), scalarTest=False): with self.test_session(use_gpu=False): for shape in shapes: for batch in False, True: for dtype in dtypes: if not scalarTest: x = constant_op.constant(np.random.randn(shape[0], shape[1]), dtype) tensor = math_ops.matmul(x, array_ops.transpose(x)) / shape[0] else: # This is designed to be a faster test for larger matrices. x = constant_op.constant(np.random.randn(), dtype) R = constant_op.constant(np.random.randn(shape[0], shape[1]), dtype) e = math_ops.mul(R, x) tensor = math_ops.matmul(e, array_ops.transpose(e)) / shape[0] # Inner-most matrices in tensor are positive definite. if batch: tensor = array_ops.tile(array_ops.expand_dims(tensor, 0), [4, 1, 1]) y = linalg_ops.cholesky(tensor) if scalarTest: y = math_ops.reduce_mean(y) error = gradient_checker.compute_gradient_error(x, x._shape_as_list(), y, y._shape_as_list()) tf_logging.info("error = %f", error) if dtype == dtypes_lib.float64: self.assertLess(error, 1e-5) else: self.assertLess(error, 3e-3)
def build_graph(device, input_shape, perm, datatype, num_iters): """builds a graph containing a sequence of conv2d operations. Args: device: String, the device to run on. input_shape: Shape of the input tensor. perm: A list of ints with the same length as input tensor's dimension. datatype: numpy data type of the input tensor. num_iters: number of iterations to run transpose. Returns: An array of tensors to run() """ with ops.device("/%s:0" % device): total_size = np.prod(input_shape) inp = np.arange(1, total_size + 1, dtype=datatype).reshape(input_shape) t = constant_op.constant(inp, shape=input_shape) outputs = [] transpose_op = array_ops.transpose(t, perm) outputs.append(transpose_op) for _ in range(1, num_iters): with ops.control_dependencies([transpose_op]): transpose_op = array_ops.transpose(t, perm) outputs.append(transpose_op) return control_flow_ops.group(*outputs)
def gather_tree_from_array(t, parent_ids, sequence_length): """Calculates the full beams for `TensorArray`s. Args: t: A stacked `TensorArray` of size `max_time` that contains `Tensor`s of shape `[batch_size, beam_width, s]` or `[batch_size * beam_width, s]` where `s` is the depth shape. parent_ids: The parent ids of shape `[max_time, batch_size, beam_width]`. sequence_length: The sequence length of shape `[batch_size, beam_width]`. Returns: A `Tensor` which is a stacked `TensorArray` of the same size and type as `t` and where beams are sorted in each `Tensor` according to `parent_ids`. """ max_time = parent_ids.shape[0].value or array_ops.shape(parent_ids)[0] batch_size = parent_ids.shape[1].value or array_ops.shape(parent_ids)[1] beam_width = parent_ids.shape[2].value or array_ops.shape(parent_ids)[2] # Generate beam ids that will be reordered by gather_tree. beam_ids = array_ops.expand_dims( array_ops.expand_dims(math_ops.range(beam_width), 0), 0) beam_ids = array_ops.tile(beam_ids, [max_time, batch_size, 1]) mask = array_ops.sequence_mask( sequence_length, maxlen=max_time, dtype=dtypes.int32) mask = array_ops.transpose(mask, perm=[2, 0, 1]) # Use beam_width + 1 to mark the end of beam. masked_beam_ids = (beam_ids * mask) + (1 - mask) * (beam_width + 1) max_sequence_lengths = math_ops.to_int32( math_ops.reduce_max(sequence_length, axis=1)) sorted_beam_ids = beam_search_ops.gather_tree( step_ids=masked_beam_ids, parent_ids=parent_ids, max_sequence_lengths=max_sequence_lengths, end_token=beam_width + 1) # For out of range steps, simply copy the same beam. sorted_beam_ids = array_ops.where( math_ops.cast(mask, dtypes.bool), x=sorted_beam_ids, y=beam_ids) # Generate indices for gather_nd. time_ind = array_ops.tile(array_ops.reshape( math_ops.range(max_time), [-1, 1, 1]), [1, batch_size, beam_width]) batch_ind = array_ops.tile(array_ops.reshape( math_ops.range(batch_size), [-1, 1, 1]), [1, max_time, beam_width]) batch_ind = array_ops.transpose(batch_ind, perm=[1, 0, 2]) indices = array_ops.stack([time_ind, batch_ind, sorted_beam_ids], -1) # Gather from a tensor with collapsed additional dimensions. gather_from = t final_shape = array_ops.shape(gather_from) gather_from = array_ops.reshape( gather_from, [max_time, batch_size, beam_width, -1]) ordered = array_ops.gather_nd(gather_from, indices) ordered = array_ops.reshape(ordered, final_shape) return ordered
def testError(self): with self.assertRaises(ValueError): array_ops.transpose( np.arange(0., 30).reshape([2, 3, 5]), [[0, 1], [2, 3]]) with self.assertRaises(ValueError): array_ops.transpose(np.arange(0., 30).reshape([2, 3, 5]), [0, 1, 3]) self._testError( np.arange(0., 30).reshape([2, 3, 5]), [0, 1, 1], "2 is missing")
def _GatherV2Grad(op, grad): """Gradient for GatherV2 op.""" # params can be large, so colocate the shape calculation with it. # # params can be very large for sparse model, array_ops.shape raises # exception on the Windows platform when any dimension is larger than # int32. params_shape is not used in optimizer apply_sparse gradients, # so it's fine to convert it back to int32 regardless of truncation. params = op.inputs[0] with ops.colocate_with(params): params_shape = array_ops.shape(params, out_type=ops.dtypes.int64) params_shape = math_ops.to_int32(params_shape) indices = op.inputs[1] indices_size = array_ops.expand_dims(array_ops.size(indices), 0) axis = op.inputs[2] axis_static = tensor_util.constant_value(axis) # For axis 0 gathers, build an appropriately shaped IndexedSlices. if axis_static == 0: values_shape = array_ops.concat([indices_size, params_shape[1:]], 0) values = array_ops.reshape(grad, values_shape) indices = array_ops.reshape(indices, indices_size) return [ops.IndexedSlices(values, indices, params_shape), None, None] outer_shape = params_shape[:axis] outer_dims = array_ops.size(outer_shape) inner_shape = params_shape[axis:][1:] inner_dims = array_ops.size(inner_shape) outer_axes_indices = math_ops.range(outer_dims) inner_axes_indices = math_ops.range(outer_dims + 1, outer_dims + 1 + inner_dims) values_shape = array_ops.concat([outer_shape, indices_size, inner_shape], 0) values = array_ops.reshape(grad, values_shape) indices = array_ops.reshape(indices, indices_size) # We need to sum up every slice `values[..., i, ....]` corresponding to # `params[..., indices[i], ...]`. Since `unsorted_segment_sum` does not # support an axis parameter, we transpose the gather dimension to the front, # then use `unsorted_segment_sum` to build a # [gather_axis, outer_axes, inner_axes] tensor with all the gradients # affecting each index in `gather_axis` summed up. transpose_dims = array_ops.concat( [[outer_dims], outer_axes_indices, inner_axes_indices], 0) values_transpose = array_ops.transpose(values, transpose_dims) num_segments = params_shape[axis] params_grad = math_ops.unsorted_segment_sum( values_transpose, indices, num_segments) # Inverts the above transpose by moving dimension 0 back to its original # position. invert_transpose_dims = array_ops.concat( [outer_axes_indices + 1, [0], inner_axes_indices], 0) params_grad = array_ops.transpose(params_grad, invert_transpose_dims) return [params_grad, None, None]
def set_model(self, model): """Sets Keras model and creates summary ops.""" self.model = model self.sess = K.get_session() # only make histogram summary op if it hasn't already been made if self.histogram_freq and self.merged is None: for layer in self.model.layers: for weight in layer.weights: mapped_weight_name = weight.name.replace(':', '_') tf_summary.histogram(mapped_weight_name, weight) if self.write_images: w_img = array_ops.squeeze(weight) shape = K.int_shape(w_img) if len(shape) == 2: # dense layer kernel case if shape[0] > shape[1]: w_img = array_ops.transpose(w_img) shape = K.int_shape(w_img) w_img = array_ops.reshape(w_img, [1, shape[0], shape[1], 1]) elif len(shape) == 3: # convnet case if K.image_data_format() == 'channels_last': # switch to channels_first to display # every kernel as a separate image w_img = array_ops.transpose(w_img, perm=[2, 0, 1]) shape = K.int_shape(w_img) w_img = array_ops.reshape(w_img, [shape[0], shape[1], shape[2], 1]) elif len(shape) == 1: # bias case w_img = array_ops.reshape(w_img, [1, shape[0], 1, 1]) else: # not possible to handle 3D convnets etc. continue shape = K.int_shape(w_img) assert len(shape) == 4 and shape[-1] in [1, 3, 4] tf_summary.image(mapped_weight_name, w_img) if self.write_grads: for weight in layer.trainable_weights: mapped_weight_name = weight.name.replace(':', '_') grads = model.optimizer.get_gradients(model.total_loss, weight) def is_indexed_slices(grad): return type(grad).__name__ == 'IndexedSlices' grads = [grad.values if is_indexed_slices(grad) else grad for grad in grads] tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads) if hasattr(layer, 'output'): tf_summary.histogram('{}_out'.format(layer.name), layer.output) self.merged = tf_summary.merge_all() if self.write_graph: self.writer = self._writer_class(self.log_dir, self.sess.graph) else: self.writer = self._writer_class(self.log_dir)
def _sample_n(self, n, seed): batch_shape = self.batch_shape() event_shape = self.event_shape() batch_ndims = array_ops.shape(batch_shape)[0] ndims = batch_ndims + 3 # sample_ndims=1, event_ndims=2 shape = array_ops.concat(((n,), batch_shape, event_shape), 0) # Complexity: O(nbk^2) x = random_ops.random_normal(shape=shape, mean=0., stddev=1., dtype=self.dtype, seed=seed) # Complexity: O(nbk) # This parametrization is equivalent to Chi2, i.e., # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2) g = random_ops.random_gamma(shape=(n,), alpha=self._multi_gamma_sequence( 0.5 * self.df, self.dimension), beta=0.5, dtype=self.dtype, seed=distribution_util.gen_new_seed( seed, "wishart")) # Complexity: O(nbk^2) x = array_ops.matrix_band_part(x, -1, 0) # Tri-lower. # Complexity: O(nbk) x = array_ops.matrix_set_diag(x, math_ops.sqrt(g)) # Make batch-op ready. # Complexity: O(nbk^2) perm = array_ops.concat((math_ops.range(1, ndims), (0,)), 0) x = array_ops.transpose(x, perm) shape = array_ops.concat((batch_shape, (event_shape[0], -1)), 0) x = array_ops.reshape(x, shape) # Complexity: O(nbM) where M is the complexity of the operator solving a # vector system. E.g., for OperatorPDDiag, each matmul is O(k^2), so # this complexity is O(nbk^2). For OperatorPDCholesky, each matmul is # O(k^3) so this step has complexity O(nbk^3). x = self.scale_operator_pd.sqrt_matmul(x) # Undo make batch-op ready. # Complexity: O(nbk^2) shape = array_ops.concat((batch_shape, event_shape, (n,)), 0) x = array_ops.reshape(x, shape) perm = array_ops.concat(((ndims - 1,), math_ops.range(0, ndims - 1)), 0) x = array_ops.transpose(x, perm) if not self.cholesky_input_output_matrices: # Complexity: O(nbk^3) x = math_ops.matmul(x, x, adjoint_b=True) return x
def _FusedBatchNormGrad(op, *grad): """Return the gradients for the 3 inputs of BatchNorm. Args: op: The BatchNormOp for which we need to compute gradients. *grad: An argument list for tensors of gradients wrt the outputs with grad[0] as grad_y. Returns: grad_x: gradient for x, which is scale * rsqrt(variance + epsilon) * [grad_y - mean(grad_y) - (x - mean(x)) * mean(grad_y * (x - mean(x))) / (variance + epsilon)] in training mode; grad_y * scale * rsqrt(pop_variance + epsilon) in freeze mode. grad_scale: gradient for scale, which is sum(grad_y * (x - mean(x)) * rsqrt(variance + epsilon)) in training mode; sum(grad_y * (x - pop_mean) * rsqrt(pop_variance + epsilon)) in freeze mode. grad_offset: gradient for offset, which is sum(grad_y) in training mode; sum(grad_y) in freeze mode. """ x = op.inputs[0] grad_y = grad[0] scale = op.inputs[1] epsilon = op.get_attr("epsilon") data_format = op.get_attr("data_format") is_training = op.get_attr("is_training") if is_training: return gen_nn_ops.fused_batch_norm_grad( grad_y, x, scale, op.outputs[3], op.outputs[4], epsilon=epsilon, data_format=data_format, is_training=is_training) else: pop_mean = op.inputs[3] pop_var = op.inputs[4] if data_format == b"NCHW": x = array_ops.transpose(x, [0, 2, 3, 1]) grad_y = array_ops.transpose(grad_y, [0, 2, 3, 1]) dx, dscale, doffset, _, _ = gen_nn_ops.fused_batch_norm_grad( grad_y, x, scale, pop_mean, pop_var, epsilon=epsilon, data_format='NHWC', is_training=is_training) if data_format == b"NCHW": dx = array_ops.transpose(dx, [0, 3, 1, 2]) return dx, dscale, doffset, None, None
def spatial_conv(batch, gain): s = array_ops.shape(batch) padded = array_ops.pad(batch, [[0, 0], [2, 2], [2, 2], [0, 0]], 'REFLECT') xt = array_ops.transpose(padded, [0, 3, 1, 2]) xt = array_ops.reshape(xt, [s[0] * s[3], s[1] + 4, s[2] + 4, 1]) conv_out = nn_ops.conv2d(xt, gaussian_filter * gain, [1] * 4, 'VALID') conv_xt = array_ops.reshape(conv_out, [s[0], s[3], s[1], s[2]]) conv_xt = array_ops.transpose(conv_xt, [0, 2, 3, 1]) return conv_xt
def _SparseTensorDenseMatMulGrad(op, grad): """Gradients for the dense tensor in the SparseTensorDenseMatMul op. If either input is complex, no gradient is provided. Args: op: the SparseTensorDenseMatMul op grad: the incoming gradient Returns: Gradient for each of the 4 input tensors: (sparse_indices, sparse_values, sparse_shape, dense_tensor) The gradients for indices and shape are None. Raises: TypeError: When the two operands don't have the same type. """ sp_t = ops.SparseTensor(*op.inputs[:3]) adj_a = op.get_attr("adjoint_a") adj_b = op.get_attr("adjoint_b") a_type = sp_t.values.dtype.base_dtype b_type = op.inputs[3].dtype.base_dtype if a_type != b_type: raise TypeError("SparseTensorDenseMatMul op received operands with " "different types: ", a_type, " and ", b_type) is_complex = a_type == ops.dtypes.complex64 if is_complex: raise NotImplementedError("SparseTensorDenseMatMul op does not support " "complex gradients.") # gradient w.r.t. dense b_grad = sparse_ops.sparse_tensor_dense_matmul(sp_t, grad, adjoint_a=not adj_a) if adj_b: b_grad = array_ops.transpose(b_grad) # gradient w.r.t. sparse values a_indices = op.inputs[0] b = op.inputs[3] rows = a_indices[:, 0] cols = a_indices[:, 1] # TODO(zongheng, ebrevdo): add conjugates in the right places when complex # values are allowed. # TODO(zongheng): these gather calls could potentially duplicate rows/cols in # memory. If there is a need, we should look into implementing this more # intelligently to avoid duplicating data. parts_a = array_ops.gather(grad, rows if not adj_a else cols) parts_b = array_ops.gather(b if not adj_b else array_ops.transpose(b), cols if not adj_a else rows) a_values_grad = math_ops.reduce_sum(parts_a * parts_b, reduction_indices=1) # gradients w.r.t. (a_indices, a_values, a_shape, b) return (None, a_values_grad, None, b_grad)
def _inference_ref(self, x, scale, offset, mean, var, epsilon, data_format): if data_format not in ['NHWC', 'NCHW']: raise ValueError('data_format must be NCHW or NHWC, ' 'got %s.' % data_format) if data_format == 'NCHW': x = array_ops.transpose(x, [0, 2, 3, 1]) y = self._batch_norm(x, mean, var, offset, scale, epsilon) if data_format == 'NCHW': y = array_ops.transpose(y, [0, 3, 1, 2]) return y.eval()
def _my_matmul_grad(op, dl_dc): a = op.inputs[0] b = op.inputs[1] #dl_da = math_ops.matmul( dl_dc, array_ops.transpose(b, [1,0])) #dl_db = math_ops.matmul( array_ops.transpose(a, [1,0]), dl_dc ) dl_da = my_matmul( dl_dc, array_ops.transpose(b, [1,0])) dl_db = my_matmul( array_ops.transpose(a, [1,0]), dl_dc ) return dl_da, dl_db
def _cudnn_to_tf_weights(self, *cu_weights): r"""Stitching cudnn canonical weights to generate tf canonical weights.""" w_i, w_r, w_h, r_i, r_r, r_h = cu_weights # pylint: disable=invalid-name W_i = array_ops.concat([w_i, r_i], axis=1) W_r = array_ops.concat([w_r, r_r], axis=1) # pylint: enable=invalid-name return (array_ops.transpose(array_ops.concat([W_i, W_r], axis=0)), array_ops.transpose(w_h), array_ops.transpose(r_h))
def test4DGPU(self): # If no GPU available, skip the test if not test.is_gpu_available(cuda_only=True): return large_shapes = [[4, 10, 10, 3], [4, 10, 10, 8], [4, 10, 10, 13], [4, 3, 10, 10], [4, 8, 10, 10], [4, 13, 10, 10]] * 3 perms = [[0, 3, 1, 2]] * 3 + [[0, 2, 3, 1]] * 3 + [[3, 1, 2, 0]] * 6 + [[ 1, 2, 3, 0 ]] * 3 + [[2, 3, 0, 1]] * 3 for input_shape, perm in zip(large_shapes, perms): total_size = np.prod(input_shape) inp = np.arange(1, total_size + 1, dtype=np.float32).reshape(input_shape) np_ans = self._np_transpose(inp, perm) with self.test_session(use_gpu=True): inx = ops.convert_to_tensor(inp) y = array_ops.transpose(inx, perm) tf_ans = y.eval() self.assertAllEqual(np_ans, tf_ans) self.assertShapeEqual(np_ans, y) # shapes related to Inception (taken from conv_ops_test.py) inception_shapes = [[4, 5, 5, 124], [4, 8, 8, 38], [4, 8, 8, 38], [ 4, 8, 8, 204 ], [4, 8, 8, 44], [4, 8, 8, 204], [4, 8, 8, 204], [4, 8, 8, 204], [ 4, 8, 8, 176 ], [4, 8, 8, 176], [4, 8, 8, 176], [4, 8, 8, 176], [4, 17, 17, 19], [ 4, 17, 17, 19 ], [4, 17, 17, 124], [4, 17, 17, 12], [4, 17, 17, 124], [4, 17, 17, 22], [ 4, 17, 17, 19 ], [4, 17, 17, 19], [4, 17, 17, 121], [4, 17, 17, 121], [4, 17, 17, 22], [ 4, 17, 17, 19 ], [4, 17, 17, 19], [4, 17, 17, 115], [4, 17, 17, 115], [4, 17, 17, 19], [ 4, 17, 17, 16 ], [4, 17, 17, 115], [4, 17, 17, 102], [4, 17, 17, 12], [4, 17, 17, 102], [ 4, 17, 17, 12 ], [4, 17, 17, 102], [4, 17, 17, 12], [4, 17, 17, 76], [4, 17, 17, 12], [ 4, 17, 17, 12 ], [4, 17, 17, 76], [4, 17, 17, 76], [4, 35, 35, 9], [4, 35, 35, 28], [ 4, 35, 35, 6 ], [4, 35, 35, 28], [4, 35, 35, 25], [4, 35, 35, 4], [4, 35, 35, 25], [4, 35, 35, 9], [4, 35, 35, 19], [4, 35, 35, 19], [4, 35, 35, 19], [4, 73, 73, 6], [4, 73, 73, 6], [4, 147, 147, 2]] for input_shape in inception_shapes: perm = [0, 3, 1, 2] total_size = np.prod(input_shape) inp = np.arange(1, total_size + 1, dtype=np.float32).reshape(input_shape) np_ans = self._np_transpose(inp, perm) with self.test_session(use_gpu=True): inx = ops.convert_to_tensor(inp) y = array_ops.transpose(inx, perm) tf_ans = y.eval() self.assertAllEqual(np_ans, tf_ans) self.assertShapeEqual(np_ans, y)
def testSpaceToDepthTranspose(self): x = np.arange(5 * 10 * 16 * 7, dtype=np.float32).reshape([5, 10, 16, 7]) block_size = 2 paddings = np.zeros((2, 2), dtype=np.int32) y1 = self.space_to_batch(x, paddings, block_size=block_size) y2 = array_ops.transpose( array_ops.space_to_depth( array_ops.transpose(x, [3, 1, 2, 0]), block_size=block_size), [3, 1, 2, 0]) with self.test_session(use_gpu=True): self.assertAllEqual(y1.eval(), y2.eval())
def _training_ref(self, x, scale, offset, epsilon, data_format): if data_format not in ['NHWC', 'NCHW']: raise ValueError('data_format must be NCHW or NHWC, ' 'got %s.' % data_format) if data_format == 'NCHW': x = array_ops.transpose(x, [0, 2, 3, 1]) mean, var = nn_impl.moments(x, [0, 1, 2], keep_dims=False) y = nn_impl.batch_normalization(x, mean, var, offset, scale, epsilon) if data_format == 'NCHW': y = array_ops.transpose(y, [0, 3, 1, 2]) return y.eval(), mean.eval(), var.eval()
def run_fn(): with lsgt.LossScaleGradientTape(loss_scale) as g: y = array_ops.transpose(x) * 2. return g.gradient(y, x)
def dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None, dtype=None, parallel_iterations=None, swap_memory=False, time_major=False, scope=None): """Creates a recurrent neural network specified by RNNCell `cell`. This function is functionally identical to the function `rnn` above, but performs fully dynamic unrolling of `inputs`. Unlike `rnn`, the input `inputs` is not a Python list of `Tensors`, one for each frame. Instead, `inputs` may be a single `Tensor` where the maximum time is either the first or second dimension (see the parameter `time_major`). Alternatively, it may be a (possibly nested) tuple of Tensors, each of them having matching batch and time dimensions. The corresponding output is either a single `Tensor` having the same number of time steps and batch size, or a (possibly nested) tuple of such tensors, matching the nested structure of `cell.output_size`. The parameter `sequence_length` is optional and is used to copy-through state and zero-out outputs when past a batch element's sequence length. So it's more for correctness than performance, unlike in rnn(). Args: cell: An instance of RNNCell. inputs: The RNN inputs. If `time_major == False` (default), this must be a `Tensor` of shape: `[batch_size, max_time, ...]`, or a nested tuple of such elements. If `time_major == True`, this must be a `Tensor` of shape: `[max_time, batch_size, ...]`, or a nested tuple of such elements. This may also be a (possibly nested) tuple of Tensors satisfying this property. The first two dimensions must match across all the inputs, but otherwise the ranks and other shape components may differ. In this case, input to `cell` at each time-step will replicate the structure of these tuples, except for the time dimension (from which the time is taken). The input to `cell` at each time step will be a `Tensor` or (possibly nested) tuple of Tensors each with dimensions `[batch_size, ...]`. sequence_length: (optional) An int32/int64 vector sized `[batch_size]`. initial_state: (optional) An initial state for the RNN. If `cell.state_size` is an integer, this must be a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`. If `cell.state_size` is a tuple, this should be a tuple of tensors having shapes `[batch_size, s] for s in cell.state_size`. dtype: (optional) The data type for the initial state and expected output. Required if initial_state is not provided or RNN state has a heterogeneous dtype. parallel_iterations: (Default: 32). The number of iterations to run in parallel. Those operations which do not have any temporal dependency and can be run in parallel, will be. This parameter trades off time for space. Values >> 1 use more memory but take less time, while smaller values use less memory but computations take longer. swap_memory: Transparently swap the tensors produced in forward inference but needed for back prop from GPU to CPU. This allows training RNNs which would typically not fit on a single GPU, with very minimal (or no) performance penalty. time_major: The shape format of the `inputs` and `outputs` Tensors. If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`. If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`. Using `time_major = True` is a bit more efficient because it avoids transposes at the beginning and end of the RNN calculation. However, most TensorFlow data is batch-major, so by default this function accepts input and emits output in batch-major form. scope: VariableScope for the created subgraph; defaults to "RNN". Returns: A pair (outputs, state) where: outputs: The RNN output `Tensor`. If time_major == False (default), this will be a `Tensor` shaped: `[batch_size, max_time, cell.output_size]`. If time_major == True, this will be a `Tensor` shaped: `[max_time, batch_size, cell.output_size]`. Note, if `cell.output_size` is a (possibly nested) tuple of integers or `TensorShape` objects, then `outputs` will be a tuple having the same structure as `cell.output_size`, containing Tensors having shapes corresponding to the shape data in `cell.output_size`. state: The final state. If `cell.state_size` is an int, this will be shaped `[batch_size, cell.state_size]`. If it is a `TensorShape`, this will be shaped `[batch_size] + cell.state_size`. If it is a (possibly nested) tuple of ints or `TensorShape`, this will be a tuple having the corresponding shapes. Raises: TypeError: If `cell` is not an instance of RNNCell. ValueError: If inputs is None or an empty list. """ if not isinstance(cell, rnn_cell.RNNCell): raise TypeError("cell must be an instance of RNNCell") # By default, time_major==False and inputs are batch-major: shaped # [batch, time, depth] # For internal calculations, we transpose to [time, batch, depth] flat_input = nest.flatten(inputs) if not time_major: # (B,T,D) => (T,B,D) flat_input = tuple( array_ops.transpose(input_, [1, 0, 2]) for input_ in flat_input) parallel_iterations = parallel_iterations or 32 if sequence_length is not None: sequence_length = math_ops.to_int32(sequence_length) if sequence_length.get_shape().ndims not in (None, 1): raise ValueError( "sequence_length must be a vector of length batch_size, " "but saw shape: %s" % sequence_length.get_shape()) sequence_length = array_ops.identity( # Just to find it in the graph. sequence_length, name="sequence_length") # Create a new scope in which the caching device is either # determined by the parent scope, or is set to place the cached # Variable using the same placement as for the rest of the RNN. with vs.variable_scope(scope or "RNN") as varscope: if varscope.caching_device is None: varscope.set_caching_device(lambda op: op.device) input_shape = tuple(array_ops.shape(input_) for input_ in flat_input) batch_size = input_shape[0][1] for input_ in input_shape: if input_[1].get_shape() != batch_size.get_shape(): raise ValueError("All inputs should have the same batch size") if initial_state is not None: state = initial_state else: if not dtype: raise ValueError( "If no initial_state is provided, dtype must be.") state = cell.zero_state(batch_size, dtype) def _assert_has_shape(x, shape): x_shape = array_ops.shape(x) packed_shape = array_ops.pack(shape) return control_flow_ops.Assert( math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)), [ "Expected shape for Tensor %s is " % x.name, packed_shape, " but saw shape: ", x_shape ]) if sequence_length is not None: # Perform some shape validation with ops.control_dependencies( [_assert_has_shape(sequence_length, [batch_size])]): sequence_length = array_ops.identity(sequence_length, name="CheckSeqLen") inputs = nest.pack_sequence_as(structure=inputs, flat_sequence=flat_input) (outputs, final_state) = _dynamic_rnn_loop( cell, inputs, state, parallel_iterations=parallel_iterations, swap_memory=swap_memory, sequence_length=sequence_length, dtype=dtype) # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth]. # If we are performing batch-major calculations, transpose output back # to shape [batch, time, depth] if not time_major: # (T,B,D) => (B,T,D) flat_output = nest.flatten(outputs) flat_output = [ array_ops.transpose(output, [1, 0, 2]) for output in flat_output ] outputs = nest.pack_sequence_as(structure=outputs, flat_sequence=flat_output) return (outputs, final_state)
def f(x, y): return math_ops.matmul( x, array_ops.reshape(array_ops.transpose(y), [384, 1536]))
def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, data_type, use_gpu, grouped_conv=False, data_format="NHWC"): """Verifies the output values of the convolution function. Args: tensor_in_sizes: Input tensor dimensions in [batch, input_rows, input_cols, input_depth]. filter_in_sizes: Filter tensor dimensions in [filter_rows, filter_cols, input_depth, depth_multiplier]. stride: Stride. padding: Padding type. data_type: The data type to use. use_gpu: Whether to use GPU. grouped_conv: Whether to use cuDNN 7's grouped convolution. data_format: The data_format of the input. "NHWC" or "NCHW". """ input_size = 1 filter_size = 1 for s in tensor_in_sizes: input_size *= s for s in filter_in_sizes: filter_size *= s # Initializes the input and filter tensor with numbers incrementing from 1. x1 = [f * 1.0 / input_size for f in range(1, input_size + 1)] x2 = [f * 1.0 / filter_size for f in range(1, filter_size + 1)] ops.reset_default_graph() graph = ops.get_default_graph() with self.session(graph=graph, use_gpu=use_gpu) as sess: tolerance = { dtypes.float16: 4e-2, dtypes.float32: 1e-5, dtypes.float64: 1e-12, }[data_type] t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=data_type) t1.set_shape(tensor_in_sizes) t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=data_type) native_t1 = t1 strides = [1, stride, stride, 1] if data_format == "NCHW": # Transpose from NHWC input to NCHW # Ex. [4, 5, 5, 48] to [4, 48, 5, 5] native_t1 = array_ops.transpose(t1, [0, 3, 1, 2]) strides = [1, 1, stride, stride] with sess.graph._kernel_label_map( {"DepthwiseConv2dNative": "cudnn_grouped_convolution"} if grouped_conv else {}): conv_native = nn_ops.depthwise_conv2d_native( native_t1, t2, strides=strides, data_format=data_format, padding=padding) if data_format == "NCHW": # Transpose back from NCHW to NHWC conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1]) try: native_result = self.evaluate(conv_native) except errors.InvalidArgumentError as e: # Grouped convolution kernel is only registered for cuDNN 7. Silently # return when we are running on an earlier version or without GPU. if e.message.startswith( "No OpKernel was registered to support Op 'DepthwiseConv2dNative'" ): tf_logging.warn("Skipping grouped convolution test") return raise e conv_interface = nn_impl.depthwise_conv2d( t1, t2, strides=[1, stride, stride, 1], padding=padding) interface_result = self.evaluate(conv_interface) tf_logging.info( "data_type: %r, use_gpu: %r, grouped_conv: %r, max diff = %f", data_type, use_gpu, grouped_conv, np.amax(np.absolute(native_result - interface_result))) self.assertArrayNear(np.ravel(native_result), np.ravel(interface_result), tolerance) self.assertShapeEqual(native_result, conv_native) self.assertShapeEqual(native_result, conv_interface)
def call(self, inputs): if not isinstance(inputs, (list, tuple)): raise ValueError( 'A merge layer should be called on a list of inputs.') if self._reshape_required: reshaped_inputs = [] input_ndims = list(map(K.ndim, inputs)) if None not in input_ndims: # If ranks of all inputs are available, # we simply expand each of them at axis=1 # until all of them have the same rank. max_ndim = max(input_ndims) for x in inputs: x_ndim = K.ndim(x) for _ in range(max_ndim - x_ndim): x = array_ops.expand_dims(x, axis=1) reshaped_inputs.append(x) return self._merge_function(reshaped_inputs) else: # Transpose all inputs so that batch size is the last dimension. # (batch_size, dim1, dim2, ... ) -> (dim1, dim2, ... , batch_size) transposed = False for x in inputs: x_ndim = K.ndim(x) if x_ndim is None: x_shape = array_ops.shape(x) batch_size = x_shape[0] new_shape = K.concatenate([ x_shape[1:], array_ops.expand_dims(batch_size, axis=-1) ]) x_transposed = array_ops.reshape( x, array_ops.stack([ batch_size, math_ops.reduce_prod(x_shape[1:]) ], axis=0)) x_transposed = array_ops.transpose(x_transposed, perm=(1, 0)) x_transposed = array_ops.reshape( x_transposed, new_shape) reshaped_inputs.append(x_transposed) transposed = True elif x_ndim > 1: dims = list(range(1, x_ndim)) + [0] reshaped_inputs.append( array_ops.transpose(x, perm=dims)) transposed = True else: # We don't transpose inputs if they are 1D vectors or scalars. reshaped_inputs.append(x) y = self._merge_function(reshaped_inputs) y_ndim = K.ndim(y) if transposed: # If inputs have been transposed, we have to transpose the output too. if y_ndim is None: y_shape = array_ops.shape(y) y_ndim = array_ops.shape(y_shape)[0] batch_size = y_shape[y_ndim - 1] new_shape = K.concatenate([ array_ops.expand_dims(batch_size, axis=-1), y_shape[:y_ndim - 1] ]) y = array_ops.reshape(y, (-1, batch_size)) y = array_ops.transpose(y, perm=(1, 0)) y = array_ops.reshape(y, new_shape) elif y_ndim > 1: dims = [y_ndim - 1] + list(range(y_ndim - 1)) y = array_ops.transpose(y, perm=dims) return y else: return self._merge_function(inputs)
def fill_lower_triangular(x, validate_args=False, name="fill_lower_triangular"): """Creates a (batch of) lower triangular matrix from a vector of inputs. If `x.get_shape()` is `[b1, b2, ..., bK, d]` then the output shape is `[b1, b2, ..., bK, n, n]` where `n` is such that `d = n(n+1)/2`, i.e., `n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))`. Although the non-batch complexity is O(n^2), large constants and sub-optimal vectorization means the complexity of this function is 5x slower than zeroing out the upper triangular, i.e., `tf.matrix_band_part(X, -1, 0)`. This function becomes competitive only when several matmul/cholesky/etc ops can be ellided in constructing the input. Example: wiring a fully connected layer as a covariance matrix; this function reduces the final layer by 2x and possibly reduces the network arch complexity considerably. In most cases it is better to simply build a full matrix and zero out the upper triangular elements, e.g., `tril = tf.matrix_band_part(full, -1, 0)`, rather than directly construct a lower triangular. Example: ```python fill_lower_triangular([1, 2, 3, 4, 5, 6]) # Returns: [[1, 0, 0], # [2, 3, 0], # [4, 5, 6]] ``` For comparison, a pure numpy version of this function can be found in `distribution_util_test.py`, function `_fill_lower_triangular`. Args: x: `Tensor` representing lower triangular elements. validate_args: `Boolean`, default `False`. Whether to ensure the shape of `x` can be mapped to a lower triangular matrix (controls non-static checks only). name: `String`. The name to give this op. Returns: tril: `Tensor` with lower triangular elements filled from `x`. Raises: ValueError: if shape if `x` has static shape which cannot be mapped to a lower triangular matrix. """ # TODO(jvdillon): Replace this code with dedicated op when it exists. with ops.name_scope(name, values=(x, )): x = ops.convert_to_tensor(x, name="x") if (x.get_shape().ndims is not None and x.get_shape()[-1].value is not None): d = x.get_shape()[-1].value # d = n(n+1)/2 implies n is: n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.)) d_inferred = n * (n + 1) / 2 if d != d_inferred: raise ValueError( "Input cannot be mapped to a lower triangular; " "n*(n+1)/2 = %d != %d" % (d_inferred, d)) final_shape = x.get_shape()[:-1].concatenate( tensor_shape.TensorShape([n, n])) else: d = math_ops.cast(array_ops.shape(x)[-1], dtype=dtypes.float32) # d = n(n+1)/2 implies n is: n = math_ops.cast(0.5 * (dtypes.sqrt(1. + 8. * d) - 1.), dtype=dtypes.int32) if validate_args: is_valid_input_shape = check_ops.assert_equal( n * (n + 1) / 2, d, message="Input cannot be mapped to a lower triangular.") n = control_flow_ops.with_dependencies([is_valid_input_shape], n) final_shape = x.get_shape()[:-1].concatenate( tensor_shape.TensorShape([None, None])) def tril_ids(n): """Internal helper to create vector of linear indices into y.""" # Build the ids statically; chose 512 because it implies 1MiB. if not contrib_framework.is_tensor(n) and n <= 512: ids = np.arange(n**2, dtype=np.int32) rows = (ids / n).astype(np.int32) # Implicit floor. # We need to stop incrementing the index when we encounter # upper-triangular elements. The idea here is to compute the # lower-right number of zeros then by "symmetry" subtract this from the # total number of zeros, n(n-1)/2. # Then we note that: n(n-1)/2 - (n-r)*(n-r-1)/2 = r(2n-r-1)/2 offset = (rows * (2 * n - rows - 1) / 2).astype(np.int32) # We could also zero out when (rows < cols) == (rows < ids-n*rows). # mask = (ids <= (n + 1) * rows).astype(np.int32) else: ids = math_ops.range(n**2) rows = math_ops.cast(ids / n, dtype=dtypes.int32) offset = math_ops.cast(rows * (2 * n - rows - 1) / 2, dtype=dtypes.int32) return ids - offset # Special-case non-batch case. if x.get_shape().ndims == 1: y = array_ops.gather(x, array_ops.reshape(tril_ids(n), [n, n])) y = array_ops.matrix_band_part(y, -1, 0) y.set_shape(y.get_shape().merge_with(final_shape)) return y # Make ids for each batch dim. if (x.get_shape().ndims is not None and x.get_shape()[:-1].is_fully_defined()): batch_shape = np.asarray(x.get_shape()[:-1].as_list(), dtype=np.int32) m = np.prod(batch_shape).astype(np.int32) else: batch_shape = array_ops.shape(x)[:-1] m = array_ops.reduce_prod(array_ops.shape(x)[:-1]) batch_ids = math_ops.range(m) # Assemble the tril_ids into batch,tril_id pairs. idx = array_ops.pack([ array_ops.tile(array_ops.expand_dims(batch_ids, 1), [1, n * n]), array_ops.tile(array_ops.expand_dims(tril_ids(n), 0), [m, 1]) ]) idx = array_ops.transpose(idx, [1, 2, 0]) # Gather up, reshape, and return. y = array_ops.reshape(x, [-1, d]) y = array_ops.gather_nd(y, idx) y = array_ops.reshape(y, array_ops.concat(0, [batch_shape, [n, n]])) y = array_ops.matrix_band_part(y, -1, 0) y.set_shape(y.get_shape().merge_with(final_shape)) return y
def get_batch_loss(self, features, mode, state): """Computes predictions and a loss. Args: features: A dictionary (such as is produced by a chunker) with the following key/value pairs (shapes are given as required for training): TrainEvalFeatures.TIMES: A [batch size, self.window_size] integer Tensor with times for each observation. To train on longer sequences, the data should first be chunked. TrainEvalFeatures.VALUES: A [batch size, self.window_size, self.num_features] Tensor with values for each observation. When evaluating, `TIMES` and `VALUES` must have a window size of at least self.window_size, but it may be longer, in which case the last window_size - self.input_window_size times (or fewer if this is not divisible by self.output_window_size) will be evaluated on with non-overlapping output windows (and will have associated predictions). This is primarily to support qualitative evaluation/plotting, and is not a recommended way to compute evaluation losses (since there is no overlap in the output windows, which for window-based models is an undesirable bias). mode: The tf.estimator.ModeKeys mode to use (TRAIN or EVAL). state: Unused Returns: A model.ModelOutputs object. Raises: ValueError: If `mode` is not TRAIN or EVAL, or if static shape information is incorrect. """ features = {feature_name: ops.convert_to_tensor(feature_value) for feature_name, feature_value in features.items()} times = features[TrainEvalFeatures.TIMES] exogenous_regressors = self._process_exogenous_features( times=times, features={key: value for key, value in features.items() if key not in [TrainEvalFeatures.TIMES, TrainEvalFeatures.VALUES, PredictionFeatures.STATE_TUPLE]}) if mode == estimator_lib.ModeKeys.TRAIN: # For training, we require the window size to be self.window_size as # iterating sequentially on larger windows could introduce a bias. return self._process_window( features, mode=mode, exogenous_regressors=exogenous_regressors) elif mode == estimator_lib.ModeKeys.EVAL: # For evaluation, we allow the user to pass in a larger window, in which # case we try to cover as much of the window as possible without # overlap. Quantitative evaluation is more efficient/correct with fixed # windows matching self.window_size (as with training), but this looping # allows easy plotting of "in-sample" predictions. times.get_shape().assert_has_rank(2) static_window_size = times.get_shape().dims[1].value if (static_window_size is not None and static_window_size < self.window_size): raise ValueError( ("ARModel requires a window of at least input_window_size + " "output_window_size to evaluate on (input_window_size={}, " "output_window_size={}, and got shape {} for feature '{}' (batch " "size, window size)).").format( self.input_window_size, self.output_window_size, times.get_shape(), TrainEvalFeatures.TIMES)) num_iterations = ((array_ops.shape(times)[1] - self.input_window_size) // self.output_window_size) output_size = num_iterations * self.output_window_size # Rather than dealing with overlapping windows of output, discard a bit at # the beginning if output windows don't cover evenly. crop_length = output_size + self.input_window_size features = {feature_name: feature_value[:, -crop_length:] for feature_name, feature_value in features.items()} # Note that, unlike the ARModel's predict() while_loop, each iteration # here can run in parallel, since we are not feeding predictions or state # from previous iterations. def _while_condition(iteration_number, loss_ta, mean_ta, covariance_ta): del loss_ta, mean_ta, covariance_ta # unused return iteration_number < num_iterations def _while_body(iteration_number, loss_ta, mean_ta, covariance_ta): """Perform a processing step on a single window of data.""" base_offset = iteration_number * self.output_window_size model_outputs = self._process_window( features={ feature_name: feature_value[:, base_offset:base_offset + self.window_size] for feature_name, feature_value in features.items()}, mode=mode, exogenous_regressors=exogenous_regressors[ :, base_offset:base_offset + self.window_size]) # This code needs to be updated if new predictions are added in # self._process_window assert len(model_outputs.predictions) == 3 assert "mean" in model_outputs.predictions assert "covariance" in model_outputs.predictions assert "observed" in model_outputs.predictions return (iteration_number + 1, loss_ta.write( iteration_number, model_outputs.loss), mean_ta.write( iteration_number, model_outputs.predictions["mean"]), covariance_ta.write( iteration_number, model_outputs.predictions["covariance"])) _, loss_ta, mean_ta, covariance_ta = control_flow_ops.while_loop( _while_condition, _while_body, [0, tensor_array_ops.TensorArray(dtype=self.dtype, size=num_iterations), tensor_array_ops.TensorArray(dtype=self.dtype, size=num_iterations), tensor_array_ops.TensorArray(dtype=self.dtype, size=num_iterations)]) values = math_ops.cast(features[TrainEvalFeatures.VALUES], dtype=self.dtype) batch_size = array_ops.shape(times)[0] prediction_shape = [batch_size, self.output_window_size * num_iterations, self.num_features] (previous_state_times, previous_state_values, previous_state_exogenous_regressors) = state # Make sure returned state always has windows of self.input_window_size, # even if we were passed fewer than self.input_window_size points this # time. if self.input_window_size > 0: new_state_times = array_ops.concat( [previous_state_times, math_ops.cast(times, dtype=dtypes.int64)], axis=1)[:, -self.input_window_size:] new_state_times.set_shape((None, self.input_window_size)) new_state_values = array_ops.concat( [previous_state_values, self._scale_data(values)], axis=1)[:, -self.input_window_size:, :] new_state_values.set_shape((None, self.input_window_size, self.num_features)) new_exogenous_regressors = array_ops.concat( [previous_state_exogenous_regressors, exogenous_regressors], axis=1)[:, -self.input_window_size:, :] new_exogenous_regressors.set_shape( (None, self.input_window_size, self.exogenous_size)) else: # There is no state to keep, and the strided slices above do not handle # input_window_size=0. new_state_times = previous_state_times new_state_values = previous_state_values new_exogenous_regressors = previous_state_exogenous_regressors return model.ModelOutputs( loss=math_ops.reduce_mean(loss_ta.stack(), axis=0), end_state=(new_state_times, new_state_values, new_exogenous_regressors), predictions={ "mean": array_ops.reshape( array_ops.transpose(mean_ta.stack(), [1, 0, 2, 3]), prediction_shape), "covariance": array_ops.reshape( array_ops.transpose(covariance_ta.stack(), [1, 0, 2, 3]), prediction_shape), "observed": values[:, -output_size:]}, prediction_times=times[:, -output_size:]) else: raise ValueError( "Unknown mode '{}' passed to get_batch_loss.".format(mode))
def call(self, inputs, initial_state=None, dtype=None, sequence_length=None): """Run this LSTM on inputs, starting from the given state. Args: inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`. initial_state: a tuple `(initial_cell_state, initial_output)` with tensors of shape `[batch_size, self._num_units]`. If this is not provided, the cell is expected to create a zero initial state of type `dtype`. dtype: The data type for the initial state and expected output. Required if `initial_state` is not provided or RNN state has a heterogeneous dtype. sequence_length: Specifies the length of each sequence in inputs. An `int32` or `int64` vector (tensor) size `[batch_size]`, values in `[0, time_len).` Defaults to `time_len` for each element. Returns: A pair containing: - Output: A `3-D` tensor of shape `[time_len, batch_size, output_size]` or a list of time_len tensors of shape `[batch_size, output_size]`, to match the type of the `inputs`. - Final state: a tuple `(cell_state, output)` matching `initial_state`. Raises: ValueError: in case of shape mismatches """ is_list = isinstance(inputs, list) if is_list: inputs = array_ops.stack(inputs) inputs_shape = inputs.get_shape().with_rank(3) if not inputs_shape[2]: raise ValueError("Expecting inputs_shape[2] to be set: %s" % inputs_shape) batch_size = inputs_shape[1].value if batch_size is None: batch_size = array_ops.shape(inputs)[1] time_len = inputs_shape[0].value if time_len is None: time_len = array_ops.shape(inputs)[0] # Provide default values for initial_state and dtype if initial_state is None: if dtype is None: raise ValueError( "Either initial_state or dtype needs to be specified") z = array_ops.zeros(array_ops.stack([batch_size, self.num_units]), dtype=dtype) initial_state = z, z else: if len(initial_state) != 2: raise ValueError( "Expecting initial_state to be a tuple with length 2 or None" ) if dtype is None: dtype = initial_state[0].dtype # create the actual cell if sequence_length is not None: sequence_length = ops.convert_to_tensor(sequence_length) initial_cell_state, initial_output = initial_state # pylint: disable=unpacking-non-sequence cell_states, outputs = self._call_cell(inputs, initial_cell_state, initial_output, dtype, sequence_length) if sequence_length is not None: # Mask out the part beyond sequence_length mask = array_ops.transpose( array_ops.sequence_mask(sequence_length, time_len, dtype=dtype), [1, 0]) mask = array_ops.tile(array_ops.expand_dims(mask, [-1]), [1, 1, self.num_units]) outputs *= mask # Prepend initial states to cell_states and outputs for indexing to work # correctly,since we want to access the last valid state at # sequence_length - 1, which can even be -1, corresponding to the # initial state. mod_cell_states = array_ops.concat( [array_ops.expand_dims(initial_cell_state, [0]), cell_states], 0) mod_outputs = array_ops.concat( [array_ops.expand_dims(initial_output, [0]), outputs], 0) final_cell_state = self._gather_states(mod_cell_states, sequence_length, batch_size) final_output = self._gather_states(mod_outputs, sequence_length, batch_size) else: # No sequence_lengths used: final state is the last state final_cell_state = cell_states[-1] final_output = outputs[-1] if is_list: # Input was a list, so return a list outputs = array_ops.unstack(outputs) final_state = rnn_cell_impl.LSTMStateTuple(final_cell_state, final_output) return outputs, final_state
def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, use_gpu, data_format="NHWC"): """Verifies the output values of the convolution function. Args: tensor_in_sizes: Input tensor dimensions in [batch, input_rows, input_cols, input_depth]. filter_in_sizes: Filter tensor dimensions in [filter_rows, filter_cols, input_depth, depth_multiplier]. stride: Stride. padding: Padding type. use_gpu: Whether to use GPU. data_format: The data_format of the input. "NHWC" or "NCHW". """ total_size_1 = 1 total_size_2 = 1 for s in tensor_in_sizes: total_size_1 *= s for s in filter_in_sizes: total_size_2 *= s # Initializes the input and filter tensor with numbers incrementing from 1. x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] with self.test_session(use_gpu=use_gpu) as sess: t1 = constant_op.constant(x1, shape=tensor_in_sizes) t1.set_shape(tensor_in_sizes) t2 = constant_op.constant(x2, shape=filter_in_sizes) native_t1 = t1 strides = [1, stride, stride, 1] if data_format == "NCHW": # Transpose from NWHC input to NCHW # Ex. [4, 5, 5, 48] to [4, 48, 5, 5] native_t1 = array_ops.transpose(t1, [0, 3, 1, 2]) strides = [1, 1, stride, stride] conv_native = nn_ops.depthwise_conv2d_native( native_t1, t2, strides=strides, data_format=data_format, padding=padding) if data_format == "NCHW": # Transpose back from NCHW to NHWC conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1]) conv_interface = nn_impl.depthwise_conv2d( t1, t2, strides=[1, stride, stride, 1], padding=padding) native_result = sess.run(conv_native) interface_result = sess.run(conv_interface) print("depthwise conv_2d: ", tensor_in_sizes, "*", filter_in_sizes, ", stride:", stride, ", padding: ", padding, ", max diff: ", np.amax(np.absolute(native_result - interface_result))) self.assertArrayNear( np.ravel(native_result), np.ravel(interface_result), 1e-5) self.assertShapeEqual(native_result, conv_native) self.assertShapeEqual(native_result, conv_interface)
def batch_jacobian(self, target, source, unconnected_gradients=UnconnectedGradients.NONE, parallel_iterations=None, experimental_use_pfor=True): """Computes and stacks per-example jacobians. See [wikipedia article](http://en.wikipedia.org/wiki/jacobian_matrix_and_determinant) for the definition of a Jacobian. This function is essentially an efficient implementation of the following: `tf.stack([self.jacobian(y[i], x[i]) for i in range(x.shape[0])])`. Note that compared to `GradientTape.jacobian` which computes gradient of each output value w.r.t each input value, this function is useful when `target[i,...]` is independent of `source[j,...]` for `j != i`. This assumption allows more efficient computation as compared to `GradientTape.jacobian`. The output, as well as intermediate activations, are lower dimensional and avoid a bunch of redundant zeros which would result in the jacobian computation given the independence assumption. Example usage: ```python with tf.GradientTape() as g: x = tf.constant([[1., 2.], [3., 4.]], dtype=tf.float32) g.watch(x) y = x * x batch_jacobian = g.batch_jacobian(y, x) # batch_jacobian is [[[2, 0], [0, 4]], [[6, 0], [0, 8]]] ``` Args: target: A tensor with rank 2 or higher and with shape [b, y1, ..., y_n]. `target[i,...]` should only depend on `source[i,...]`. source: A tensor with rank 2 or higher and with shape [b, x1, ..., x_m]. unconnected_gradients: a value which can either hold 'none' or 'zero' and alters the value which will be returned if the target and sources are unconnected. The possible values and effects are detailed in 'UnconnectedGradients' and it defaults to 'none'. parallel_iterations: A knob to control how many iterations are dispatched in parallel. This knob can be used to control the total memory usage. experimental_use_pfor: If true, uses pfor for computing the Jacobian. Else uses a tf.while_loop. Returns: A tensor `t` with shape [b, y_1, ..., y_n, x1, ..., x_m] where `t[i, ...]` is the jacobian of `target[i, ...]` w.r.t. `source[i, ...]`, i.e. stacked per-example jacobians. Raises: RuntimeError: If called on a non-persistent tape with eager execution enabled and without enabling experimental_use_pfor. ValueError: If vectorization of jacobian computation fails or if first dimension of `target` and `source` do not match. """ target_shape = target.shape if target_shape.rank is None: dim = tensor_shape.Dimension(None) else: dim = target_shape.dims[0] if not (target_shape.with_rank_at_least(2) and source.shape.with_rank_at_least(2) and dim.is_compatible_with(source.shape[0])): raise ValueError("Need first dimension of target shape (%s) and " "source shape (%s) to match." % (target.shape, source.shape)) if target_shape.is_fully_defined(): batch_size = int(target_shape[0]) target_row_size = target_shape.num_elements() // batch_size else: target_shape = array_ops.shape(target) batch_size = target_shape[0] target_row_size = array_ops.size(target) // batch_size source_shape = array_ops.shape(source) # Flatten target to 2-D. # Note that we push and pop the tape here and below. This is needed since we # need gradients through the enclosed operations. self._push_tape() with ops.control_dependencies( [check_ops.assert_equal(batch_size, source_shape[0])]): target = array_ops.reshape(target, [batch_size, target_row_size]) self._pop_tape() def loop_fn(i): self._push_tape() y = array_ops.gather(target, i, axis=1) self._pop_tape() return self.gradient(y, source, unconnected_gradients=unconnected_gradients) if experimental_use_pfor: try: output = pfor_ops.pfor(loop_fn, target_row_size, parallel_iterations=parallel_iterations) except ValueError as err: six.reraise( ValueError, ValueError( str(err) + "\nEncountered an exception while vectorizing the " "batch_jacobian computation. Vectorization can be disabled by " "setting experimental_use_pfor to False."), sys.exc_info()[2]) else: if context.executing_eagerly() and not self._persistent: raise RuntimeError( "GradientTape must be created with persistent=True" " to compute the batch_jacobian with eager execution enabled and " " with experimental_use_pfor set to False.") output = pfor_ops.for_loop(loop_fn, target.dtype, target_row_size, parallel_iterations=parallel_iterations) new_shape = array_ops.concat([target_shape, source_shape[1:]], axis=0) if output is None: return array_ops.zeros(new_shape) else: output = array_ops.reshape(output, [target_row_size, batch_size, -1]) output = array_ops.transpose(output, [1, 0, 2]) return array_ops.reshape(output, new_shape)
def f(a, b): return array_ops.transpose(a, b)
def log_prob(self, x, name='log_prob'): """Log of the probability density/mass function. Args: x: `float` or `double` `Tensor`. name: The name to give this op. Returns: log_prob: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type `self.dtype`. """ with ops.name_scope(self.name): with ops.name_scope(name, values=[x] + list(self.inputs.values())): x = ops.convert_to_tensor(x, name='x') contrib_tensor_util.assert_same_float_dtype( (self.scale_operator_pd, x)) if self.cholesky_input_output_matrices: x_sqrt = x else: # Complexity: O(nbk^3) x_sqrt = linalg_ops.batch_cholesky(x) batch_shape = self.batch_shape() event_shape = self.event_shape() ndims = array_ops.rank(x_sqrt) # sample_ndims = ndims - batch_ndims - event_ndims sample_ndims = ndims - array_ops.shape(batch_shape)[0] - 2 sample_shape = array_ops.slice(array_ops.shape(x_sqrt), [0], [sample_ndims]) # We need to be able to pre-multiply each matrix by its corresponding # batch scale matrix. Since a Distribution Tensor supports multiple # samples per batch, this means we need to reshape the input matrix `x` # so that the first b dimensions are batch dimensions and the last two # are of shape [dimension, dimensions*number_of_samples]. Doing these # gymnastics allows us to do a batch_solve. # # After we're done with sqrt_solve (the batch operation) we need to undo # this reshaping so what we're left with is a Tensor partitionable by # sample, batch, event dimensions. # Complexity: O(nbk^2) since transpose must access every element. scale_sqrt_inv_x_sqrt = x_sqrt perm = array_ops.concat(0, (math_ops.range( sample_ndims, ndims), math_ops.range(0, sample_ndims))) scale_sqrt_inv_x_sqrt = array_ops.transpose( scale_sqrt_inv_x_sqrt, perm) shape = array_ops.concat(0, (batch_shape, (math_ops.cast( self.dimension, dtype=dtypes.int32), -1))) scale_sqrt_inv_x_sqrt = array_ops.reshape( scale_sqrt_inv_x_sqrt, shape) # Complexity: O(nbM*k) where M is the complexity of the operator solving # a vector system. E.g., for OperatorPDDiag, each solve is O(k), so # this complexity is O(nbk^2). For OperatorPDCholesky, each solve is # O(k^2) so this step has complexity O(nbk^3). scale_sqrt_inv_x_sqrt = self.scale_operator_pd.sqrt_solve( scale_sqrt_inv_x_sqrt) # Undo make batch-op ready. # Complexity: O(nbk^2) shape = array_ops.concat( 0, (batch_shape, event_shape, sample_shape)) scale_sqrt_inv_x_sqrt = array_ops.reshape( scale_sqrt_inv_x_sqrt, shape) perm = array_ops.concat( 0, (math_ops.range(ndims - sample_ndims, ndims), math_ops.range(0, ndims - sample_ndims))) scale_sqrt_inv_x_sqrt = array_ops.transpose( scale_sqrt_inv_x_sqrt, perm) # Write V = SS', X = LL'. Then: # tr[inv(V) X] = tr[inv(S)' inv(S) L L'] # = tr[inv(S) L L' inv(S)'] # = tr[(inv(S) L) (inv(S) L)'] # = sum_{ik} (inv(S) L)_{ik}^2 # The second equality follows from the cyclic permutation property. # Complexity: O(nbk^2) trace_scale_inv_x = math_ops.reduce_sum( math_ops.square(scale_sqrt_inv_x_sqrt), reduction_indices=[-2, -1]) # Complexity: O(nbk) half_log_det_x = math_ops.reduce_sum(math_ops.log( array_ops.batch_matrix_diag_part(x_sqrt)), reduction_indices=[-1]) # Complexity: O(nbk^2) log_prob = ((self.df - self.dimension - 1.) * half_log_det_x - 0.5 * trace_scale_inv_x - self.log_normalizing_constant()) # Set shape hints. # Try to merge what we know from the input then what we know from the # parameters of this distribution. if x.get_shape().ndims is not None: log_prob.set_shape(x.get_shape()[:-2]) if (log_prob.get_shape().ndims is not None and self.get_batch_shape().ndims is not None and self.get_batch_shape().ndims > 0): log_prob.get_shape( )[-self.get_batch_shape().ndims:].merge_with( self.get_batch_shape()) return log_prob
def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None, keep_dims=None): r"""Computes the norm of vectors, matrices, and tensors. This function can compute several different vector norms (the 1-norm, the Euclidean or 2-norm, the inf-norm, and in general the p-norm for p > 0) and matrix norms (Frobenius, 1-norm, 2-norm and inf-norm). Args: tensor: `Tensor` of types `float32`, `float64`, `complex64`, `complex128` ord: Order of the norm. Supported values are 'fro', 'euclidean', `1`, `2`, `np.inf` and any positive real number yielding the corresponding p-norm. Default is 'euclidean' which is equivalent to Frobenius norm if `tensor` is a matrix and equivalent to 2-norm for vectors. Some restrictions apply: a) The Frobenius norm `fro` is not defined for vectors, b) If axis is a 2-tuple (matrix norm), only 'euclidean', 'fro', `1`, `2`, `np.inf` are supported. See the description of `axis` on how to compute norms for a batch of vectors or matrices stored in a tensor. axis: If `axis` is `None` (the default), the input is considered a vector and a single vector norm is computed over the entire set of values in the tensor, i.e. `norm(tensor, ord=ord)` is equivalent to `norm(reshape(tensor, [-1]), ord=ord)`. If `axis` is a Python integer, the input is considered a batch of vectors, and `axis` determines the axis in `tensor` over which to compute vector norms. If `axis` is a 2-tuple of Python integers it is considered a batch of matrices and `axis` determines the axes in `tensor` over which to compute a matrix norm. Negative indices are supported. Example: If you are passing a tensor that can be either a matrix or a batch of matrices at runtime, pass `axis=[-2,-1]` instead of `axis=None` to make sure that matrix norms are computed. keepdims: If True, the axis indicated in `axis` are kept with size 1. Otherwise, the dimensions in `axis` are removed from the output shape. name: The name of the op. keep_dims: Deprecated alias for `keepdims`. Returns: output: A `Tensor` of the same type as tensor, containing the vector or matrix norms. If `keepdims` is True then the rank of output is equal to the rank of `tensor`. Otherwise, if `axis` is none the output is a scalar, if `axis` is an integer, the rank of `output` is one less than the rank of `tensor`, if `axis` is a 2-tuple the rank of `output` is two less than the rank of `tensor`. Raises: ValueError: If `ord` or `axis` is invalid. @compatibility(numpy) Mostly equivalent to numpy.linalg.norm. Not supported: ord <= 0, 2-norm for matrices, nuclear norm. Other differences: a) If axis is `None`, treats the flattened `tensor` as a vector regardless of rank. b) Explicitly supports 'euclidean' norm as the default, including for higher order tensors. @end_compatibility """ keepdims = deprecation.deprecated_argument_lookup('keepdims', keepdims, 'keep_dims', keep_dims) if keepdims is None: keepdims = False is_matrix_norm = ((isinstance(axis, tuple) or isinstance(axis, list)) and len(axis) == 2) if is_matrix_norm: axis = tuple(axis) if (not isinstance(axis[0], int) or not isinstance(axis[1], int) or axis[0] == axis[1]): raise ValueError( "'axis' must be None, an integer, or a tuple of 2 unique integers" ) supported_matrix_norms = ['euclidean', 'fro', 1, 2, np.inf] if ord not in supported_matrix_norms: raise ValueError( "'ord' must be a supported matrix norm in %s, got %s" % (supported_matrix_norms, ord)) else: if not (isinstance(axis, int) or axis is None): raise ValueError( "'axis' must be None, an integer, or a tuple of 2 unique integers" ) supported_vector_norms = ['euclidean', 1, 2, np.inf] if (not np.isreal(ord) or ord <= 0) and ord not in supported_vector_norms: raise ValueError("'ord' must be a supported vector norm, got %s" % ord) if axis is not None: axis = (axis, ) with ops.name_scope(name, 'norm', [tensor]): tensor = ops.convert_to_tensor(tensor) if ord in ['fro', 'euclidean', 2, 2.0]: if is_matrix_norm and ord in [2, 2.0]: rank = array_ops.rank(tensor) positive_axis = map_fn.map_fn( lambda i: control_flow_ops.cond(i >= 0, lambda: i, lambda: i + rank), ops.convert_to_tensor(axis)) axes = math_ops.range(rank) perm_before = array_ops.concat([ array_ops.setdiff1d(axes, positive_axis)[0], positive_axis ], axis=0) perm_after = map_fn.map_fn( lambda i: math_ops.cast(array_ops.squeeze( array_ops.where(math_ops.equal(perm_before, i))), dtype=dtypes.int32), axes) permed = array_ops.transpose(tensor, perm=perm_before) matrix_2_norm = array_ops.expand_dims(math_ops.reduce_max( math_ops.abs( gen_linalg_ops.svd(permed, compute_uv=False)[0]), axis=-1, keepdims=True), axis=-1) result = array_ops.transpose(matrix_2_norm, perm=perm_after) else: result = math_ops.sqrt( math_ops.reduce_sum(tensor * math_ops.conj(tensor), axis, keepdims=True)) else: result = math_ops.abs(tensor) if ord == 1: sum_axis = None if axis is None else axis[0] result = math_ops.reduce_sum(result, sum_axis, keepdims=True) if is_matrix_norm: result = math_ops.reduce_max(result, axis[-1], keepdims=True) elif ord == np.inf: if is_matrix_norm: result = math_ops.reduce_sum(result, axis[1], keepdims=True) max_axis = None if axis is None else axis[0] result = math_ops.reduce_max(result, max_axis, keepdims=True) else: # General p-norms (positive p only) result = math_ops.pow( math_ops.reduce_sum(math_ops.pow(result, ord), axis, keepdims=True), 1.0 / ord) if not keepdims: result = array_ops.squeeze(result, axis) return result
def convert(w): return array_ops.transpose(w) if transpose_weights else w
def _VerifyValuesWithDilation(self, tensor_in_sizes, filter_in_sizes, stride, dilation, padding, data_type, data_format="NHWC"): """Verifies the output values of the convolution function. Args: tensor_in_sizes: Input tensor dimensions in [batch, input_rows, input_cols, input_depth]. filter_in_sizes: Filter tensor dimensions in [filter_rows, filter_cols, input_depth, depth_multiplier]. stride: Stride. dilation: Dilation. padding: Padding type. data_type: The data type to use. data_format: The data_format of the input. "NHWC" or "NCHW". """ total_size_1 = 1 total_size_2 = 1 for s in tensor_in_sizes: total_size_1 *= s for s in filter_in_sizes: total_size_2 *= s # Initializes the input and filter tensor with numbers incrementing from 1. x1 = np.array([f * 1.0 for f in range(1, total_size_1 + 1)], dtype=data_type).reshape(tensor_in_sizes) x2 = np.array([f * 1.0 for f in range(1, total_size_2 + 1)], dtype=data_type).reshape(filter_in_sizes) with self.session() as sess: if data_type == np.float32: # TODO(b/64210055): Tolerance for TPU is high. tolerance = 1e-2 else: self.assertEqual(data_type, np.float64) tolerance = 1e-8 t1 = array_ops.placeholder(shape=tensor_in_sizes, dtype=data_type) t2 = array_ops.placeholder(shape=filter_in_sizes, dtype=data_type) native_t1 = t1 strides = [1, stride, stride, 1] dilations = [dilation, dilation] if data_format == "NCHW": # Transpose from NWHC input to NCHW # Ex. [4, 5, 5, 48] to [4, 48, 5, 5] native_t1 = array_ops.transpose(t1, [0, 3, 1, 2]) strides = [1, 1, stride, stride] with self.test_scope(): conv_native = nn_impl.depthwise_conv2d( native_t1, t2, strides=strides, rate=dilations, data_format=data_format, padding=padding) if data_format == "NCHW": # Transpose back from NCHW to NHWC conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1]) with ops.device("CPU"): # CPU only support NHWC format strides = [1, stride, stride, 1] conv_interface = nn_impl.depthwise_conv2d( t1, t2, strides=strides, rate=dilations, padding=padding) native_result = sess.run(conv_native, {t1: x1, t2: x2}) interface_result = sess.run(conv_interface, {t1: x1, t2: x2}) print("data_type:", data_type, "max diff = ", np.amax(np.absolute(native_result - interface_result))) self.assertAllClose( np.ravel(native_result), np.ravel(interface_result), rtol=tolerance)
def sample_n(self, n, seed=None, name='sample'): # pylint: disable=line-too-long """Generate `n` samples. Complexity: O(nbk^3) The sampling procedure is based on the [Bartlett decomposition]( https://en.wikipedia.org/wiki/Wishart_distribution#Bartlett_decomposition) and [using a Gamma distribution to generate Chi2 random variates]( https://en.wikipedia.org/wiki/Chi-squared_distribution#Gamma.2C_exponential.2C_and_related_distributions). Args: n: Scalar. Number of samples to draw from each distribution. seed: Python integer; random number generator seed. name: The name of this op. Returns: samples: a `Tensor` of shape `(n,) + self.batch_shape + self.event_shape` with values of type `self.dtype`. """ with ops.name_scope(self.name): with ops.name_scope(name, values=[n] + list(self.inputs.values())): n = ops.convert_to_tensor(n, name='n') if n.dtype != dtypes.int32: raise TypeError('n.dtype=%s which is not int32' % n.dtype) batch_shape = self.batch_shape() event_shape = self.event_shape() batch_ndims = array_ops.shape(batch_shape)[0] ndims = batch_ndims + 3 # sample_ndims=1, event_ndims=2 shape = array_ops.concat(0, ((n, ), batch_shape, event_shape)) # Complexity: O(nbk^2) x = random_ops.random_normal(shape=shape, mean=0., stddev=1., dtype=self.dtype, seed=seed) # Complexity: O(nbk) # This parametrization is equivalent to Chi2, i.e., # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2) g = random_ops.random_gamma(shape=(n, ), alpha=self._multi_gamma_sequence( 0.5 * self.df, self.dimension), beta=0.5, dtype=self.dtype, seed=seed) # Complexity: O(nbk^2) x = array_ops.batch_matrix_band_part(x, -1, 0) # Tri-lower. # Complexity: O(nbk) x = array_ops.batch_matrix_set_diag(x, math_ops.sqrt(g)) # Make batch-op ready. # Complexity: O(nbk^2) perm = array_ops.concat(0, (math_ops.range(1, ndims), (0, ))) x = array_ops.transpose(x, perm) shape = array_ops.concat(0, (batch_shape, (event_shape[0], -1))) x = array_ops.reshape(x, shape) # Complexity: O(nbM) where M is the complexity of the operator solving a # vector system. E.g., for OperatorPDDiag, each matmul is O(k^2), so # this complexity is O(nbk^2). For OperatorPDCholesky, each matmul is # O(k^3) so this step has complexity O(nbk^3). x = self.scale_operator_pd.sqrt_matmul(x) # Undo make batch-op ready. # Complexity: O(nbk^2) shape = array_ops.concat(0, (batch_shape, event_shape, (n, ))) x = array_ops.reshape(x, shape) perm = array_ops.concat( 0, ((ndims - 1, ), math_ops.range(0, ndims - 1))) x = array_ops.transpose(x, perm) if not self.cholesky_input_output_matrices: # Complexity: O(nbk^3) x = math_ops.batch_matmul(x, x, adj_y=True) # Set shape hints. if self.scale_operator_pd.get_shape().ndims is not None: x.set_shape( tensor_shape.TensorShape( [tensor_util.constant_value(n)] + self.scale_operator_pd.get_shape().as_list())) elif x.get_shape().ndims is not None: x.get_shape()[0].merge_with( tensor_shape.TensorDimension( tensor_util.constant_value(n))) return x
def _ConstructAndTestGradient(self, input_shape, filter_shape, output_shape, stride, padding, data_type, test_input, use_gpu, data_format="NHWC"): input_size = 1 for x in input_shape: input_size *= x filter_size = 1 for x in filter_shape: filter_size *= x input_data = [x * 1.0 / input_size for x in range(0, input_size)] filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)] with self.test_session(use_gpu=use_gpu): if data_type == dtypes.float32: tolerance = 0.002 else: self.assertEqual(data_type, dtypes.float64) tolerance = 1e-8 input_tensor = constant_op.constant( input_data, shape=input_shape, dtype=data_type, name="input") filter_tensor = constant_op.constant( filter_data, shape=filter_shape, dtype=data_type, name="filter") native_input = input_tensor strides = [1, stride, stride, 1] if data_format == "NCHW": # Transpose from NWHC input to NCHW # Ex. [4, 5, 5, 48] to [4, 48, 5, 5] native_input = array_ops.transpose(input_tensor, [0, 3, 1, 2]) input_shape = [ input_shape[0], input_shape[3], input_shape[1], input_shape[2] ] output_shape = [ output_shape[0], output_shape[3], output_shape[1], output_shape[2] ] strides = [1, 1, stride, stride] depthwise_conv2d = nn_ops.depthwise_conv2d_native( native_input, filter_tensor, strides, padding, data_format=data_format, name="depthwise_conv2d") self.assertEqual(output_shape, depthwise_conv2d.get_shape()) if test_input: err = gradient_checker.compute_gradient_error( native_input, input_shape, depthwise_conv2d, output_shape) else: err = gradient_checker.compute_gradient_error(filter_tensor, filter_shape, depthwise_conv2d, output_shape) print("depthwise conv_2d gradient error = ", err) self.assertLess(err, tolerance)
def HwioToOihw(in_tensor): return array_ops.transpose(in_tensor, [3, 2, 0, 1])
def rotate_transpose(x, shift, name="rotate_transpose"): """Circularly moves dims left or right. Effectively identical to: ```python numpy.transpose(x, numpy.roll(numpy.arange(len(x.shape)), shift)) ``` When `validate_args=False` additional graph-runtime checks are performed. These checks entail moving data from to GPU to CPU. Example: ```python x = ... # Tensor of shape [1, 2, 3, 4]. rotate_transpose(x, -1) # result shape: [2, 3, 4, 1] rotate_transpose(x, -2) # result shape: [3, 4, 1, 2] rotate_transpose(x, 1) # result shape: [4, 1, 2, 3] rotate_transpose(x, 2) # result shape: [3, 4, 1, 2] rotate_transpose(x, 7) == rotate_transpose(x, 3) rotate_transpose(x, -7) == rotate_transpose(x, -3) ``` Args: x: `Tensor`. shift: `Tensor`. Number of dimensions to transpose left (shift<0) or transpose right (shift>0). name: `String`. The name to give this op. Returns: rotated_x: Input `Tensor` with dimensions circularly rotated by shift. Raises: TypeError: if shift is not integer type. """ with ops.name_scope(name, values=[x, shift]): x = ops.convert_to_tensor(x, name="x") shift = ops.convert_to_tensor(shift, name="shift") # We do not assign back to preserve constant-ness. check_ops.assert_integer(shift) shift_value_static = tensor_util.constant_value(shift) ndims = x.get_shape().ndims if ndims is not None and shift_value_static is not None: if ndims < 2: return x shift_value_static = np.sign(shift_value_static) * ( abs(shift_value_static) % ndims) if shift_value_static == 0: return x perm = np.roll(np.arange(ndims), shift_value_static) return array_ops.transpose(x, perm=perm) else: # Consider if we always had a positive shift, and some specified # direction. # When shifting left we want the new array: # last(x, n-shift) + first(x, shift) # and if shifting right then we want: # last(x, shift) + first(x, n-shift) # Observe that last(a) == slice(a, n) and first(a) == slice(0, a). # Also, we can encode direction and shift as one: direction * shift. # Combining these facts, we have: # a = cond(shift<0, -shift, n-shift) # last(x, n-a) + first(x, a) == x[a:n] + x[0:a] # Finally, we transform shift by modulo length so it can be specified # independently from the array upon which it operates (like python). ndims = array_ops.rank(x) shift = math_ops.select(math_ops.less(shift, 0), math_ops.mod(-shift, ndims), ndims - math_ops.mod(shift, ndims)) first = math_ops.range(0, shift) last = math_ops.range(shift, ndims) perm = array_ops.concat(0, (last, first)) return array_ops.transpose(x, perm=perm)
def NchwToNchwVectC(in_tensor): n, c, h, w = in_tensor.shape.as_list() assert c % 4 == 0 t = array_ops.reshape(in_tensor, [n, c // 4, 4, h, w]) return array_ops.transpose(t, [0, 1, 3, 4, 2])
def _ExtractImagePatchesGrad(op, grad): batch_size, rows_in, cols_in, channels = [ dim.value for dim in op.inputs[0].get_shape() ] input_bhwc = array_ops.shape(op.inputs[0]) batch_size = input_bhwc[0] channels = input_bhwc[3] _, rows_out, cols_out, _ = [ dim.value for dim in op.outputs[0].get_shape() ] _, ksize_r, ksize_c, _ = op.get_attr('ksizes') _, stride_r, stride_h, _ = op.get_attr('strides') _, rate_r, rate_c, _ = op.get_attr('rates') padding = op.get_attr('padding') ksize_r_eff = ksize_r + (ksize_r - 1) * (rate_r - 1) ksize_c_eff = ksize_c + (ksize_c - 1) * (rate_c - 1) if padding == b'SAME': rows_out = int(ceil(rows_in / stride_r)) cols_out = int(ceil(cols_in / stride_h)) pad_rows = ((rows_out - 1) * stride_r + ksize_r_eff - rows_in) // 2 pad_cols = ((cols_out - 1) * stride_h + ksize_c_eff - cols_in) // 2 elif padding == b'VALID': rows_out = int(ceil((rows_in - ksize_r_eff + 1) / stride_r)) cols_out = int(ceil((cols_in - ksize_c_eff + 1) / stride_h)) pad_rows = (rows_out - 1) * stride_r + ksize_r_eff - rows_in pad_cols = (cols_out - 1) * stride_h + ksize_c_eff - cols_in pad_rows, pad_cols = max(0, pad_rows), max(0, pad_cols) grad_expanded = array_ops.transpose( array_ops.reshape(grad, (batch_size, rows_out, cols_out, ksize_r, ksize_c, channels)), (1, 2, 3, 4, 0, 5) ) grad_flat = array_ops.reshape(grad_expanded, (-1, batch_size * channels)) row_steps = range(0, rows_out * stride_r, stride_r) col_steps = range(0, cols_out * stride_h, stride_h) idx = [] for i in range(rows_out): for j in range(cols_out): r_low, c_low = row_steps[i] - pad_rows, col_steps[j] - pad_cols r_high, c_high = r_low + ksize_r_eff, c_low + ksize_c_eff idx.extend([(r * (cols_in) + c, i * (cols_out * ksize_r * ksize_c) + j * (ksize_r * ksize_c) + ri * (ksize_c) + ci) for (ri, r) in enumerate(range(r_low, r_high, rate_r)) for (ci, c) in enumerate(range(c_low, c_high, rate_c)) if 0 <= r and r < rows_in and 0 <= c and c < cols_in ]) sp_shape = (rows_in * cols_in, rows_out * cols_out * ksize_r * ksize_c) sp_mat = sparse_tensor.SparseTensor( array_ops.constant(idx, dtype=ops.dtypes.int64), array_ops.ones((len(idx),), dtype=ops.dtypes.float32), sp_shape ) jac = sparse_ops.sparse_tensor_dense_matmul(sp_mat, grad_flat) grad_out = array_ops.reshape( jac, (rows_in, cols_in, batch_size, channels) ) grad_out = array_ops.transpose(grad_out, (2, 0, 1, 3)) return [grad_out]
def _TransposeGrad(op, grad): """Returns unshuffle(grad).""" p = op.inputs[1] return [array_ops.transpose(grad, array_ops.invert_permutation(p)), None]
def _ConstructAndTestGradient(self, input_shape, filter_shape, output_shape, stride, padding, data_type, test_input, use_gpu, grouped_conv=False, data_format="NHWC"): input_size = 1 for x in input_shape: input_size *= x filter_size = 1 for x in filter_shape: filter_size *= x input_data = [x * 1.0 / input_size for x in range(0, input_size)] filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)] ops.reset_default_graph() graph = ops.get_default_graph() with self.session(graph=graph, use_gpu=use_gpu) as sess: tolerance = { dtypes.float16: 4e-0, dtypes.float32: 8e-4, dtypes.float64: 1e-12, }[data_type] input_tensor = constant_op.constant(input_data, shape=input_shape, dtype=data_type, name="input") filter_tensor = constant_op.constant(filter_data, shape=filter_shape, dtype=data_type, name="filter") native_input = input_tensor strides = [1, stride, stride, 1] if data_format == "NCHW": # Transpose from NHWC input to NCHW # Ex. [4, 5, 5, 48] to [4, 48, 5, 5] native_input = array_ops.transpose(input_tensor, [0, 3, 1, 2]) input_shape = [ input_shape[0], input_shape[3], input_shape[1], input_shape[2] ] output_shape = [ output_shape[0], output_shape[3], output_shape[1], output_shape[2] ] strides = [1, 1, stride, stride] with sess.graph._kernel_label_map({ "DepthwiseConv2dNative": "cudnn_grouped_convolution", "DepthwiseConv2dNativeBackpropInput": "cudnn_grouped_convolution", "DepthwiseConv2dNativeBackpropFilter": "cudnn_grouped_convolution", } if grouped_conv else {}): depthwise_conv2d = nn_ops.depthwise_conv2d_native( native_input, filter_tensor, strides, padding, data_format=data_format, name="depthwise_conv2d") self.assertEqual(output_shape, depthwise_conv2d.get_shape()) try: if test_input: err = gradient_checker.compute_gradient_error( native_input, input_shape, depthwise_conv2d, output_shape) else: err = gradient_checker.compute_gradient_error( filter_tensor, filter_shape, depthwise_conv2d, output_shape) except errors.InvalidArgumentError as e: # Grouped convolution kernel is only registered for cuDNN 7. Silently # return when we are running on an earlier version or without GPU. if grouped_conv and e.message.startswith( "No OpKernel was registered to support Op 'DepthwiseConv2dNative'" ): tf_logging.warn("Skipping grouped convolution test") return raise e tf_logging.info( "data_type: %r, use_gpu: %r, grouped_conv: %r, error = %f", data_type, use_gpu, grouped_conv, err) self.assertLess(err, tolerance)
def _GatherV2Grad(op, grad): """Gradient for GatherV2 op.""" # params can be large, so colocate the shape calculation with it. # # params can be very large for sparse model, array_ops.shape raises # exception on the Windows platform when any dimension is larger than # int32. params_shape is not used in optimizer apply_sparse gradients, # so it's fine to convert it back to int32 regardless of truncation. params = op.inputs[0] with ops.colocate_with(params): params_shape = array_ops.shape(params, out_type=ops.dtypes.int64) params_shape = math_ops.to_int32(params_shape) indices = op.inputs[1] indices_size = array_ops.expand_dims(array_ops.size(indices), 0) axis = op.inputs[2] axis_static = tensor_util.constant_value(axis) # For axis 0 gathers, build an appropriately shaped IndexedSlices. if axis_static == 0: if context.in_eager_mode(): params_tail_shape = params_shape.cpu()[1:] else: params_tail_shape = params_shape[1:] values_shape = array_ops.concat([indices_size, params_tail_shape], 0) values = array_ops.reshape(grad, values_shape) indices = array_ops.reshape(indices, indices_size) return [ops.IndexedSlices(values, indices, params_shape), None, None] outer_shape = params_shape[:axis] outer_dims = array_ops.size(outer_shape) inner_shape = params_shape[axis:][1:] inner_dims = array_ops.size(inner_shape) outer_axes_indices = math_ops.range(outer_dims) inner_axes_indices = math_ops.range(outer_dims + 1, outer_dims + 1 + inner_dims) values_shape = array_ops.concat([outer_shape, indices_size, inner_shape], 0) values = array_ops.reshape(grad, values_shape) indices = array_ops.reshape(indices, indices_size) # We need to sum up every slice `values[..., i, ....]` corresponding to # `params[..., indices[i], ...]`. Since `unsorted_segment_sum` does not # support an axis parameter, we transpose the gather dimension to the front, # then use `unsorted_segment_sum` to build a # [gather_axis, outer_axes, inner_axes] tensor with all the gradients # affecting each index in `gather_axis` summed up. transpose_dims = array_ops.concat( [[outer_dims], outer_axes_indices, inner_axes_indices], 0) values_transpose = array_ops.transpose(values, transpose_dims) num_segments = params_shape[axis] params_grad = math_ops.unsorted_segment_sum( values_transpose, indices, num_segments) # Inverts the above transpose by moving dimension 0 back to its original # position. invert_transpose_dims = array_ops.concat( [outer_axes_indices + 1, [0], inner_axes_indices], 0) params_grad = array_ops.transpose(params_grad, invert_transpose_dims) return [params_grad, None, None]
def call(self, inputs): return array_ops.transpose(inputs, perm=(0, ) + self.dims)
def multiply_inverse(self, vector): # pylint: disable=invalid-name Z = utils.layer_params_to_mat2d(vector) # Derivations were done for "batch_dim==1" case so we need to convert to # that orientation: Z = array_ops.transpose(Z) if self._option == SeriesFBApproximation.option1: # Note that L_A = A0^(-1/2) * U_A and L_G = G0^(-1/2) * U_G. L_A, psi_A = self._input_factor.get_option1quants(self._damping_input) L_G, psi_G = self._output_factor.get_option1quants(self._damping_output) def gamma(x): # We are assuming that each case has the same number of time-steps. # If this stops being the case one shouldn't simply replace this T # with its average value. Instead, one needs to go back to the # definition of the gamma function from the paper. T = self._num_timesteps return (1 - x)**2 / (T * (1 - x**2) - 2 * x * (1 - x**T)) # Y = gamma( psi_G*psi_A^T ) (computed element-wise) # Even though Y is Z-independent we are recomputing it from the psi's # each since Y depends on both A and G quantities, and it is relatively # cheap to compute. Y = gamma(array_ops.reshape(psi_G, [int(psi_G.shape[0]), -1]) * psi_A) # Z = L_G^T * Z * L_A # This is equivalent to the following computation from the original # pseudo-code: # Z = G0^(-1/2) * Z * A0^(-1/2) # Z = U_G^T * Z * U_A Z = math_ops.matmul(L_G, math_ops.matmul(Z, L_A), transpose_a=True) # Z = Z .* Y Z *= Y # Z = L_G * Z * L_A^T # This is equivalent to the following computation from the original # pseudo-code: # Z = U_G * Z * U_A^T # Z = G0^(-1/2) * Z * A0^(-1/2) Z = math_ops.matmul(L_G, math_ops.matmul(Z, L_A, transpose_b=True)) elif self._option == SeriesFBApproximation.option2: # Note that P_A = A_1^T * A_0^(-1) and P_G = G_1^T * G_0^(-1), # and K_A = A_0^(-1/2) * E_A and K_G = G_0^(-1/2) * E_G. P_A, K_A, mu_A = self._input_factor.get_option2quants(self._damping_input) P_G, K_G, mu_G = self._output_factor.get_option2quants( self._damping_output) # Our approach differs superficially from the pseudo-code in the paper # in order to reduce the total number of matrix-matrix multiplies. # In particular, the first three computations in the pseudo code are # Z = G0^(-1/2) * Z * A0^(-1/2) # Z = Z - hPsi_G^T * Z * hPsi_A # Z = E_G^T * Z * E_A # Noting that hPsi = C0^(-1/2) * C1 * C0^(-1/2), so that # C0^(-1/2) * hPsi = C0^(-1) * C1 * C0^(-1/2) = P^T * C0^(-1/2) # the entire computation can be written as # Z = E_G^T * (G0^(-1/2) * Z * A0^(-1/2) # - hPsi_G^T * G0^(-1/2) * Z * A0^(-1/2) * hPsi_A) * E_A # = E_G^T * (G0^(-1/2) * Z * A0^(-1/2) # - G0^(-1/2) * P_G * Z * P_A^T * A0^(-1/2)) * E_A # = E_G^T * G0^(-1/2) * Z * A0^(-1/2) * E_A # - E_G^T* G0^(-1/2) * P_G * Z * P_A^T * A0^(-1/2) * E_A # = K_G^T * Z * K_A - K_G^T * P_G * Z * P_A^T * K_A # This final expression is computed by the following two lines: # Z = Z - P_G * Z * P_A^T Z -= math_ops.matmul(P_G, math_ops.matmul(Z, P_A, transpose_b=True)) # Z = K_G^T * Z * K_A Z = math_ops.matmul(K_G, math_ops.matmul(Z, K_A), transpose_a=True) # Z = Z ./ (1*1^T - mu_G*mu_A^T) # Be careful with the outer product. We don't want to accidentally # make it an inner-product instead. tmp = 1.0 - array_ops.reshape(mu_G, [int(mu_G.shape[0]), -1]) * mu_A # Prevent some numerical issues by setting any 0.0 eigs to 1.0 tmp += 1.0 * math_ops.cast(math_ops.equal(tmp, 0.0), dtype=tmp.dtype) Z /= tmp # We now perform the transpose/reverse version of the operations # derived above, whose derivation from the original pseudo-code is # analgous. # Z = K_G * Z * K_A^T Z = math_ops.matmul(K_G, math_ops.matmul(Z, K_A, transpose_b=True)) # Z = Z - P_G^T * Z * P_A Z -= math_ops.matmul(P_G, math_ops.matmul(Z, P_A), transpose_a=True) # Z = normalize (1/E[T]) * Z # Note that this normalization is done because we compute the statistics # by averaging, not summing, over time. (And the gradient is presumably # summed over time, not averaged, and thus their scales are different.) Z /= math_ops.cast(self._num_timesteps, Z.dtype) # Convert back to the "batch_dim==0" orientation. Z = array_ops.transpose(Z) return utils.mat2d_to_layer_params(vector, Z)
def _make_histogram_ops(self, model): """Defines histogram ops when histogram_freq > 0.""" # only make histogram summary op if it hasn't already been made if self.histogram_freq and self.merged is None: for layer in self.model.layers: for weight in layer.weights: mapped_weight_name = weight.name.replace(':', '_') tf_summary.histogram(mapped_weight_name, weight) if self.write_images: w_img = array_ops.squeeze(weight) shape = K.int_shape(w_img) if len(shape) == 2: # dense layer kernel case if shape[0] > shape[1]: w_img = array_ops.transpose(w_img) shape = K.int_shape(w_img) w_img = array_ops.reshape( w_img, [1, shape[0], shape[1], 1]) elif len(shape) == 3: # convnet case if K.image_data_format() == 'channels_last': # switch to channels_first to display # every kernel as a separate image w_img = array_ops.transpose(w_img, perm=[2, 0, 1]) shape = K.int_shape(w_img) w_img = array_ops.reshape( w_img, [shape[0], shape[1], shape[2], 1]) elif len(shape) == 1: # bias case w_img = array_ops.reshape(w_img, [1, shape[0], 1, 1]) else: # not possible to handle 3D convnets etc. continue shape = K.int_shape(w_img) assert len(shape) == 4 and shape[-1] in [1, 3, 4] tf_summary.image(mapped_weight_name, w_img) if self.write_grads: for weight in layer.trainable_weights: mapped_weight_name = weight.name.replace(':', '_') grads = model.optimizer.get_gradients( model.total_loss, weight) def is_indexed_slices(grad): return type(grad).__name__ == 'IndexedSlices' grads = [ grad.values if is_indexed_slices(grad) else grad for grad in grads ] tf_summary.histogram( '{}_grad'.format(mapped_weight_name), grads) if hasattr(layer, 'output'): if isinstance(layer.output, list): for i, output in enumerate(layer.output): tf_summary.histogram( '{}_out_{}'.format(layer.name, i), output) else: tf_summary.histogram('{}_out'.format(layer.name), layer.output)
def all_gather(self, input_tensor, axis, communication_hint='AUTO', timeout=0): """All-gather a dense tensor. This method must be called inside a tf.function. Args: input_tensor: a dense tensor. It must have the same rank on all replicas, and dimensions other than `axis` need to be the same as well. axis: 0-D int32 Tensor. Dimension along which to gather. Must be in the range [0, rank(value)). communication_hint: string providing hint to runtime for choosing collective implementation. Available options are `AUTO`, `NCCL`, and `RING`. timeout: a float. The timeout in seconds. Returns: The gathered Tensor. Raises: RuntimeError: if called in eager mode. """ if context.executing_eagerly(): raise RuntimeError('all_gather in eager mode is not supported') instance_key_tensor = self._collective_keys.get_instance_key( self._group_key, self._device) instance_key_shape = self._collective_keys.get_instance_key( self._group_key, self._device) with ops.device(self._device), \ ops.control_dependencies([array_ops.identity(input_tensor)]): # 1. Transpose # E.g. Given an input_tensor with shape [2,2,5,1] and axis to gather is 3, # we use perm_pre=[3 0 1 2] to reshape it to [1,2,2,5], which # brings the 3rd dim first; afterwards we use perm_after=[1,2,3,0] to # place it back. perm_pre = array_ops.concat( ([axis], math_ops.range(axis), math_ops.range(axis + 1, array_ops.rank(input_tensor))), axis=0) input_tensor_t = array_ops.transpose(input_tensor, perm=perm_pre) # 2. Pad gathered_shape = collective_ops.all_gather( array_ops.expand_dims_v2(array_ops.shape_v2(input_tensor_t), axis=0), self._group_size, self._group_key, instance_key_shape, communication_hint, timeout=timeout) first_dims = gathered_shape[:, 0] full_axis_dim = math_ops.reduce_max(first_dims) padded_input_tensor = _pad_util(input_tensor_t, full_axis_dim) # 3. Gather gather_padded_out_tensor = collective_ops.all_gather( padded_input_tensor, self._group_size, self._group_key, instance_key_tensor, communication_hint, timeout=timeout) # 4. Unpad split_tensors = [] for i in range(first_dims.shape[0]): start_pos = i * full_axis_dim split_tensors.append( gather_padded_out_tensor[start_pos:start_pos + first_dims[i]]) out_tensor_t = array_ops.concat(split_tensors, 0) # 5. Transpose back perm_after = array_ops.concat( (math_ops.range(1, axis + 1), [0], math_ops.range(axis + 1, array_ops.rank(input_tensor_t))), axis=0) return array_ops.transpose(out_tensor_t, perm=perm_after)
def ctc_loss(labels, inputs, sequence_length, preprocess_collapse_repeated=False, ctc_merge_repeated=True, ignore_longer_outputs_than_inputs=False, time_major=True): """Computes the CTC (Connectionist Temporal Classification) Loss. This op implements the CTC loss as presented in the article: [A. Graves, S. Fernandez, F. Gomez, J. Schmidhuber. Connectionist Temporal Classification: Labeling Unsegmented Sequence Data with Recurrent Neural Networks. ICML 2006, Pittsburgh, USA, pp. 369-376.](http://www.cs.toronto.edu/~graves/icml_2006.pdf) Input requirements: ``` sequence_length(b) <= time for all b max(labels.indices(labels.indices[:, 1] == b, 2)) <= sequence_length(b) for all b. ``` Notes: This class performs the softmax operation for you, so inputs should be e.g. linear projections of outputs by an LSTM. The `inputs` Tensor's innermost dimension size, `num_classes`, represents `num_labels + 1` classes, where num_labels is the number of true labels, and the largest value `(num_classes - 1)` is reserved for the blank label. For example, for a vocabulary containing 3 labels `[a, b, c]`, `num_classes = 4` and the labels indexing is `{a: 0, b: 1, c: 2, blank: 3}`. Regarding the arguments `preprocess_collapse_repeated` and `ctc_merge_repeated`: If `preprocess_collapse_repeated` is True, then a preprocessing step runs before loss calculation, wherein repeated labels passed to the loss are merged into single labels. This is useful if the training labels come from, e.g., forced alignments and therefore have unnecessary repetitions. If `ctc_merge_repeated` is set False, then deep within the CTC calculation, repeated non-blank labels will not be merged and are interpreted as individual labels. This is a simplified (non-standard) version of CTC. Here is a table of the (roughly) expected first order behavior: * `preprocess_collapse_repeated=False`, `ctc_merge_repeated=True` Classical CTC behavior: Outputs true repeated classes with blanks in between, and can also output repeated classes with no blanks in between that need to be collapsed by the decoder. * `preprocess_collapse_repeated=True`, `ctc_merge_repeated=False` Never learns to output repeated classes, as they are collapsed in the input labels before training. * `preprocess_collapse_repeated=False`, `ctc_merge_repeated=False` Outputs repeated classes with blanks in between, but generally does not require the decoder to collapse/merge repeated classes. * `preprocess_collapse_repeated=True`, `ctc_merge_repeated=True` Untested. Very likely will not learn to output repeated classes. The `ignore_longer_outputs_than_inputs` option allows to specify the behavior of the CTCLoss when dealing with sequences that have longer outputs than inputs. If true, the CTCLoss will simply return zero gradient for those items, otherwise an InvalidArgument error is returned, stopping training. Args: labels: An `int32` `SparseTensor`. `labels.indices[i, :] == [b, t]` means `labels.values[i]` stores the id for (batch b, time t). `labels.values[i]` must take on values in `[0, num_labels)`. See `core/ops/ctc_ops.cc` for more details. inputs: 3-D `float` `Tensor`. If time_major == False, this will be a `Tensor` shaped: `[batch_size x max_time x num_classes]`. If time_major == True (default), this will be a `Tensor` shaped: `[max_time x batch_size x num_classes]`. The logits. sequence_length: 1-D `int32` vector, size `[batch_size]`. The sequence lengths. preprocess_collapse_repeated: Boolean. Default: False. If True, repeated labels are collapsed prior to the CTC calculation. ctc_merge_repeated: Boolean. Default: True. ignore_longer_outputs_than_inputs: Boolean. Default: False. If True, sequences with longer outputs than inputs will be ignored. time_major: The shape format of the `inputs` Tensors. If True, these `Tensors` must be shaped `[max_time, batch_size, num_classes]`. If False, these `Tensors` must be shaped `[batch_size, max_time, num_classes]`. Using `time_major = True` (default) is a bit more efficient because it avoids transposes at the beginning of the ctc_loss calculation. However, most TensorFlow data is batch-major, so by this function also accepts inputs in batch-major form. Returns: A 1-D `float` `Tensor`, size `[batch]`, containing the negative log probabilities. Raises: TypeError: if labels is not a `SparseTensor`. """ # The second, third, etc output tensors contain the gradients. We use it in # _CTCLossGrad() below. if not isinstance(labels, sparse_tensor.SparseTensor): raise TypeError("Expected labels (first argument) to be a SparseTensor") # For internal calculations, we transpose to [time, batch, num_classes] if not time_major: inputs = array_ops.transpose(inputs, [1, 0, 2]) # (B,T,N) => (T,B,N) loss, _ = gen_ctc_ops._ctc_loss( inputs, labels.indices, labels.values, sequence_length, preprocess_collapse_repeated=preprocess_collapse_repeated, ctc_merge_repeated=ctc_merge_repeated, ignore_longer_outputs_than_inputs=ignore_longer_outputs_than_inputs) return loss
def triplet_semihard_without_grad(embeddings_labels, sigma1=1.0, sigma2=1.0): ''' This function is used to calculate the probabilities for positive and negative weighted embeddings Input: embeddings: the embeddings which represents the images in other space where distance is interpreable labels: corresponding label for each embeddings sigma: tells how much weight to be assigned based on distance. Low sigma will assign high weight to nearby sample. Output: positive and negative probabilties ''' # First we extract the labels and embeddings labels = embeddings_labels[:, :1] labels = tf.cast(labels, dtype='int32') embeddings = embeddings_labels[:, 1:] # Build pairwise squared distance matrix. pdist_matrix = pairwise_distance(embeddings, squared=True) # This matrix will have 1 when labels are same and 0 when they are different adjacency = math_ops.equal(labels, array_ops.transpose(labels)) # Invert so we can select negatives only. # This matrix will have 1 when labels are different and 0 when they are same adjacency_not = math_ops.logical_not(adjacency) #Infer batch size batch_size = array_ops.size(labels) # For calculating positive probability affinity = math_ops.exp(-pdist_matrix / sigma1) - array_ops.diag( array_ops.ones([batch_size])) d_a_p = math_ops.multiply(math_ops.cast(adjacency, dtype=dtypes.float32), affinity) pos_prob = math_ops.divide(d_a_p, tf.reduce_sum(d_a_p, axis=1, keepdims=True)) # Set pos-prob of nearest to 1 in case of nan. mask_is_nan = tf.tile( tf.math.is_nan(tf.reduce_sum(pos_prob, axis=1, keepdims=True)), [1, embeddings.shape.as_list()[0]]) pdist_matrix_pos = math_ops.multiply( math_ops.cast(adjacency, dtype=dtypes.float32), pdist_matrix) select_nearest = tf.cast(tf.math.equal( pdist_matrix_pos, tf.reduce_max(pdist_matrix_pos, axis=1, keepdims=True)), dtype=dtypes.float32) pos_prob = array_ops.where(mask_is_nan, select_nearest, pos_prob) # For calculating negative probability affinity = math_ops.exp(-pdist_matrix / sigma2) - array_ops.diag( array_ops.ones([batch_size])) d_a_n = math_ops.multiply( math_ops.cast(adjacency_not, dtype=dtypes.float32), affinity) neg_prob = math_ops.divide(d_a_n, tf.reduce_sum(d_a_n, axis=1, keepdims=True)) # Set neg-prob of nearest to 1 in case of nan. mask_is_nan = tf.tile( tf.math.is_nan(tf.reduce_sum(neg_prob, axis=1, keepdims=True)), [1, embeddings.shape.as_list()[0]]) pdist_matrix_neg = math_ops.multiply( math_ops.cast(adjacency, dtype=dtypes.float32), pdist_matrix) select_nearest = tf.cast(tf.math.equal( pdist_matrix_pos, tf.reduce_max(pdist_matrix_neg, axis=1, keepdims=True)), dtype=dtypes.float32) # print (select_nearest) neg_prob = array_ops.where(mask_is_nan, select_nearest, neg_prob) return pos_prob, neg_prob