def _upsample(x, up_sz, f, direction, shift):
    """Upsample `x` by 2 along one axis with transposed reflecting boundaries.

    `x` is undecimated (zeros are interleaved with its samples) along the
    spatial axis chosen by `direction`, cropped / zero-padded to `up_sz`,
    reflect-padded, and then convolved with the reversed filter `f`.

    This is written to be the transpose of `_downsample()`, which uses
    reflecting boundary conditions. It therefore approximates *the transpose
    of reflecting boundary conditions*, which is not the same thing as
    reflecting boundary conditions.

    TODO(barron): Write out the true transpose of reflecting boundary
    conditions.

    Args:
      x: The input tensor (numpy or TF), of size (num_channels, width, height).
      up_sz: A tuple of ints of size (upsampled_width, upsampled_height). The
        caller must make the entry for the axis that is *not* being upsampled
        match the corresponding size of `x`.
      f: The input filter, a 1D numpy array (rank 1 is asserted at run time).
      direction: The spatial direction in [0, 1] to upsample along. Because
        `x` has a leading batch/channels dimension, 0 means axis 1 of `x` and
        1 means axis 2 of `x`.
      shift: A shift amount in [0, 1]; 0 places each sample before its
        interleaved zero, 1 places it after.

    Returns:
      `x` undecimated and convolved with `f` along `direction`, with size
      `up_sz` in the two spatial dimensions.
    """
    _check_resample_inputs(x, f, direction, shift)
    rank_check = tf.Assert(tf.equal(tf.rank(f), 1), [tf.rank(f)])
    with tf.control_dependencies([rank_check]):
        # Undecimate `x` by a factor of 2 along `direction`: stack it with a
        # tensor of all zeros just after the target axis, then reshape so the
        # zeros end up interleaved with the samples.
        if direction == 0:
            doubled_shape = tf.shape(x) * [1, 2, 1]
        elif direction == 1:
            doubled_shape = tf.shape(x) * [1, 1, 2]
        if shift == 0:
            interleave_pair = [x, tf.zeros_like(x)]
        elif shift == 1:
            interleave_pair = [tf.zeros_like(x), x]
        stacked = tf.stack(interleave_pair, direction + 2)
        undecimated = tf.reshape(stacked, doubled_shape)

        # Force the spatial size to `up_sz`: crop anything beyond it, then
        # zero-pad whatever is still missing.
        target_w, target_h = up_sz[0], up_sz[1]
        undecimated = undecimated[:, 0:target_w, 0:target_h]
        cropped_shape = tf.shape(undecimated)
        fill = [[0, 0],
                [0, target_w - cropped_shape[1]],
                [0, target_h - cropped_shape[2]]]
        undecimated = tf.pad(undecimated, fill)

        # Reflection padding; the before/after amounts are swapped relative
        # to `_downsample()`.
        taps = len(f)
        x_padded = pad_reflecting(undecimated, taps // 2, (taps - 1) // 2,
                                  direction + 1)

        # Convolve with a flipped copy of `f`, oriented along `direction`.
        flipped = tf.expand_dims(f[::-1], 1 - direction)
        kernel = tf.cast(flipped, x.dtype)[:, :, tf.newaxis, tf.newaxis]
        conv_input = x_padded[:, :, :, tf.newaxis]
        convolved = tf.nn.conv2d(conv_input, kernel, [1, 1, 1, 1], 'VALID')
        return convolved[:, :, :, 0]
def batch_gather(values, indices):
    """Gather per-example slices from `values`.

    Args:
      values: a tensor in the shape of [batch_size, length, depth].
      indices: a tensor in the shape of [batch_size, slice_count] where
        slice_count < length.

    Returns:
      a tensor in the shape of [batch_size, slice_count, depth].
    """
    input_checks = [
        tf.assert_equal(tf.rank(values), 3, message="values"),
        tf.assert_equal(tf.rank(indices), 2, message="indices"),
        tf.assert_equal(
            tf.shape(values)[0], tf.shape(indices)[0], message="batch"),
    ]
    with tf.control_dependencies(input_checks):
        idx_shape = common_layers.shape_list(indices)
        depth = common_layers.shape_list(values)[-1]

        # Column of batch ids, repeated once per requested slice, so each
        # flattened index can be paired with the example it belongs to.
        batch_column = tf.expand_dims(tf.range(idx_shape[0]), [1])
        batch_ids = tf.reshape(
            tf.tile(batch_column, [1, idx_shape[1]]), [-1, 1])
        positions = tf.cast(tf.reshape(indices, [-1, 1]), tf.int32)
        nd_indices = tf.concat([batch_ids, positions], axis=-1)

        gathered = tf.gather_nd(params=values, indices=nd_indices)
        return tf.reshape(gathered, [idx_shape[0], idx_shape[1], depth])
def predict_refs(logits, starts, ends):
    """Outputs the refs based on area predictions.

    Asserts that `logits` has rank 3 and that `starts` and `ends` have rank 2,
    takes the argmax of `logits` over its last axis, and hands the result to
    `area_utils.area_to_refs` together with `starts` and `ends`.
    """
    rank_checks = [
        tf.assert_equal(tf.rank(logits), 3),
        tf.assert_equal(tf.rank(starts), 2),
        tf.assert_equal(tf.rank(ends), 2),
    ]
    with tf.control_dependencies(rank_checks):
        best_area = tf.argmax(logits, -1)
        return area_utils.area_to_refs(starts, ends, best_area)
def lossfun_students(x, scale_lo=1e-5, scale_init=1.):
    """A variant of lossfun() that uses the NLL of a Student's t-distribution.

    Args:
      x: The residual for which the loss is being computed. Must be a rank-2
        tensor (asserted at run time). The latent variables are sized by
        `x.shape[1]`, so each column is a "channel" with its own shape
        (log-df) and scale parameters, shared across rows. Must be a TF
        tensor or numpy array of single or double precision floats; the
        precision of `x` determines the precision of the latent variables.
      scale_lo: The lowest possible value for the loss's scale parameters.
        Must be > 0 and a scalar. This value may have more of an effect than
        you think, as the loss is unbounded as scale approaches zero (say, at
        a delta function).
      scale_init: The initial value used for the loss's scale parameters. This
        also defines the zero-point of the latent representation of scales,
        so SGD may cause optimization to gravitate towards producing scales
        near this value.

    Returns:
      A tuple of the form (`loss`, `log_df`, `scale`).

      `loss`: the result of `util.students_t_nll(x, exp(log_df), scale)`.
        These "losses" are negative log-likelihoods and so are not bounded
        from below by zero. You'll probably want to minimize their sum or
        mean.
      `log_df`: the 'LogDf' variable, of the same type as `x` and of size
        (1, x.shape[1]), initialized to zero. It holds the current
        log(degrees-of-freedom) estimate for each column and changes during
        optimization.
      `scale`: the scale tensor produced by `_construct_scale` for `x`,
        `scale_lo` and `scale_init`; documented upstream as size
        (1, x.shape[1]).

    Raises:
      ValueError: If any of the arguments are invalid.
    """
    _check_scale(scale_lo, scale_init)
    float_dtype = x.dtype
    rank_is_two = tf.Assert(tf.equal(tf.rank(x), 2), [tf.rank(x)])
    with tf.control_dependencies([rank_is_two]):
        initial_log_df = tf.zeros((1, x.shape[1]), float_dtype)
        log_df = tf.compat.v1.get_variable(
            name='LogDf', initializer=initial_log_df)
        scale = _construct_scale(x, scale_lo, scale_init, float_dtype)
        degrees_of_freedom = tf.math.exp(log_df)
        loss = util.students_t_nll(x, degrees_of_freedom, scale)
        return loss, log_df, scale
def _rgb_to_grayscale(images, name=None):
    """Converts one or more images from RGB to Grayscale.

    The output has the same `DType` and rank as `images`. Its last dimension
    has size 1 and holds the weighted sum of the three input channels.

    Args:
      images: The RGB tensor to convert. Last dimension must have size 3 and
        should contain RGB values.
      name: A name for the operation (optional).

    Returns:
      The converted grayscale image(s).
    """
    with tf.name_scope(name, 'rgb_to_grayscale', [images]) as scope:
        images = tf.convert_to_tensor(images, name='images')
        # The weighting is done in float32; remember the incoming dtype so
        # the result can be converted back at the end.
        source_dtype = images.dtype
        as_float = tf.image.convert_image_dtype(images, tf.float32)

        # Reference for converting between RGB and grayscale.
        # https://en.wikipedia.org/wiki/Luma_%28video%29
        luma_weights = [0.2989, 0.5870, 0.1140]
        channel_axis = tf.expand_dims(tf.rank(images) - 1, 0)
        weighted = as_float * luma_weights
        gray = tf.reduce_sum(weighted, channel_axis, keep_dims=True)

        # Restore the static shape, with the channel dimension collapsed to 1.
        static_shape = images.get_shape()[:-1].concatenate([1])
        gray.set_shape(static_shape)
        return tf.image.convert_image_dtype(gray, source_dtype, name=scope)
def _example_to_string(ex):
    """Render one example as text via `FLAGS.format_string`.

    For every key in the enclosing-scope `keys`, the matching feature of `ex`
    is turned into a string (an empty string when the key is missing),
    passed through `pretty`, and substituted into `FLAGS.format_string`.

    NOTE(review): the block nesting below was reconstructed from flattened
    source; confirm that the post-process step belongs under
    `FLAGS.detokenize`.
    """
    key_to_string = {}
    for k in keys:
        if k not in ex:
            # Missing features still need an entry so `format` does not fail.
            key_to_string[k] = ""
            continue
        value = ex[k]
        if FLAGS.detokenize:
            try:
                # `tf.abs` is applied to the ids before decoding.
                value = task_or_mixture.output_features[
                    k].vocabulary.decode_tf(tf.abs(value))
            except RuntimeError as err:
                # Keep going: show the decoding failure in the output instead.
                value = f"Error {err} while decoding {value}"
            if (FLAGS.apply_postprocess_fn and k == "targets" and
                    hasattr(task_or_mixture, "postprocess_fn")):
                value = task_or_mixture.postprocess_fn(value)
        # Scalars are wrapped in a list before being formatted or joined.
        if tf.rank(value) == 0:
            value = [value]
        if tf.is_numeric_tensor(value):
            # summarize=-1 is passed so that every element is printed.
            value = tf.strings.format("{}", tf.squeeze(value), summarize=-1)
        else:
            value = tf.strings.join(value, separator="\n\n")
        key_to_string[k] = pretty(value.numpy().decode("utf-8"))
    return FLAGS.format_string.format(**key_to_string)
def convert_padding_mask_to_attention_mask(sequence, padding_mask):
    """Expand a per-position boolean mask into a 3D attention mask.

    The 2D mask is repeated along a new middle axis, once for every position
    of `sequence`, so each query position sees the same key mask.

    Args:
        sequence (tf.Tensor): Tensor of shape
            [batch_size, sequence_length_1, ndim]
        padding_mask (tf.Tensor[bool]): Tensor of shape
            [batch_size, sequence_length_2]

    Returns:
        tf.Tensor[bool]: Tensor of shape
            [batch_size, sequence_length_1, sequence_length_2]
    """
    # The message is built by implicit concatenation rather than a backslash
    # continuation inside the literal, which embedded the next line's
    # indentation whitespace in the error text.
    batch_assert = tf.assert_equal(
        tf.shape(padding_mask)[0],
        tf.shape(sequence)[0],
        message=('batch size mismatch between input sequence and '
                 'padding_mask'))
    rank_assert = tf.assert_equal(
        tf.rank(padding_mask),
        2,
        message='Can only convert 2D position mask to 3D attention mask')

    with tf.control_dependencies([batch_assert, rank_assert]):
        attention_mask = tf.tile(padding_mask[:, None, :],
                                 (1, tf.shape(sequence)[1], 1))

    return attention_mask
def convert_sequence_length_to_sequence_mask(sequence, sequence_lengths):
    """Build a boolean mask of valid positions from per-example lengths.

    Entry `[b, t]` of the result is True when `t < sequence_lengths[b]` and
    False otherwise, i.e. True marks real positions and False marks padding.

    Args:
        sequence (tf.Tensor): Tensor of shape
            [batch_size, sequence_length, ndim]
        sequence_lengths (tf.Tensor[int]): Tensor of shape [batch_size]

    Returns:
        tf.Tensor[bool]: Tensor of shape [batch_size, sequence_length]
    """
    # The message is built by implicit concatenation rather than a backslash
    # continuation inside the literal, which embedded the next line's
    # indentation whitespace in the error text.
    batch_assert = tf.assert_equal(
        tf.shape(sequence_lengths)[0],
        tf.shape(sequence)[0],
        message=('batch size mismatch between input sequence and '
                 'sequence_lengths'))
    rank_assert = tf.assert_equal(
        tf.rank(sequence_lengths),
        1,
        message='Can only convert 1D sequence_lengths to 2D mask')

    with tf.control_dependencies([batch_assert, rank_assert]):
        # One row of position indices [0, 1, ..., sequence_length - 1] per
        # batch element, compared against that element's length.
        indices = tf.tile(
            tf.range(tf.shape(sequence)[1])[None, :],
            (tf.shape(sequence_lengths)[0], 1))
        mask = indices < sequence_lengths[:, None]

    return mask
def loss_func(y_true, y_pred):
    """Weighted cross-entropy loss, summed per sample and averaged over batch.

    `epsilon`, `w1`, `w2`, `weight` and `compute_cross_entropy` come from the
    enclosing scope. Predictions are clipped to [epsilon, 1 - epsilon] before
    the cross-entropy is evaluated.
    """
    # Every axis except the leading (batch) one.
    non_batch_axes = tf.range(1, tf.rank(y_true))
    targets = tf.cast(y_true, dtype=y_pred.dtype)
    clipped_pred = tf.clip_by_value(y_pred, epsilon, 1.0 - epsilon)
    elementwise = compute_cross_entropy(w1, w2, targets, clipped_pred)
    per_sample = tf.reduce_sum(elementwise, axis=non_batch_axes)
    return -weight*tf.reduce_mean(per_sample)
def loss_func(y_true, y_pred):
    """Dice-style loss: `weight * (1 - mean(smoothed dice ratio))`.

    For each sample the ratio is
    `(2 * sum(t * p) + epsilon) / (sum(t**2 + p**2) + epsilon)`, with the sums
    taken over every non-batch axis. `epsilon` and `weight` come from the
    enclosing scope.
    """
    # Every axis except the leading (batch) one.
    non_batch_axes = tf.range(1, tf.rank(y_true))
    targets = tf.cast(y_true, dtype=y_pred.dtype)

    overlap = 2*tf.reduce_sum(targets*y_pred, axis=non_batch_axes)
    total = tf.reduce_sum(targets**2 + y_pred**2, axis=non_batch_axes)

    # epsilon keeps the ratio defined when both sums are zero.
    dice_ratio = (overlap + epsilon)/(total + epsilon)
    dice_loss = 1 - tf.reduce_mean(dice_ratio)
    return weight*dice_loss
def _naive_rdft(signal_tensor, fft_length):
    """Implement real-input Fourier Transform by matmul.

    The last axis of `signal_tensor` is zero-padded (split between both ends)
    up to `fft_length` and right-multiplied by the real and imaginary parts
    of the first `fft_length // 2 + 1` rows of the DFT matrix.

    Returns:
      A `(real_part, imaginary_part)` pair of float tensors.
    """
    # We are right-multiplying by the DFT matrix, and we are keeping only the
    # first half ("positive frequencies"). So discard the second half of
    # rows, but transpose the array for right-multiplication. The DFT matrix
    # is symmetric, so this could be done more directly, but this form
    # reflects the intention better.
    kept_bins = fft_length // 2 + 1
    dft_kept = _dft_matrix(fft_length)[:kept_bins, :].transpose()
    real_dft_tensor = tf.constant(
        np.real(dft_kept).astype(np.float32), name='real_dft_matrix')
    imag_dft_tensor = tf.constant(
        np.imag(dft_kept).astype(np.float32), name='imaginary_dft_matrix')

    # The frame length must be statically known to compute the padding.
    frame_length = signal_tensor.shape[-1].value
    total_pad = fft_length - frame_length
    pad_before = total_pad // 2
    pad_after = total_pad - pad_before

    # No padding on the leading axes; centre the frame on the last axis.
    leading_pads = tf.zeros([tf.rank(signal_tensor) - 1, 2], tf.int32)
    paddings = tf.concat([leading_pads, [[pad_before, pad_after]]], axis=0)
    padded_signal = tf.pad(signal_tensor, paddings)

    real_part = tf.matmul(padded_signal, real_dft_tensor)
    imag_part = tf.matmul(padded_signal, imag_dft_tensor)
    return real_part, imag_part
def _slice_cov(self, cov):
    """
    Restrict covariance matrices to the dimensions in `self.active_dims`.

    Both the rows *and* the columns are sliced. Flattened diagonal
    covariances (rank 2) are first expanded into full diagonal matrices.

    :param cov: Tensor of covariance matrices (NxDxD or NxD).
    :return: N x self.input_dim x self.input_dim.
    """
    is_flat_diagonal = tf.equal(tf.rank(cov), 2)
    full_cov = tf.cond(is_flat_diagonal,
                       lambda: tf.matrix_diag(cov),
                       lambda: cov)

    dims = self.active_dims
    if isinstance(dims, slice):
        # A slice can be applied directly to the two trailing axes.
        return full_cov[..., dims, dims]

    # Index lists go through tf.gather, which selects along axis 0, so each
    # target axis is transposed to the front before gathering.
    original_shape = tf.shape(full_cov)
    size = original_shape[-1]
    flat = tf.reshape(full_cov, [-1, size, size])
    cols_picked = tf.gather(tf.transpose(flat, [2, 1, 0]), dims)
    both_picked = tf.gather(tf.transpose(cols_picked, [1, 0, 2]), dims)

    n_active = len(dims)
    out_shape = tf.concat([original_shape[:-2], [n_active, n_active]], 0)
    return tf.reshape(tf.transpose(both_picked, [2, 0, 1]), out_shape)
def _input_booster(example):
    """Return True for examples with an input span, else True 10% of the time.

    Args:
      example: a dict whose 'input_refs' entry is a rank-2 tensor. A row
        counts as an input when its column 1 is greater than its column 0.

    Returns:
      A scalar boolean tensor: True when any row of `example['input_refs']`
      is an input, otherwise True with probability 0.1 (drawn with
      `tf.random_uniform`).
    """
    input_refs = example['input_refs']
    # The original passed `[tf.rank(...), 2]` as control inputs, which checks
    # nothing and hands a bare int to `tf.control_dependencies`. The intended
    # dependency is an explicit rank-2 assertion.
    rank_check = tf.assert_equal(tf.rank(input_refs), 2)
    with tf.control_dependencies([rank_check]):
        has_input = tf.reduce_any(
            tf.greater(input_refs[:, 1], input_refs[:, 0]))
        return tf.logical_or(has_input, tf.less(tf.random_uniform([]), 0.1))
def area_range_to_index(area_range, length, max_area_width):
    """Computes the indices of each area in the area expansion.

    An area's index is the number of areas of smaller width that precede it
    (`base`) plus its start position (`offset`).

    Args:
      area_range: tensor in shape of [batch_size, 2]
      length: a scalar tensor gives the length of the original feature space.
      max_area_width: a constant scalar.

    Returns:
      indices: area indices tensor in shape of [batch_size]
    """
    shape_checks = [
        tf.assert_equal(tf.rank(area_range), 2),
        tf.assert_equal(tf.shape(area_range)[1], 2),
    ]
    with tf.control_dependencies(shape_checks):
        area_range = tf.cast(area_range, tf.int32)
    start = area_range[:, 0]
    target_size = area_range[:, 1] - start

    width_check = tf.assert_less(
        target_size, max_area_width + 1, summarize=100000)
    with tf.control_dependencies([width_check]):
        # Arithmetic-series sum (first + last) * count // 2 with
        # first = length and last = length - (target_size - 1) + 1.
        narrower = target_size - 1
        first_term = length
        last_term = length - narrower + 1
        base = ((first_term + last_term) *
                (first_term - last_term + 1)) // 2
        # Areas of width <= 1 have nothing before them.
        base = tf.where(tf.less_equal(target_size, 1),
                        tf.zeros_like(target_size),
                        base)
        return base + start
def tf_shape_last(tensor, prepend_neg1=False, squeeze=False, name=None):
    '''Gets length of last dimension of tensor.

    E.g. for shape (a, b, c, ..., y, z) returns (z)
    If prepend_neg1, returns (-1, z)
    If squeeze is True and prepend_neg1 is False, returns a scalar instead
    of a 1D tensor.
    It is an error for prepend_neg1 and squeeze to both be true.

    >>> inp = tf.placeholder('float32')
    >>> tsl = tf_shape_last(inp)
    >>> sess.run(tsl, {inp: np.zeros((2,3,4,5))})
    array([5], dtype=int32)
    >>> sess.run(tsl, {inp: np.zeros((2,3))})
    array([3], dtype=int32)
    >>> sess.run(tsl, {inp: np.zeros((2))})
    array([2], dtype=int32)
    >>> sess.run(tsl, {inp: 0})  # Exception raised

    >>> tsl = tf_shape_last(inp, prepend_neg1=True)
    >>> sess.run(tsl, {inp: np.zeros((2,3,4,5))})
    array([-1, 5], dtype=int32)
    >>> sess.run(tsl, {inp: np.zeros((2,3))})
    array([-1, 3], dtype=int32)
    >>> sess.run(tsl, {inp: np.zeros((2))})
    array([-1, 2], dtype=int32)
    >>> sess.run(tsl, {inp: 0})  # Exception raised
    '''
    assert not prepend_neg1 or not squeeze, \
        'prepend_neg1 and squeeze are incompatible'

    if prepend_neg1:
        minus_one = tf.expand_dims(-1, 0)
        last_dim = tf.slice(tf.shape(tensor), [tf.rank(tensor) - 1], [1])
        # NOTE(review): (axis, values) is the pre-1.0 `tf.concat` argument
        # order; confirm the TensorFlow version this file targets.
        return tf.concat(0, (minus_one, last_dim), name=name)

    last_dim = tf.slice(
        tf.shape(tensor), [tf.rank(tensor) - 1], [1], name=name)
    return last_dim[0] if squeeze else last_dim
def _deserialize_label(im, lab):
    """Parse a space-separated label string into a numeric vector.

    `im` is returned untouched. `lab` is split on single spaces, converted to
    numbers, and reshaped to `[self._num_regression_outputs]` (`self` comes
    from the enclosing scope).
    """
    # A scalar string is promoted to a length-1 vector before splitting.
    is_scalar = tf.equal(tf.rank(lab), 0)
    lab_vector = tf.cond(is_scalar,
                         lambda: tf.reshape(lab, [1]),
                         lambda: lab)
    tokens = tf.string_split(lab_vector, sep=' ')
    numbers = tf.strings.to_number(tokens.values)
    parsed = tf.reshape(numbers, [self._num_regression_outputs])
    return im, parsed
def log_prob(self, data, num_samples=1):
    """Log-probability of `data` under the squashed distribution.

    `data` is mapped back through `self.squash.inverse`, centred with
    `self.unsquashed_data_mean`, scored by `self.distribution.log_prob`, and
    corrected by the squash's inverse log-det-Jacobian.
    """
    centred = self.squash.inverse(data) - self.unsquashed_data_mean
    base_log_prob = self.distribution.log_prob(
        centred, num_samples=num_samples)
    # Every axis except the first is passed as an event dimension.
    jacobian_term = self.squash.inverse_log_det_jacobian(
        data, event_ndims=tf.rank(data) - 1)
    return base_log_prob + jacobian_term
def loss_func(y_true, y_pred):
    """Tversky-style loss: `weight * (1 - mean(smoothed overlap ratio))`.

    The denominator adds, to the overlap `t * p`, the term `(1 - t) * p`
    weighted by `beta` and the term `t * (1 - p)` weighted by `1 - beta`.
    `beta`, `epsilon` and `weight` come from the enclosing scope.
    """
    # Every axis except the leading (batch) one.
    non_batch_axes = tf.range(1, tf.rank(y_true))
    targets = tf.cast(y_true, dtype=y_pred.dtype)

    overlap = targets*y_pred
    overlap_sum = tf.reduce_sum(overlap, axis=non_batch_axes)

    # TODO: Swap beta and 1-beta?
    pred_only = beta*(1-targets)*y_pred
    target_only = (1-beta)*targets*(1-y_pred)
    total_sum = tf.reduce_sum(overlap + pred_only + target_only,
                              axis=non_batch_axes)

    ratio = (overlap_sum + epsilon)/(total_sum + epsilon)
    tversky_loss = 1 - tf.reduce_mean(ratio)
    return weight*tversky_loss
def log_GaussPDF(X, mu, sigma):
    """Log-density of isotropic Gaussians evaluated at the points in `X`.

    Computes `-D/2 * log(2 * pi * sigma) - distance / (2 * sigma)`, where
    `distance = distanceFunc(X, mu)` and `D` is the size of the last axis of
    `X`. `sigma` is used as a variance: it appears unsquared in both terms.

    Args:
      X: data points; assumes the last axis holds the features (e.g. N x D).
        TODO confirm against the caller.
      mu: component means, forwarded to `distanceFunc`.
      sigma: per-component variances; singleton axes are squeezed away.

    Returns:
      The log-density, with the shape that results from broadcasting
      `distanceFunc(X, mu)` against the squeezed `sigma`.
    """
    distance = distanceFunc(X, mu)
    variance = tf.squeeze(sigma)
    # The normaliser needs the feature dimensionality D. The original used
    # tf.rank(X), the number of axes, which is only correct when D happens
    # to equal the rank (e.g. 2-D data stored as N x 2).
    dimension = tf.to_float(tf.shape(X)[-1])
    exponent = distance / (2 * variance)
    log_normaliser = -0.5 * dimension * (tf.log(2 * np.pi * variance))
    return log_normaliser - exponent
def _downsample(x, f, direction, shift):
    """Downsample by a factor of 2 using reflecting boundary conditions.

    `x` is reflect-padded, convolved with filter `f` along one spatial axis,
    and decimated by 2 through the convolution stride. This is usually done
    to downsample `x`, assuming `f` is some smoothing filter, but is also
    used for wavelet transformations in which `f` is not a smoothing filter.

    Args:
      x: The input tensor (numpy or TF), of size (num_channels, width, height).
      f: The input filter, a 1D numpy array (rank 1 is asserted at run time).
      direction: The spatial direction in [0, 1] along which `x` is convolved
        with `f` and then decimated. Because `x` has a leading batch/channels
        dimension, 0 means axis 1 of `x` and 1 means axis 2 of `x`.
      shift: A shift amount in [0, 1]: the number of leading samples dropped
        from the padded input along `direction` before filtering.

    Returns:
      `x` convolved with `f` along the spatial dimension `direction` with
      reflection boundary conditions with an offset of `shift`.
    """
    _check_resample_inputs(x, f, direction, shift)
    rank_check = tf.Assert(tf.equal(tf.rank(f), 1), [tf.rank(f)])
    with tf.control_dependencies([rank_check]):
        # The padding before and after differs so that both odd and even
        # length filters work: an odd length n pads (n-1)/2 on both sides,
        # while an even length pads one less before than after.
        taps = len(f)
        x_padded = pad_reflecting(x, (taps - 1) // 2, taps // 2,
                                  direction + 1)
        if direction == 0:
            x_padded = x_padded[:, shift:, :]
            oriented_filter = f[:, tf.newaxis]
            strides = [1, 2, 1, 1]
        elif direction == 1:
            x_padded = x_padded[:, :, shift:]
            oriented_filter = f[tf.newaxis, :]
            strides = [1, 1, 2, 1]

        # conv2d expects NHWC input and HWIO filters; add singleton channels.
        conv_input = x_padded[:, :, :, tf.newaxis]
        kernel = tf.cast(oriented_filter,
                         x.dtype)[:, :, tf.newaxis, tf.newaxis]
        convolved = tf.nn.conv2d(conv_input, kernel, strides, 'VALID')
        return convolved[:, :, :, 0]
def log_p(self, data, num_importance_samples=10, summarize=True):
    """Log-probability estimate of `data` under the squashed distribution.

    `data` is mapped back through `self.squash.inverse`, centred with
    `self.unsquashed_data_mean`, scored by `self.distribution.log_p`, and
    corrected by the squash's inverse log-det-Jacobian.
    """
    centred = self.squash.inverse(data) - self.unsquashed_data_mean
    base_log_p = self.distribution.log_p(
        centred,
        num_importance_samples=num_importance_samples,
        summarize=summarize)
    # Every axis except the first is passed as an event dimension.
    jacobian_term = self.squash.inverse_log_det_jacobian(
        data, event_ndims=tf.rank(data) - 1)
    return base_log_p + jacobian_term
def _maybe_mask(m, seq_len_mask):
    """Mask the sequence with m.

    `seq_len_mask` is reshaped with trailing singleton dimensions
    (`rank(m) - 2` of them) and multiplied into `m`. The batch size of `m` is
    asserted equal to `seq_len_batch_size` from the enclosing scope.
    """
    # Use the static rank when it is known, otherwise the dynamic one.
    static_rank = m.get_shape().ndims
    if static_rank is None:
        m_rank = tf.rank(m)
    else:
        m_rank = static_rank
    trailing_ones = tf.ones(m_rank - 2, dtype=tf.int32)

    m_batch_size = dimension_value(m.shape[0]) or tf.shape(m)[0]
    batch_check = tf.assert_equal(
        seq_len_batch_size, m_batch_size, message="batch")
    with tf.control_dependencies([batch_check]):
        broadcast_shape = tf.concat(
            (tf.shape(seq_len_mask), trailing_ones), 0)
        expanded_mask = tf.reshape(seq_len_mask, broadcast_shape)
        return m * expanded_mask
def span_embedding(encoder_input_length, area_encodings, spans, hparams):
    """Computes the embedding for each span.

    Each span (the last axis of `spans`, reshaped to pairs) is converted to
    an area index with `area_utils.area_range_to_index`, and the matching
    rows of `area_encodings` are gathered per batch element.

    (TODO: liyang): comment shapes.
    """
    rank_check = tf.assert_equal(tf.rank(area_encodings), 3)
    with tf.control_dependencies([rank_check]):
        flat_spans = tf.reshape(spans, [-1, 2])
        area_indices = area_utils.area_range_to_index(
            area_range=flat_spans,
            length=encoder_input_length,
            max_area_width=hparams.max_span)
    # Restore the leading two dimensions of `spans` before gathering.
    spans_shape = tf.shape(spans)
    per_example_indices = tf.reshape(
        area_indices, [spans_shape[0], spans_shape[1]])
    return area_utils.batch_gather(area_encodings, per_example_indices)
def query_area(query, area_encodings, area_bias):
    """Predicts a range of tokens based on the query.

    The logits are the dot products between every query vector and every
    area encoding, plus the per-area bias.

    Args:
      query: a Tensor of shape [batch_size, length, depth]
      area_encodings: a tensor in shape of [batch_size, num_areas, depth]
      area_bias: a tensor in shape of [batch_size, num_areas].

    Returns:
      the logits to each area.
    """
    input_checks = [
        tf.assert_equal(tf.rank(query), 3),
        tf.assert_equal(tf.rank(area_encodings), 3),
        tf.assert_equal(tf.shape(query)[-1], tf.shape(area_encodings)[-1]),
        tf.assert_equal(tf.rank(area_bias), 2),
    ]
    with tf.control_dependencies(input_checks):
        encodings_t = tf.transpose(area_encodings, [0, 2, 1])
        scores = tf.matmul(query, encodings_t)
        # The bias is shared by every query position.
        bias = tf.expand_dims(area_bias, 1)
        return scores + bias
def tf_shape_notlast(tensor, append_neg1=False, name=None):
    '''Gets shape of tensor but not including the last dimension.

    E.g. for shape (a, b, c, ..., y, z) returns (a, b, c, ..., y)
    If append_neg1, returns (a, b, c, ..., y, -1)

    >>> inp = tf.placeholder('float32')
    >>> tsn = tf_shape_notlast(inp)
    >>> sess.run(tsn, {inp: np.zeros((2,3,4,5))})
    array([2, 3, 4], dtype=int32)
    >>> sess.run(tsn, {inp: np.zeros((2,3))})
    array([2], dtype=int32)
    >>> sess.run(tsn, {inp: np.zeros((2,))})
    array([], dtype=int32)
    >>> sess.run(tsn, {inp: 0})
    array([], dtype=int32)

    >>> tsn = tf_shape_notlast(inp, append_neg1=True)
    >>> sess.run(tsn, {inp: np.zeros((2,3,4,5))})
    array([2, 3, 4, -1], dtype=int32)
    >>> sess.run(tsn, {inp: np.zeros((2,3))})
    array([2, -1], dtype=int32)
    >>> sess.run(tsn, {inp: np.zeros((2,))})
    array([-1], dtype=int32)
    >>> sess.run(tsn, {inp: 0})
    array([-1], dtype=int32)
    '''
    if not append_neg1:
        return tf.slice(
            tf.shape(tensor), [0], [tf.rank(tensor) - 1], name=name)

    leading_dims = tf.slice(tf.shape(tensor), [0], [tf.rank(tensor) - 1])
    minus_one = tf.expand_dims(-1, 0)
    # NOTE(review): (axis, values) is the pre-1.0 `tf.concat` argument order;
    # confirm the TensorFlow version this file targets.
    return tf.concat(0, (leading_dims, minus_one), name=name)
def tf_squeeze(sess):
    """Print a small demo of tf.squeeze, tf.expand_dims and tf.tile.

    Builds a length-5 vector variable, initialises it in `sess`, and prints
    the variable, its squeezed form, that form with a new leading axis, and
    the result of tiling it 5 times along that axis.
    """
    # An alternative 4-D input to experiment with:
    # tf.Variable([[[[1],[1],[1]], [[2],[2],[2]]]])
    a = tf.Variable([1, 1, 1, 1, 1])  # shape=(bs,)
    sess.run(tf.global_variables_initializer())

    squeezed = tf.squeeze(a)
    print("a.shape=", a.shape, ", tf.rank=", sess.run(tf.rank(a)))
    print("b.shape=", squeezed.shape)
    print("a=\n", sess.run(a))
    print("tf.squeeze(a)=\n", sess.run(squeezed))

    with_batch_axis = tf.expand_dims(squeezed, 0)
    print("tf.expand_dims=\n", sess.run(with_batch_axis))

    tiled = tf.tile(with_batch_axis, [5, 1])
    print("tf.tile=\n", sess.run(tiled))
def meshgrid(x, y):
    """Tiles the contents of x and y into a pair of grids.

    Multidimensional analog of numpy.meshgrid, giving the same behavior if x
    and y are vectors. Generally, this will give:

      xgrid(i1, ..., i_m, j_1, ..., j_n) = x(j_1, ..., j_n)
      ygrid(i1, ..., i_m, j_1, ..., j_n) = y(i_1, ..., i_m)

    Keep in mind that the order of the arguments and outputs is reverse
    relative to the order of the indices they go into, done for compatibility
    with numpy. The output tensors have the same shapes. Specifically:

      xgrid.get_shape() = y.get_shape().concatenate(x.get_shape())
      ygrid.get_shape() = y.get_shape().concatenate(x.get_shape())

    Args:
      x: A tensor of arbitrary shape and rank. xgrid will contain these values
        varying in its last dimensions.
      y: A tensor of arbitrary shape and rank. ygrid will contain these values
        varying in its first dimensions.

    Returns:
      A tuple of tensors (xgrid, ygrid).
    """
    with tf.name_scope('Meshgrid'):
        x = tf.convert_to_tensor(x)
        y = tf.convert_to_tensor(y)

        # Each expanded shape is also the other tensor's tiling multiples.
        x_exp_shape = expanded_shape(tf.shape(x), 0, tf.rank(y))
        y_exp_shape = expanded_shape(tf.shape(y), tf.rank(y), tf.rank(x))

        x_expanded = tf.reshape(x, x_exp_shape)
        y_expanded = tf.reshape(y, y_exp_shape)
        xgrid = tf.tile(x_expanded, y_exp_shape)
        ygrid = tf.tile(y_expanded, x_exp_shape)

        # Tiling with dynamic multiples loses static shape information.
        grid_shape = y.get_shape().concatenate(x.get_shape())
        for grid in (xgrid, ygrid):
            grid.set_shape(grid_shape)

        return xgrid, ygrid
def pad_to_bounding_box(image, offset_height, offset_width, target_height,
                        target_width, pad_value):
    """Pads the given image with the given pad_value.

    Works like tf.image.pad_to_bounding_box, except it can pad the image with
    any given arbitrary pad value and also handle images whose sizes are not
    known during graph construction.

    The arbitrary pad value is obtained by subtracting `pad_value` from the
    image, zero-padding, and adding `pad_value` back to the result.

    Args:
      image: 3-D tensor with shape [height, width, channels]
      offset_height: Number of rows of `pad_value` to add on top.
      offset_width: Number of columns of `pad_value` to add on the left.
      target_height: Height of output image.
      target_width: Width of output image.
      pad_value: Value to pad the image tensor with.

    Returns:
      3-D tensor of shape [target_height, target_width, channels].

    Raises:
      The rank, target-size and offset checks below are `tf.Assert` ops, so
      violations are reported when the ops are executed.
      NOTE(review): the previous docstring listed ValueError here; no
      ValueError is raised by the code in this block.
    """
    image_rank = tf.rank(image)
    image_rank_assert = tf.Assert(
        tf.equal(image_rank, 3),
        ['Wrong image tensor rank [Expected] [Actual]', 3, image_rank])
    with tf.control_dependencies([image_rank_assert]):
        # Shift so that zero padding becomes `pad_value` once added back.
        image -= pad_value
    image_shape = tf.shape(image)
    height, width = image_shape[0], image_shape[1]
    target_width_assert = tf.Assert(tf.greater_equal(target_width, width),
                                    ['target_width must be >= width'])
    target_height_assert = tf.Assert(tf.greater_equal(target_height, height),
                                     ['target_height must be >= height'])
    # Each padding amount is created under its own assertion so that the
    # check is attached to the value that depends on it.
    with tf.control_dependencies([target_width_assert]):
        after_padding_width = target_width - offset_width - width
    with tf.control_dependencies([target_height_assert]):
        after_padding_height = target_height - offset_height - height
    offset_assert = tf.Assert(
        tf.logical_and(tf.greater_equal(after_padding_width, 0),
                       tf.greater_equal(after_padding_height, 0)),
        ['target size not possible with the given target offsets'])

    # Rows of the [3, 2] paddings matrix: (before, after) per dimension.
    height_params = tf.stack([offset_height, after_padding_height])
    width_params = tf.stack([offset_width, after_padding_width])
    channel_params = tf.stack([0, 0])
    with tf.control_dependencies([offset_assert]):
        paddings = tf.stack([height_params, width_params,
                             channel_params])
    padded = tf.pad(image, paddings)
    # Undo the earlier shift; the zero-padded border becomes `pad_value`.
    return padded + pad_value
def decode_raw_image(contents, channels=0):
    '''Decodes an image, ensuring that the result is height x width x channels.

    Rank-4 results are reduced to their first frame, single-channel images
    are converted to RGB, and the static shape is set to [None, None, 3].
    '''
    decoded = tf.image.decode_image(contents, channels)

    # Note: GIFs are decoded with 4 dimensions [num_frames, height, width, 3]
    is_multi_frame = tf.equal(tf.rank(decoded), 4)
    single_frame = tf.cond(
        is_multi_frame,
        lambda: decoded[0, :],  # Extract first frame
        lambda: decoded)

    is_grayscale = tf.equal(tf.shape(single_frame)[2], 1)
    rgb = tf.cond(
        is_grayscale,
        lambda: tf.image.grayscale_to_rgb(single_frame),
        lambda: single_frame)

    rgb.set_shape([None, None, 3])
    return rgb
def _call_sampler(sample_n_fn, sample_shape, name=None):
    """Reshapes vector of samples.

    Draws `prod(sample_shape)` samples with `sample_n_fn` and reshapes the
    leading sample axis of the result to `sample_shape`, keeping the
    remaining (batch/event) dimensions.
    """
    with tf.name_scope(name, "call_sampler", values=[sample_shape]):
        requested = tf.convert_to_tensor(
            sample_shape, dtype=tf.int32, name="sample_shape")

        # Ensure sample_shape is a vector (vs just a scalar): a scalar has an
        # empty shape, which is left-padded with a single 1.
        is_scalar = tf.cast(tf.equal(tf.rank(requested), 0), tf.int32)
        vector_shape = tf.pad(
            tf.shape(requested), paddings=[[is_scalar, 0]], constant_values=1)
        requested = tf.reshape(requested, vector_shape)

        samples = sample_n_fn(tf.reduce_prod(requested))
        batch_event_shape = tf.shape(samples)[1:]
        final_shape = tf.concat([requested, batch_event_shape], 0)
        return tf.reshape(samples, final_shape)