def _kernel_constraint(self, kernel):
  """Radially constrains a kernel with shape (height, width, channels)."""
  padding = K.constant([[1, 1], [1, 1]], dtype='int32')

  kernel_shape = K.shape(kernel)[0]
  start = K.cast(kernel_shape / 2, 'int32')

  kernel_new = K.switch(
      K.cast(math_ops.floormod(kernel_shape, 2), 'bool'),
      lambda: kernel[start - 1:start, start - 1:start],
      lambda: kernel[start - 1:start, start - 1:start] + K.zeros(  # pylint: disable=g-long-lambda
          (2, 2), dtype=kernel.dtype))
  index = K.switch(
      K.cast(math_ops.floormod(kernel_shape, 2), 'bool'),
      lambda: K.constant(0, dtype='int32'),
      lambda: K.constant(1, dtype='int32'))

  while_condition = lambda index, *args: K.less(index, start)

  def body_fn(i, array):
    return i + 1, array_ops.pad(
        array, padding, constant_values=kernel[start + i, start + i])

  _, kernel_new = control_flow_ops.while_loop(
      while_condition, body_fn, [index, kernel_new],
      shape_invariants=[index.get_shape(),
                        tensor_shape.TensorShape([None, None])])
  return kernel_new
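To make the graph-mode control flow above easier to follow, here is a simplified NumPy sketch of the ring-padding idea for an odd-sized square kernel: every concentric ring of the output is constant, with its value taken from the kernel's lower-right diagonal. This is an illustrative assumption, not the exact implementation (even kernel sizes and the channel dimension are omitted).

import numpy as np

def radial_kernel_sketch(kernel):
  # Chebyshev distance from the center selects the ring; each ring is
  # filled with the diagonal entry kernel[center + ring, center + ring].
  size = kernel.shape[0]
  center = size // 2
  out = np.empty_like(kernel)
  for r in range(size):
    for c in range(size):
      ring = max(abs(r - center), abs(c - center))
      out[r, c] = kernel[center + ring, center + ring]
  return out

print(radial_kernel_sketch(np.arange(25.0).reshape(5, 5)))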
def testFloorModBfloat16(self):
  nums, divs = self.floatTestData()
  tf_result = math_ops.floormod(
      math_ops.cast(nums, dtypes.bfloat16),
      math_ops.cast(divs, dtypes.bfloat16))
  np_result = nums % divs
  self.assertAllEqual(tf_result, np_result)
def testConsistent(self):
  nums, divs = self.intTestData()
  with self.test_session():
    tf_result = (
        math_ops.floor_div(nums, divs) * divs +
        math_ops.floormod(nums, divs)).eval()
    tf_nums = array_ops.constant(nums)
    tf_divs = array_ops.constant(divs)
    tf2_result = (tf_nums // tf_divs * tf_divs + tf_nums % tf_divs).eval()
    np_result = (nums // divs) * divs + (nums % divs)
    # Consistency with numpy.
    self.assertAllEqual(tf_result, np_result)
    # Consistency with the two forms of divide.
    self.assertAllEqual(tf_result, tf2_result)
    # Consistency for the truncation form.
    tf3_result = (
        math_ops.truncatediv(nums, divs) * divs +
        math_ops.truncatemod(nums, divs)).eval()
    expanded_nums = np.reshape(
        np.tile(nums, divs.shape[1]), (nums.shape[0], divs.shape[1]))
    # Both identities should recover the numerator.
    self.assertAllEqual(tf3_result, expanded_nums)
    self.assertAllEqual(tf_result, expanded_nums)
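For readers unfamiliar with the two conventions being reconciled here: floor division pairs with a modulus that takes the divisor's sign, truncated division with one that takes the numerator's sign, and both pairs reconstruct the numerator. A plain-NumPy illustration (not part of the test suite):

import numpy as np

x, y = np.array([-7]), np.array([3])
floor_div, floor_mod = x // y, x % y                               # -3, 2
trunc_div, trunc_mod = np.trunc(x / y).astype(int), np.fmod(x, y)  # -2, -1
assert (floor_div * y + floor_mod == x).all()
assert (trunc_div * y + trunc_mod == x).all()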
def testFloorModInt(self):
  nums, divs = self.intTestData()
  # TODO(aselle): Change test to use % after switch.
  # tf_result = math_ops.floor_mod(nums, divs)
  tf_result = math_ops.floormod(nums, divs)
  np_result = nums % divs
  self.assertAllEqual(tf_result, np_result)
def testFloorModInt(self):
  nums, divs = self.intTestData()
  with self.test_session():
    # TODO(aselle): Change test to use % after switch.
    # tf_result = math_ops.floor_mod(nums, divs).eval()
    tf_result = math_ops.floormod(nums, divs).eval()
    np_result = nums % divs
    self.assertAllEqual(tf_result, np_result)
def testFloorModFloat(self):
  nums, divs = self.floatTestData()
  for dtype in [np.float16, np.float32, np.float64]:
    x = nums.astype(dtype)
    y = divs.astype(dtype)
    tf_result = math_ops.floormod(x, y)
    np_result = x % y
    self.assertAllEqual(tf_result, np_result)
    tf2_result = (array_ops.constant(x) % array_ops.constant(y))
    self.assertAllEqual(tf2_result, tf_result)
def testFloorModGradient(self):
  # Make sure the input is not near the discontinuity points where
  # x / y == floor(x / y).
  ns = constant_op.constant([17.], dtype=dtypes.float32)
  inputs = constant_op.constant([131.], dtype=dtypes.float32)
  floor_mod = math_ops.floormod(inputs, ns)
  with self.cached_session():
    error = gradient_checker.compute_gradient_error(inputs, [1],
                                                    floor_mod, [1])
    self.assertLess(error, 1e-4)
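For intuition, a TF2 eager sketch of the property being checked: away from the discontinuities, the derivative of floormod(x, y) with respect to x is 1, because floormod(x, y) = x - floor(x / y) * y and floor has zero gradient almost everywhere. This is an illustration, not part of the test suite:

import tensorflow as tf

x = tf.constant([131.])
y = tf.constant([17.])
with tf.GradientTape() as tape:
  tape.watch(x)
  m = tf.math.floormod(x, y)
print(tape.gradient(m, x))  # ~[1.]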
def testFloorDivModIntEdges(self):
  for dtype in [np.int32, np.int64]:
    x, y = self.intEdgeTestData(dtype)
    tf_floor_div = math_ops.floor_div(x, y)
    np_floor_div = self.numpySafeFloorDivInt(x, y)
    self.assertAllEqual(tf_floor_div, np_floor_div)
    tf_floor_mod = math_ops.floormod(x, y)
    np_floor_mod = self.numpySafeFloorModInt(x, y)
    self.assertAllEqual(tf_floor_mod, np_floor_mod)
    z = math_ops.add(math_ops.multiply(tf_floor_div, y), tf_floor_mod)
    # x = floor_div(x, y) * y + floor_mod(x, y)
    self.assertAllEqual(z, np.broadcast_to(x, z.shape))
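The "safe" helpers referenced above exist because fixed-width integer division can misbehave at the type boundary (for example INT_MIN // -1 overflows an int32). In Python's unbounded integers the reconstruction identity holds even there; a small illustration (the fixture helpers themselves are not reproduced here):

imin = -2**31  # int32 lower bound
for x, y in [(imin, 1), (imin, -1), (imin + 1, -1), (7, -3)]:
  assert x == (x // y) * y + (x % y)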
def _replace_oov_buckets(self, inputs, lookups):
  """Replace the default OOV value with one of the OOV bucket values."""
  if self.oov_tokens is None:
    return lookups

  num_oov_elements = self.oov_tokens.shape.num_elements()
  if inputs.dtype.is_integer:
    oov_indices = math_ops.floormod(inputs, num_oov_elements)
  else:
    oov_indices = string_ops.string_to_hash_bucket_fast(
        inputs, num_buckets=num_oov_elements)

  oov_values = array_ops.gather(self.oov_tokens, oov_indices)
  oov_locations = math_ops.equal(lookups, self.table._default_value)  # pylint: disable=protected-access

  return array_ops.where(oov_locations, oov_values, lookups)
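The integer branch above is hashing-by-remainder: floormod maps any integer id onto one of num_oov_elements bucket slots. A minimal NumPy sketch of that mapping (the token values are hypothetical):

import numpy as np

oov_tokens = np.array([-1, -2, -3])       # hypothetical OOV bucket values
ids = np.array([10, 11, 12, 13])
print(oov_tokens[ids % len(oov_tokens)])  # [-2 -3 -1 -2]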
def testConsistent(self):
  nums, divs = self.intTestData()
  tf_result = (
      math_ops.floor_div(nums, divs) * divs + math_ops.floormod(nums, divs))
  tf_nums = array_ops.constant(nums)
  tf_divs = array_ops.constant(divs)
  tf2_result = (tf_nums // tf_divs * tf_divs + tf_nums % tf_divs)
  np_result = (nums // divs) * divs + (nums % divs)
  # Consistent with numpy.
  self.assertAllEqual(tf_result, np_result)
  # Consistent with the two forms of divide.
  self.assertAllEqual(tf_result, tf2_result)
  # Consistency for the truncation form.
  tf3_result = (
      math_ops.truncatediv(nums, divs) * divs +
      math_ops.truncatemod(nums, divs))
  expanded_nums = np.reshape(
      np.tile(nums, divs.shape[1]), (nums.shape[0], divs.shape[1]))
  # Both identities should recover the numerator.
  self.assertAllEqual(tf3_result, expanded_nums)
  self.assertAllEqual(tf_result, expanded_nums)
def testFloorModFloat(self):
  nums, divs = self.floatTestData()
  with self.test_session():
    tf_result = math_ops.floormod(nums, divs).eval()
    np_result = nums % divs
    self.assertAllEqual(tf_result, np_result)
def _finish(self, state):
  var_dtype = self._variables[0].dtype.base_dtype
  # Update global step.
  global_step = self._get_global_step(state)
  update_global_step = state_ops.assign_add(global_step, 1.)

  # Update the first moment estimate.
  beta1 = state.get_hyper("beta1", dtype=var_dtype)
  moment1 = self._get_moment1(state)
  flat_grad = self._get_flat_grad(state)
  # moment1_t := beta1 * moment1_{t-1} + (1 - beta1) * flat_grad_t
  update_moment1 = moment1.assign(beta1 * moment1 + (1. - beta1) * flat_grad)

  # Update the gradient buffer.
  window = state.get_hyper("window")
  grad_buffer = self._get_grad_buffer(state)
  next_grad_index = math_ops.floormod(
      math_ops.to_int32(update_global_step - 1.), window)
  # grad_buffer[(t-1) % window] := moment1_t
  update_grad_buffer = state_ops.scatter_update(grad_buffer, next_grad_index,
                                                update_moment1)

  # Compute the update step.
  eps = state.get_hyper("eps", dtype=var_dtype)
  svd_eps = state.get_hyper("svd_eps", dtype=var_dtype)
  sigma_eps = state.get_hyper("sigma_eps", dtype=var_dtype)
  lr = state.get_hyper("lr", dtype=var_dtype)
  denom = math_ops.sqrt(
      math_ops.minimum(
          ops.convert_to_tensor(update_global_step),
          ops.convert_to_tensor(math_ops.cast(window, dtype=var_dtype))))
  moment1_2d = array_ops.expand_dims(update_moment1, -1)

  # m = grad_buffer^T / sqrt(min(t, window))
  # m has shape [model dimension, window], where model dimension is the sum
  # of the dimensions of the flattened variables.
  m = array_ops.transpose(math_ops.divide(update_grad_buffer, denom))

  # sigma, u, _ = SVD(m^Tm + I * svd_eps)
  mm = math_ops.matmul(m, m, transpose_a=True)
  damping = math_ops.cast(linalg_ops.eye(window), dtype=var_dtype) * svd_eps
  sigma, u, _ = linalg_ops.svd(mm + damping)
  sigma_sqrt = math_ops.sqrt(sigma)
  sigma_sqrt_min = math_ops.reduce_min(sigma_sqrt)

  # sigma_sqrt_inv = 1 / (\sqrt{sigma} + sigma_eps) ^ 3
  # We add sigma_eps to alleviate numerical instability.
  # Note that (m^Tm)^(-3/2) = u diag(sigma_sqrt_inv) u^T.
  sigma_sqrt_inv = math_ops.divide(
      math_ops.cast(1.0, dtype=var_dtype),
      math_ops.pow(sigma_sqrt + sigma_eps, 3))

  # In full matrix AdaGrad, the update step computes (mm^T)^(-1/2)g, where
  # the inversion of a model dimension by model dimension matrix is needed.
  # To speed up this computation we calculate the following instead:
  # m(m^Tm)^(-3/2)m^T moment1 = m u diag(sigma_sqrt_inv) u^T m^T moment1.
  new_step = array_ops.expand_dims(
      array_ops.zeros(flat_grad.get_shape(), dtype=var_dtype), -1)
  head = math_ops.matmul(
      m,
      math_ops.matmul(
          u,
          math_ops.matmul(
              array_ops.diag(sigma_sqrt_inv),
              math_ops.matmul(
                  u,
                  math_ops.matmul(m, moment1_2d, transpose_a=True),
                  transpose_a=True))))

  # When inverting (mm^t)^(1/2), we also add epsilon * I regularization for
  # degenerate cases. We expand ((mm^t)^(1/2) + epsilon * I)^(-1) using
  # Woodbury's identity.
  # For the full derivation please see the paper at
  # https://arxiv.org/pdf/1806.02958.pdf
  tail = moment1_2d - math_ops.matmul(
      m,
      math_ops.matmul(
          u,
          math_ops.matmul(
              array_ops.diag(
                  math_ops.divide(math_ops.cast(1.0, dtype=var_dtype),
                                  sigma)),
              math_ops.matmul(
                  u,
                  math_ops.matmul(m, moment1_2d, transpose_a=True),
                  transpose_a=True))))
  scaled_tail = math_ops.divide(tail, sigma_sqrt_min)

  update_new_step = control_flow_ops.cond(
      sigma_sqrt_min > eps,
      lambda: math_ops.add(head, scaled_tail),
      lambda: math_ops.add(new_step, head))

  # Update each variable.
  update_step = []
  for var in self._variables:
    dim = self.shape_dict[var.name]
    start_index = self.index_dict[var.name]
    end_index = start_index + dim
    var_update_correct_shape = array_ops.reshape(
        update_new_step[start_index:end_index], var.get_shape())
    var_updated = state_ops.assign_sub(var, lr * var_update_correct_shape)
    update_step.append(var_updated)

  return control_flow_ops.group(update_step)
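The floormod call above implements a circular buffer: step t's first-moment estimate overwrites slot (t - 1) % window, so grad_buffer always holds the most recent `window` estimates. A tiny sketch of just the indexing (the window size is illustrative):

window = 3
for t in range(1, 8):
  print('step', t, '-> slot', (t - 1) % window)
# slots cycle 0, 1, 2, 0, 1, 2, 0 ...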
def testFloorModFloat(self):
  nums, divs = self.floatTestData()
  with self.cached_session():
    tf_result = math_ops.floormod(nums, divs).eval()
    np_result = nums % divs
    self.assertAllEqual(tf_result, np_result)
def __mod__(self, other):
  return math_ops.floormod(self, other)
def testFloorModFloat(self):
  nums, divs = self.floatTestData()
  tf_result = math_ops.floormod(nums, divs)
  np_result = nums % divs
  self.assertAllEqual(tf_result, np_result)
def __rmod__(self, other):
  return math_ops.floormod(other, self)
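Together, the __mod__ and __rmod__ hooks above are what let `%` on tensors dispatch to floormod, whether the tensor appears on the left or the right of the operator. A small eager-mode sketch (values illustrative):

import tensorflow as tf

t = tf.constant([7, -7])
print(t % 3)                 # __mod__  -> [1 2]  (floor semantics: -7 % 3 == 2)
print(7 % tf.constant([3]))  # __rmod__ -> [1]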