def testVonMisesSampleMoments(self):
  locs_v = np.array([-1., 0.3, 2.3])
  concentrations_v = np.array([1.0, 2.0, 10.0])
  von_mises = tfd.VonMises(
      self.make_tensor(locs_v),
      self.make_tensor(concentrations_v),
      validate_args=True)

  n = 10000
  seed = test_util.test_seed()
  samples = von_mises.sample(n, seed=seed)

  expected_mean = von_mises.mean()
  actual_mean = tf.atan2(
      tf.reduce_mean(tf.sin(samples), axis=0),
      tf.reduce_mean(tf.cos(samples), axis=0))

  expected_variance = von_mises.variance()
  standardized_samples = samples - tf.expand_dims(von_mises.mean(), 0)
  actual_variance = 1. - tf.reduce_mean(tf.cos(standardized_samples), axis=0)

  [expected_mean_val, expected_variance_val, actual_mean_val,
   actual_variance_val] = self.evaluate(
       [expected_mean, expected_variance, actual_mean, actual_variance])

  self.assertAllClose(expected_mean_val, actual_mean_val, rtol=0.1)
  self.assertAllClose(expected_variance_val, actual_variance_val, rtol=0.1)
def testVonMisesSampleMoments(self):
  locs_v = np.array([-1., 0.3, 2.3])
  concentrations_v = np.array([1., 2., 10.])
  von_mises = tfd.VonMises(
      self.make_tensor(locs_v),
      self.make_tensor(concentrations_v),
      validate_args=True)

  n = 10000
  seed = test_util.test_seed()
  samples = von_mises.sample(n, seed=seed)

  expected_mean = von_mises.mean()
  actual_mean = tf.atan2(
      tf.reduce_mean(tf.sin(samples), axis=0),
      tf.reduce_mean(tf.cos(samples), axis=0))

  expected_variance = von_mises.variance()
  standardized_samples = samples - tf.expand_dims(von_mises.mean(), 0)
  variance_samples = 1. - tf.cos(standardized_samples)

  [expected_mean_val, expected_variance_val, actual_mean_val,
   variance_samples_] = self.evaluate(
       [expected_mean, expected_variance, actual_mean, variance_samples])

  # TODO(axch, cgs): atan2(means) is not mean(atan2), but maybe there
  # is a formulation of what this is testing that does use IID samples
  # and is amenable to assertAllMeansClose?
  self.assertAllClose(actual_mean_val, expected_mean_val, rtol=0.1)
  self.assertAllMeansClose(
      variance_samples_, expected_variance_val, axis=0, rtol=0.1)
def augment(points_xyz, points_mask, bboxes):
  """Data augmentation: random y-flip, z-rotation, and point jitter."""
  # Randomly mirror the scene across the x-z plane (flip the sign of y),
  # flipping the box heading angles along with it.
  rand = tf.random.uniform(
      [], minval=-1.0, maxval=1.0, dtype=tf.dtypes.float32)
  rand = tf.where(rand > 0, 1, -1)
  rand = tf.cast(rand, tf.dtypes.float32)
  points_xyz = tf.concat(
      [points_xyz[:, 0:1], points_xyz[:, 1:2] * rand, points_xyz[:, 2:]],
      axis=-1)
  bboxes = tf.concat(
      [bboxes[:, 0:1], bboxes[:, 1:2] * rand, bboxes[:, 2:6],
       bboxes[:, 6:] * rand],
      axis=-1)

  # Rotate the scene around the z axis by a uniform angle in [-pi/4, pi/4].
  theta = tf.random.uniform(
      [], minval=-1, maxval=1, dtype=tf.dtypes.float32) * np.pi / 4.0
  rz = tf.stack([
      tf.cos(theta), tf.sin(theta), 0,
      -tf.sin(theta), tf.cos(theta), 0,
      0, 0, 1])
  rz = tf.reshape(rz, [3, 3])
  points_xyz = tf.matmul(points_xyz, rz)
  theta = tf.reshape(theta, [1, 1])
  bboxes = tf.concat(
      [tf.matmul(bboxes[:, 0:3], rz), bboxes[:, 3:6],
       tf_util.wrap_angle_rad(bboxes[:, 6:] + theta, -np.pi, np.pi)],
      axis=-1)

  # Add small Gaussian jitter to each point.
  jitter = tf.random.normal(points_xyz.shape, 0.0, 0.02)
  points_xyz = points_xyz + jitter
  return points_xyz, points_mask, bboxes
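# A small standalone check of the z-rotation used in `augment` above (a
# sketch assuming only TensorFlow and NumPy; the angle and point values are
# arbitrary): the row-major matrix `rz` is orthogonal, so applying its
# transpose restores the original points.
import numpy as np
import tensorflow as tf

theta = tf.constant(np.pi / 6.0)
rz = tf.reshape(
    tf.stack([tf.cos(theta), tf.sin(theta), 0.,
              -tf.sin(theta), tf.cos(theta), 0.,
              0., 0., 1.]), [3, 3])
points = tf.random.normal([5, 3])
restored = tf.matmul(tf.matmul(points, rz), tf.transpose(rz))
np.testing.assert_allclose(points.numpy(), restored.numpy(), atol=1e-5)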
def func(batch_of_x0, batch_of_x1):
  """Function that does something different for batch 0 and batch 1."""
  # batch_0_result.shape = [..., 2].
  x0, x1 = batch_of_x0[0, ...], batch_of_x1[0, ...]
  batch_0_result = tf.stack([tf.sin(x0 * x1), tf.cos(x0 * x1)], axis=-1)

  x0, x1 = batch_of_x0[1, ...], batch_of_x1[1, ...]
  batch_1_result = tf.stack([tf.sin(2 * x0), tf.cos(2 * x1)], axis=-1)

  return tf.stack([batch_0_result, batch_1_result], axis=0)
def test_matrices_from_component(self):
  num_timesteps = 4
  drift_scale = 1.23
  period = 12
  frequency_multipliers = [1, 3]

  component = SmoothSeasonal(
      period=period, frequency_multipliers=frequency_multipliers)
  ssm = component.make_state_space_model(num_timesteps, [drift_scale])

  frequency_0 = 2 * np.pi * frequency_multipliers[0] / period
  frequency_1 = 2 * np.pi * frequency_multipliers[1] / period

  first_frequency_transition = tf.linalg.LinearOperatorFullMatrix(
      [[tf.cos(frequency_0), tf.sin(frequency_0)],
       [-tf.sin(frequency_0), tf.cos(frequency_0)]])
  second_frequency_transition = tf.linalg.LinearOperatorFullMatrix(
      [[tf.cos(frequency_1), tf.sin(frequency_1)],
       [-tf.sin(frequency_1), tf.cos(frequency_1)]])
  latents_transition = self.evaluate(
      tf.linalg.LinearOperatorBlockDiag(
          [first_frequency_transition,
           second_frequency_transition]).to_dense())

  for t in range(num_timesteps):
    observation_matrix = self.evaluate(
        ssm.get_observation_matrix_for_timestep(t).to_dense())
    self.assertAllClose([[1.0, 0.0, 1.0, 0.0]], observation_matrix)

    observation_noise_mean = self.evaluate(
        ssm.get_observation_noise_for_timestep(t).mean())
    observation_noise_covariance = self.evaluate(
        ssm.get_observation_noise_for_timestep(t).covariance())
    self.assertAllClose([0.0], observation_noise_mean)
    self.assertAllClose([[0.0]], observation_noise_covariance)

    transition_matrix = self.evaluate(
        ssm.get_transition_matrix_for_timestep(t).to_dense())
    self.assertAllClose(latents_transition, transition_matrix)

    transition_noise_mean = self.evaluate(
        ssm.get_transition_noise_for_timestep(t).mean())
    transition_noise_covariance = self.evaluate(
        ssm.get_transition_noise_for_timestep(t).covariance())
    self.assertAllClose(np.zeros([4]), transition_noise_mean)
    self.assertAllClose(
        np.square(drift_scale) * np.eye(4), transition_noise_covariance)
def call(self, inputs):
  inputs = tf.convert_to_tensor(inputs, dtype=self.dtype)
  inputs = tf.cast(inputs, tf.float32)
  kernel = (1.0 / self.kernel_scale) * self.unscaled_kernel
  outputs = tf.raw_ops.MatMul(a=inputs, b=kernel)
  outputs = tf.nn.bias_add(outputs, self.bias)
  return tf.cos(outputs)
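# Context for `call` above (a sketch, not the layer's actual state): with a
# random Gaussian `kernel` and uniform `bias`, cos(x @ kernel + bias) is the
# random Fourier feature map of Rahimi & Recht, whose feature dot products
# approximate a shift-invariant kernel. Minimal standalone version with
# made-up shapes:
import numpy as np
import tensorflow as tf

x = tf.random.normal([4, 16])        # batch of 4 inputs, 16 features each
kernel = tf.random.normal([16, 64])  # unscaled random projection
bias = tf.random.uniform([64], 0., 2. * np.pi)
features = tf.cos(tf.matmul(x, kernel / 2.0) + bias)  # kernel_scale = 2.0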
def __call__(self, step): with tf.name_scope(self.name or "NoisyLinearCosineDecay") as name: initial_learning_rate = tf.convert_to_tensor( self.initial_learning_rate, name="initial_learning_rate") dtype = initial_learning_rate.dtype decay_steps = tf.cast(self.decay_steps, dtype) initial_variance = tf.cast(self.initial_variance, dtype) variance_decay = tf.cast(self.variance_decay, dtype) num_periods = tf.cast(self.num_periods, dtype) alpha = tf.cast(self.alpha, dtype) beta = tf.cast(self.beta, dtype) global_step_recomp = tf.cast(step, dtype) global_step_recomp = tf.minimum(global_step_recomp, decay_steps) linear_decayed = (decay_steps - global_step_recomp) / decay_steps variance = initial_variance / (tf.pow(1.0 + global_step_recomp, variance_decay)) std = tf.sqrt(variance) noisy_linear_decayed = ( linear_decayed + tf.random.normal(linear_decayed.shape, stddev=std)) completed_fraction = global_step_recomp / decay_steps fraction = 2.0 * num_periods * completed_fraction cosine_decayed = 0.5 * (1.0 + tf.cos(tf.constant(math.pi) * fraction)) noisy_linear_cosine_decayed = ( (alpha + noisy_linear_decayed) * cosine_decayed + beta) return tf.multiply(initial_learning_rate, noisy_linear_cosine_decayed, name=name)
def __call__(self, step): with tf.name_scope(self.name or "LinearCosineDecay") as name: initial_learning_rate = tf.convert_to_tensor( self.initial_learning_rate, name="initial_learning_rate" ) dtype = initial_learning_rate.dtype decay_steps = tf.cast(self.decay_steps, dtype) num_periods = tf.cast(self.num_periods, dtype) alpha = tf.cast(self.alpha, dtype) beta = tf.cast(self.beta, dtype) global_step_recomp = tf.cast(step, dtype) global_step_recomp = tf.minimum(global_step_recomp, decay_steps) linear_decayed = (decay_steps - global_step_recomp) / decay_steps completed_fraction = global_step_recomp / decay_steps fraction = 2.0 * num_periods * completed_fraction cosine_decayed = 0.5 * ( 1.0 + tf.cos(tf.constant(math.pi, dtype=dtype) * fraction) ) linear_cosine_decayed = ( alpha + linear_decayed ) * cosine_decayed + beta return tf.multiply( initial_learning_rate, linear_cosine_decayed, name=name )
def test_basic_statistics_no_latent_variance_one_frequency(self):
  # Fix the latent variables at the value 1 so the results are deterministic.
  num_timesteps = 10
  period = 42
  frequency_multipliers = [3]
  drift_scale = 0.

  initial_state_loc = self._build_placeholder(np.ones([2]))
  initial_state_scale = tf.zeros_like(initial_state_loc)
  initial_state_prior = tfd.MultivariateNormalDiag(
      loc=initial_state_loc, scale_diag=initial_state_scale)

  ssm = SmoothSeasonalStateSpaceModel(
      num_timesteps=num_timesteps,
      period=period,
      frequency_multipliers=frequency_multipliers,
      drift_scale=drift_scale,
      initial_state_prior=initial_state_prior)

  two_pi = 6.283185307179586
  sine_terms = tf.sin(
      two_pi * 3 * tf.range(0, num_timesteps, dtype=tf.float32) / 42)
  cosine_terms = tf.cos(
      two_pi * 3 * tf.range(0, num_timesteps, dtype=tf.float32) / 42)
  predicted_time_series_ = self.evaluate(
      (sine_terms + cosine_terms)[..., tf.newaxis])

  self.assertAllClose(self.evaluate(ssm.mean()), predicted_time_series_)
  self.assertAllClose(*self.evaluate(
      (ssm.stddev(), tf.zeros_like(predicted_time_series_))))
def _rotate_on_ellipse(state_parts, vectors, angle):
  new_state_parts = []
  padded_angle = _right_pad_with_ones(angle, tf.rank(state_parts[0]))
  for state, vector in zip(state_parts, vectors):
    new_state_parts.append(
        state * tf.cos(padded_angle) + vector * tf.sin(padded_angle))
  return new_state_parts
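# Sanity sketch for `_rotate_on_ellipse` above (standalone; the
# `_right_pad_with_ones` broadcasting helper is sidestepped by using a
# scalar angle): at angle 0 the state is returned unchanged, and at angle
# pi/2 the auxiliary vector is returned, as expected for a rotation on the
# ellipse spanned by `state` and `vector`.
import numpy as np
import tensorflow as tf

state = tf.constant([1., 2., 3.])
vector = tf.constant([-1., 0., 1.])
rotate = lambda angle: state * tf.cos(angle) + vector * tf.sin(angle)
np.testing.assert_allclose(rotate(0.).numpy(), state.numpy())
np.testing.assert_allclose(
    rotate(tf.constant(np.pi / 2., tf.float32)).numpy(), vector.numpy(),
    atol=1e-6)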
def _kl_von_mises_von_mises(d1, d2, name=None):
  """Batchwise KL divergence KL(d1 || d2) with d1 and d2 von Mises.

  Args:
    d1: instance of a von Mises distribution object.
    d2: instance of a von Mises distribution object.
    name: (optional) Name to use for created operations.
      Default is "kl_von_mises_von_mises".

  Returns:
    Batchwise KL(d1 || d2)
  """
  with tf.name_scope(name or 'kl_von_mises_von_mises'):
    # The density of von Mises is (abbreviating the concentration for conc):
    #   vonMises(x; loc, conc) = exp(conc cos(x - loc)) / (2 pi I_0 (conc))
    # We need two properties:
    # 1. Standardization: if z ~ vonMises(0, conc), then
    #    z + loc ~ vonMises(loc, conc).
    # 2. Expectation of cosine:
    #    E_q(z | 0, conc) cos z = I_1 (conc) / I_0 (conc)
    # Now,
    #   KL(d1 || d2)
    #     = E_vonMises(x; loc1, conc1) log vonMises(x; loc1, conc1)
    #                                      / vonMises(x; loc2, conc2)
    # Plugging in the densities and rearranging, we have
    #   log I_0(conc2) / I_0(conc1)
    #     + E_vonMises(x; loc1, conc1) [ conc1 cos (z - loc1)
    #                                    - conc2 cos (z - loc2) ]
    # Let's transform the second term using the standardization property:
    #   E_vonMises(x; 0, conc1) [conc1 cos z - conc2 cos (z - (loc2 - loc1))]
    # Applying the cos (x - y) = cos x cos y + sin x sin y expansion, we get
    #   E_vonMises(x; 0, conc1) [conc1 cos z - conc2 cos (loc2 - loc1) cos z
    #                            - conc2 sin (loc2 - loc1) sin z]
    # Because the distribution is symmetric around zero, the last term
    # vanishes in expectation. The remaining two terms are computed using
    # the "expectation of cosine" property:
    #   (conc1 - conc2 cos (loc2 - loc1)) E_vonMises(x; 0, conc1) cos z
    #     = (conc1 - conc2 cos (loc2 - loc1)) I_1(conc1) / I_0(conc1)
    # In total, we have
    #   KL(d1 || d2) = log I_0(conc2) / I_0(conc1)
    #     + (conc1 - conc2 cos (loc2 - loc1)) I_1(conc1) / I_0(conc1)
    # To improve numerical stability, we can replace the I_j(k) functions
    # with their exponentially scaled versions using the equality
    # I_j(k) = I_j^E(k) exp(k) (which holds for k >= 0):
    #   KL(d1 || d2) = (conc2 - conc1) + log I_0^E(conc2) / I_0^E(conc1)
    #     + (conc1 - conc2 cos (loc2 - loc1)) I_1^E(conc1) / I_0^E(conc1)
    # Note that this formula is numerically stable for conc1 = 0 and/or
    # conc2 = 0 because I_0(0) = I_0^E(0) = 1.
    concentration1 = tf.convert_to_tensor(d1.concentration)
    concentration2 = tf.convert_to_tensor(d2.concentration)
    i0e_concentration1 = tf.math.bessel_i0e(concentration1)
    i1e_concentration1 = tf.math.bessel_i1e(concentration1)
    i0e_concentration2 = tf.math.bessel_i0e(concentration2)
    return ((concentration2 - concentration1) +
            tf.math.log(i0e_concentration2 / i0e_concentration1) +
            (concentration1 - concentration2 * tf.cos(d1.loc - d2.loc)) *
            (i1e_concentration1 / i0e_concentration1))
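# Quick Monte Carlo cross-check of the closed-form KL above (a sketch; it
# assumes TensorFlow Probability is importable and uses an arbitrary sample
# size and seed):
import tensorflow as tf
import tensorflow_probability as tfp

d1 = tfp.distributions.VonMises(loc=0.5, concentration=2.)
d2 = tfp.distributions.VonMises(loc=-0.3, concentration=1.)
analytic_kl = tfp.distributions.kl_divergence(d1, d2)
z = d1.sample(100000, seed=42)
monte_carlo_kl = tf.reduce_mean(d1.log_prob(z) - d2.log_prob(z))
# The two values should agree up to Monte Carlo error.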
def __call__(self, step): with tf.name_scope(self.name or "SGDRDecay") as name: initial_learning_rate = tf.convert_to_tensor( self.initial_learning_rate, name="initial_learning_rate" ) dtype = initial_learning_rate.dtype first_decay_steps = tf.cast(self.first_decay_steps, dtype) alpha = tf.cast(self.alpha, dtype) t_mul = tf.cast(self._t_mul, dtype) m_mul = tf.cast(self._m_mul, dtype) global_step_recomp = tf.cast(step, dtype) completed_fraction = global_step_recomp / first_decay_steps def compute_step(completed_fraction, geometric=False): """Helper for `cond` operation.""" if geometric: i_restart = tf.floor( tf.math.log(1.0 - completed_fraction * (1.0 - t_mul)) / tf.math.log(t_mul) ) sum_r = (1.0 - t_mul**i_restart) / (1.0 - t_mul) completed_fraction = ( completed_fraction - sum_r ) / t_mul**i_restart else: i_restart = tf.floor(completed_fraction) completed_fraction -= i_restart return i_restart, completed_fraction i_restart, completed_fraction = tf.cond( tf.equal(t_mul, 1.0), lambda: compute_step(completed_fraction, geometric=False), lambda: compute_step(completed_fraction, geometric=True), ) m_fac = m_mul**i_restart cosine_decayed = ( 0.5 * m_fac * ( 1.0 + tf.cos( tf.constant(math.pi, dtype=dtype) * completed_fraction ) ) ) decayed = (1 - alpha) * cosine_decayed + alpha return tf.multiply(initial_learning_rate, decayed, name=name)
def get_rotation_matrix(angles, image_height, image_width, name=None):
  """Returns projective transform(s) for the given angle(s).

  Args:
    angles: A scalar angle to rotate all images by, or (for batches of
      images) a vector with an angle to rotate each image in the batch. The
      rank must be statically known (the shape is not `TensorShape(None)`).
    image_height: Height of the image(s) to be transformed.
    image_width: Width of the image(s) to be transformed.
    name: The name of the op.

  Returns:
    A tensor of shape (num_images, 8). Projective transforms which can be
      given to operation `image_projective_transform_v2`. If one row of
      transforms is [a0, a1, a2, b0, b1, b2, c0, c1], then it maps the
      *output* point `(x, y)` to a transformed *input* point
      `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, where
      `k = c0 x + c1 y + 1`.
  """
  with backend.name_scope(name or 'rotation_matrix'):
    x_offset = ((image_width - 1) -
                (tf.cos(angles) * (image_width - 1) -
                 tf.sin(angles) * (image_height - 1))) / 2.0
    y_offset = ((image_height - 1) -
                (tf.sin(angles) * (image_width - 1) +
                 tf.cos(angles) * (image_height - 1))) / 2.0
    num_angles = tf.compat.v1.shape(angles)[0]
    return tf.concat(
        values=[
            tf.cos(angles)[:, None],
            -tf.sin(angles)[:, None],
            x_offset[:, None],
            tf.sin(angles)[:, None],
            tf.cos(angles)[:, None],
            y_offset[:, None],
            tf.zeros((num_angles, 2), tf.float32),
        ],
        axis=1)
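# Example usage of `get_rotation_matrix` above (a sketch; it assumes the
# surrounding module's imports such as `backend` are available, and the
# angles and image size are arbitrary):
import numpy as np
import tensorflow as tf

angles = tf.constant([0.0, np.pi / 2.0])  # one angle per image in the batch
transforms = get_rotation_matrix(angles, image_height=28, image_width=28)
# `transforms` has shape (2, 8). The first row is the identity transform
# [1, 0, 0, 0, 1, 0, 0, 0] because cos(0) = 1 and sin(0) = 0.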
def __call__(self, global_step):
  global_step = tf.cast(global_step, dtype=tf.float32)
  warmup_lr = self._params.warmup_learning_rate
  warmup_steps = self._params.warmup_steps
  init_lr = self._params.init_learning_rate
  total_steps = self._params.total_steps
  linear_warmup = (
      warmup_lr + global_step / warmup_steps * (init_lr - warmup_lr))
  cosine_learning_rate = (
      init_lr * (tf.cos(np.pi * (global_step - warmup_steps) /
                        (total_steps - warmup_steps)) + 1.0) / 2.0)
  learning_rate = tf.where(global_step < warmup_steps, linear_warmup,
                           cosine_learning_rate)
  return learning_rate
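# Standalone sketch of the warmup-then-cosine schedule implemented by
# `__call__` above (the names mirror `self._params`; the values are
# arbitrary): the rate ramps linearly to `init_lr` over the first 100
# steps, then follows half a cosine period down to 0 at step 1000.
import numpy as np
import tensorflow as tf

warmup_lr, warmup_steps = 0.0, 100.0
init_lr, total_steps = 0.1, 1000.0

def lr_at(step):
  step = tf.cast(step, tf.float32)
  linear = warmup_lr + step / warmup_steps * (init_lr - warmup_lr)
  cosine = init_lr * (tf.cos(np.pi * (step - warmup_steps) /
                             (total_steps - warmup_steps)) + 1.0) / 2.0
  return tf.where(step < warmup_steps, linear, cosine)

print(float(lr_at(50)), float(lr_at(100)), float(lr_at(1000)))
# -> 0.05 0.1 0.0 (up to float rounding)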
def __call__(self, step): with tf.name_scope(self.name or "CosineDecay"): initial_learning_rate = tf.convert_to_tensor( self.initial_learning_rate, name="initial_learning_rate") dtype = initial_learning_rate.dtype decay_steps = tf.cast(self.decay_steps, dtype) global_step_recomp = tf.cast(step, dtype) global_step_recomp = tf.minimum(global_step_recomp, decay_steps) completed_fraction = global_step_recomp / decay_steps cosine_decayed = 0.5 * ( 1.0 + tf.cos(tf.constant(math.pi) * completed_fraction)) decayed = (1 - self.alpha) * cosine_decayed + self.alpha return tf.multiply(initial_learning_rate, decayed)
def grad(dy): """The gradient of the von Mises samples w.r.t. concentration.""" broadcast_concentration = concentration + tf.zeros_like(x) _, dcdf_dconcentration = value_and_gradient( lambda conc: von_mises_cdf(x, conc), broadcast_concentration) inv_prob = tf.exp(-broadcast_concentration * (tf.cos(x) - 1.)) * ( (2. * np.pi) * tf.math.bessel_i0e(broadcast_concentration)) # Compute the implicit reparameterization gradient [2], # dz/dconc = -(dF(z; conc) / dconc) / p(z; conc) ret = dy * (-inv_prob * dcdf_dconcentration) # Sum over the sample dimensions. Assume that they are always the first # ones. num_sample_dimensions = (tf.rank(broadcast_concentration) - tf.rank(concentration)) return tf.reduce_sum(ret, axis=tf.range(num_sample_dimensions))
def loop_body(done, u, w):
  """Resample the non-accepted points."""
  # We resample u each time completely. Only its sign is used outside the
  # loop, which is random.
  u = tf.random.uniform(
      shape, minval=-1., maxval=1., dtype=dtype, seed=seed())
  z = tf.cos(np.pi * u)
  # Update the non-accepted points.
  w = tf.where(done, w, (1. + s * z) / (s + z))
  y = concentration * (s - w)
  v = tf.random.uniform(
      shape, minval=0., maxval=1., dtype=dtype, seed=seed())
  accept = (y * (2. - y) >= v) | (tf.math.log(y / v) + 1. >= y)
  return done | accept, u, w
def _process_step_num(self, single_input, max_step):
  if self._step_encoding == 'one_hot':
    return tf.one_hot(single_input, max_step + 1)
  if self._step_encoding == 'sinusoid':
    i = tf.range(self._d_step_emb, dtype=tf.float32)[tf.newaxis, :]
    step_num = tf.cast(single_input, tf.float32)[:, tf.newaxis]
    rads = step_num / tf.math.pow(
        1.0e4, 2 * (i // 2) / tf.cast(self._d_step_emb, tf.float32))
    return tf.concat([tf.sin(rads[:, 0::2]), tf.cos(rads[:, 1::2])], axis=-1)
  if self._step_encoding == 'learned':
    return self._step_embedding_layer(
        tf.one_hot(single_input, max_step + 1))
  raise ValueError(
      'Step encoding must be one of ["one_hot", "sinusoid", "learned"].')
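# Standalone sketch of the 'sinusoid' branch above (Transformer-style
# sinusoidal encoding; `d_step_emb` stands in for `self._d_step_emb`): the
# first half of the output holds sines and the second half cosines of
# geometrically spaced frequencies.
import tensorflow as tf

d_step_emb = 8
step_num = tf.constant([3.0, 7.0])[:, tf.newaxis]  # shape [batch, 1]
i = tf.range(d_step_emb, dtype=tf.float32)[tf.newaxis, :]
rads = step_num / tf.math.pow(1.0e4, 2 * (i // 2) / float(d_step_emb))
encoding = tf.concat([tf.sin(rads[:, 0::2]), tf.cos(rads[:, 1::2])], axis=-1)
# encoding.shape == [2, 8]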
def testVonMisesSampleVarianceUniform(self):
  von_mises = tfd.VonMises(
      self.make_tensor(1.), self.make_tensor(0.), validate_args=True)

  n = 10000
  samples = von_mises.sample(n, seed=test_util.test_seed())

  # For the circular uniform distribution, the mean is not well-defined,
  # so only check the variance.
  expected_variance = 1.
  standardized_samples = samples - tf.expand_dims(von_mises.mean(), 0)
  actual_variance = 1. - tf.reduce_mean(tf.cos(standardized_samples), axis=0)

  self.assertAllClose(
      expected_variance, self.evaluate(actual_variance), rtol=0.1)
def loop_body(done, u_in, w, seed):
  """Resample the non-accepted points."""
  # We resample u each time completely. Only its sign is used outside the
  # loop, which is random.
  u_seed, v_seed, next_seed = samplers.split_seed(seed, n=3)
  u = samplers.uniform(
      shape, minval=-1., maxval=1., dtype=concentration.dtype, seed=u_seed)
  tensorshape_util.set_shape(u, u_in.shape)
  z = tf.cos(np.pi * u)
  # Update the non-accepted points.
  w = tf.where(done, w, (1. + s * z) / (s + z))
  y = concentration * (s - w)
  v = samplers.uniform(
      shape, minval=0., maxval=1., dtype=concentration.dtype, seed=v_seed)
  accept = (y * (2. - y) >= v) | (tf.math.log(y / v) + 1. >= y)
  return done | accept, u, w, next_seed
def _von_mises_sample_bwd(_, aux, dy):
  """The gradient of the von Mises samples w.r.t. concentration."""
  concentration, samples = aux
  broadcast_concentration = tf.broadcast_to(concentration, ps.shape(samples))
  _, dcdf_dconcentration = value_and_gradient(
      lambda conc: von_mises_cdf(samples, conc), broadcast_concentration)
  inv_prob = tf.exp(-broadcast_concentration * (tf.cos(samples) - 1.)) * (
      (2. * np.pi) * tf.math.bessel_i0e(broadcast_concentration))
  # Compute the implicit reparameterization gradient [2],
  #   dz/dconc = -(dF(z; conc) / dconc) / p(z; conc)
  ret = dy * (-dcdf_dconcentration * inv_prob)
  # Sum over the sample dimensions. Assume that they are always the first
  # ones.
  num_sample_dimensions = (tf.rank(broadcast_concentration) -
                           tf.rank(concentration))
  # None gradient for the seed.
  return tf.reduce_sum(ret, axis=tf.range(num_sample_dimensions)), None
def rastrigin(x): """The value and gradient of the Rastrigin function. The Rastrigin function is a standard optimization test case. It is a multimodal non-convex function. While it has a large number of local minima, the global minimum is located at the origin and where the function value is zero. The standard search domain for optimization problems is the hypercube [-5.12, 5.12]**d in d-dimensions. Args: x: Real `Tensor` of shape [d]. The position at which to evaluate the function. Returns: value: A scalar `Tensor` of the function value at the supplied point. """ value = tf.reduce_sum(x**2 - 10.0 * tf.cos(2 * np.pi * x), axis=-1) + 10.0 * dim return value
def points_rotate(features, max_rotation, min_rotation=0.0, axis="z",
                  keys=("image",)):
  """Randomly rotate points on a given axis.

  Args:
    features: Dictionary of data features to preprocess.
    max_rotation: The maximum possible rotation in radians.
    min_rotation: The minimum possible rotation in radians.
    axis: The rotation axis.
    keys: On which keys to apply this function.

  Returns:
    Features with rotated points.
  """
  assert axis in {"x", "y", "z"}, "invalid rotation axis"
  for key in keys:
    phi = tf.random.uniform(
        shape=(1,), minval=min_rotation, maxval=max_rotation)
    cos, sin, zero, one = (tf.cos(phi), tf.sin(phi),
                           tf.zeros((1,)), tf.ones((1,)))
    # Matrices from
    # https://en.wikipedia.org/wiki/Rotation_matrix#Basic_rotations.
    if axis == "x":
      rotation_matrix = [one, zero, zero, zero, cos, -sin, zero, sin, cos]
    elif axis == "y":
      rotation_matrix = [cos, zero, sin, zero, one, zero, -sin, zero, cos]
    elif axis == "z":
      rotation_matrix = [cos, -sin, zero, sin, cos, zero, zero, zero, one]
    rotate = tf.reshape(tf.stack(rotation_matrix, axis=0), [3, 3])
    features[key] = tf.matmul(features[key], rotate)
  return features
def _von_mises_sample_jvp(shape, primals, tangents):
  """Compute primals and tangents using the implicit derivative."""
  concentration, seed = primals
  dconcentration, dseed = tangents
  del dseed

  dconcentration = tf.broadcast_to(dconcentration, shape)
  broadcast_concentration = tf.broadcast_to(concentration, shape)

  samples = _von_mises_sample_no_gradient(shape, concentration, seed)

  _, dcdf_dconcentration = value_and_gradient(
      lambda conc: von_mises_cdf(samples, conc), broadcast_concentration)
  inv_prob = tf.exp(-concentration * (tf.cos(samples) - 1.)) * (
      (2. * np.pi) * tf.math.bessel_i0e(concentration))
  # Compute the implicit derivative,
  #   dz = dconc * -(dF(z; conc) / dconc) / p(z; conc)
  dsamples = dconcentration * (-dcdf_dconcentration * inv_prob)
  return samples, dsamples
def rastrigin(x): """The value and gradient of the Rastrigin function. The Rastrigin function is a standard optimization test case. It is a multimodal non-convex function. While it has a large number of local minima, the global minimum is located at the origin and where the function value is zero. The standard search domain for optimization problems is the hypercube [-5.12, 5.12]**d in d-dimensions. Args: x: Real `Tensor` of shape [2]. The position at which to evaluate the function. Returns: value_and_gradient: A tuple of two `Tensor`s containing value: A scalar `Tensor` of the function value at the supplied point. gradient: A `Tensor` of shape [2] containing the gradient of the function along the two axes. """ return tf.reduce_sum(input_tensor=x**2 - 10.0 * tf.cos(2 * np.pi * x)) + 10.0 * dim
def easom(z): """The value of the two dimensional Easom function. The Easom function is a standard optimization test function. It has a single global minimum at (pi, pi) which is located inside a deep funnel. The expression for the function is: ```None f(x, y) = -cos(x) cos(y) exp(-(x-pi)**2 - (y-pi)**2) ``` Args: z: `Tensor` of shape [2] and real dtype. The argument at which to evaluate the function. Returns: value: Scalar real `Tensor`. The value of the Easom function at the supplied argument. """ f1 = tf.reduce_prod(tf.cos(z), axis=-1) f2 = tf.exp(-tf.reduce_sum((z - np.pi)**2, axis=-1)) return -f1 * f2
def build_smooth_seasonal_transition_matrix(period, frequency_multipliers,
                                            dtype):
  """Build the transition matrix for a SmoothSeasonalStateSpaceModel."""
  two_pi = tf.constant(2. * np.pi, dtype=dtype)
  frequencies = two_pi * frequency_multipliers / period
  num_frequencies = static_num_frequencies(frequency_multipliers)

  sin_frequencies = tf.sin(frequencies)
  cos_frequencies = tf.cos(frequencies)

  # Each 2x2 block rotates the corresponding latent pair by its frequency.
  trigonometric_values = tf.stack(
      [cos_frequencies, sin_frequencies, -sin_frequencies, cos_frequencies],
      axis=-1)
  transition_matrix = tf.linalg.LinearOperatorBlockDiag([
      tf.linalg.LinearOperatorFullMatrix(
          matrix=tf.reshape(trigonometric_values[i], [2, 2]),
          is_square=True)
      for i in range(num_frequencies)
  ])
  return transition_matrix
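# Illustration of the block structure built above (a sketch; with a single
# frequency multiplier the transition matrix reduces to the 2x2 rotation by
# 2 * pi * multiplier / period):
import numpy as np

period, multiplier = 12.0, 3.0
freq = 2.0 * np.pi * multiplier / period
expected_block = np.array([[np.cos(freq), np.sin(freq)],
                           [-np.sin(freq), np.cos(freq)]])
# `build_smooth_seasonal_transition_matrix(period, [multiplier],
# tf.float32).to_dense()` should equal `expected_block`.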
def grad(dy):
  # Note: `x`, `concentration`, and `dcdf_dconcentration` are captured from
  # the enclosing scope.
  prob = tf.exp(concentration * (tf.cos(x) - 1.)) / (
      (2. * np.pi) * tf.math.bessel_i0e(concentration))
  return dy * prob, dy * dcdf_dconcentration
def integral(a):
  return integrate_function(
      lambda x: tf.cos(a * x), 0.0, 1.0, dtype=tf.float64, **args)
def _log_unnormalized_prob(self, x, loc, concentration):
  z = self._z(x, loc=loc)
  return concentration * (tf.cos(z) - 1)