def test_losses(self, x):
    """Returns rate and distortion at the best rate-distortion index.

    The index is chosen along the last axis of
    `rates + self.lmbda * distortions`, where both terms come from
    `self.all_rd(x)`.

    Args:
      x: Input forwarded unchanged to `self.all_rd`.

    Returns:
      A tuple `(rates, distortions)` gathered at the selected indexes.
    """
    all_rates, all_distortions = self.all_rd(x)
    objective = all_rates + self.lmbda * all_distortions
    best = tf.argmin(objective, axis=-1, output_type=tf.int32)
    # Rates are gathered without batch dimensions, whereas distortions
    # treat every dimension of `best` as a batch dimension.
    best_rates = tf.gather(all_rates, best)
    best_distortions = tf.gather(
        all_distortions, best, batch_dims=best.shape.rank)
    return best_rates, best_distortions
def _inverse(self, y):
    """Finds, for each element of `y`, the index of the closest map value.

    `y` is flattened, two neighbouring candidate indices into
    `self.map_values` are located with a sorted search, and the candidate
    whose `self._forward` value is nearer to the input is selected.

    Args:
      y: Tensor of values to invert. Its static shape (`y.shape`) is used
        for the final reshape.
        NOTE(review): presumably the static shape must be fully defined --
        confirm.

    Returns:
      A tensor of candidate indices reshaped to `y.shape`.
    """
    map_values = tf.convert_to_tensor(self.map_values)
    flat_y = tf.reshape(y, shape=[-1])
    # Search for the indices of map_values that are closest to flat_y.
    # Since map_values is strictly increasing, the closest is either the
    # first one that is strictly greater than flat_y, or the one before it.
    # Both candidates are clamped so they stay valid indices into map_values.
    upper_candidates = tf.minimum(
        tf.size(map_values) - 1,
        tf.searchsorted(map_values, values=flat_y, side='right'))
    lower_candidates = tf.maximum(0, upper_candidates - 1)
    candidates = tf.stack([lower_candidates, upper_candidates], axis=-1)
    # Absolute error of each candidate after mapping it forward again.
    lower_cand_diff = tf.abs(flat_y - self._forward(lower_candidates))
    upper_cand_diff = tf.abs(flat_y - self._forward(upper_candidates))
    if self.validate_args:
        # Tie the assertion to `candidates` so that it is evaluated whenever
        # the result is: the better candidate must be near the input value.
        with tf.control_dependencies([
            assert_util.assert_near(
                tf.minimum(lower_cand_diff, upper_cand_diff),
                0,
                message='inverse value not found')
        ]):
            candidates = tf.identity(candidates)
    # Pairs of (row, column): the row is the flat element position, the
    # column (0 = lower, 1 = upper) is the candidate with the smaller error.
    candidate_selector = tf.stack([
        tf.range(tf.size(flat_y), dtype=tf.int32),
        tf.argmin([lower_cand_diff, upper_cand_diff], output_type=tf.int32)
    ], axis=-1)
    return tf.reshape(tf.gather_nd(candidates, candidate_selector),
                      shape=y.shape)
def contrastive_loss(similarity_matrix,
                     metric_values,
                     temperature,
                     coupling_temperature=1.0,
                     use_coupling_weights=True):
    """Contrastive loss with soft coupling.

    For every row, the positive is the column with the smallest metric value.
    The loss is the mean over rows of `logsumexp(logits) - positive_logit`,
    with logits given by `similarity_matrix / temperature`.

    Args:
      similarity_matrix: Matrix of similarities, indexed [row, column].
      metric_values: Matrix of metric values with matching indexing; the
        row-wise argmin defines the positive pair.
      temperature: Divisor applied to `similarity_matrix`.
      coupling_temperature: Divisor applied to `metric_values` before the
        coupling weights are computed.
      use_coupling_weights: Whether to add the metric-derived log-weights to
        the positive and negative logits.

    Returns:
      The mean loss over rows.
    """
    logging.info('Using alternative contrastive loss.')
    similarity_matrix /= temperature
    negative_logits = similarity_matrix

    # (row, argmin column) pairs addressing the positive entry of each row.
    num_rows = tf.shape(metric_values)[0]
    closest_cols = tf.argmin(metric_values, axis=1, output_type=tf.int32)
    positive_idx = tf.stack(
        (tf.range(num_rows, dtype=tf.int32), closest_cols), axis=1)
    positive_logits = tf.gather_nd(similarity_matrix, positive_idx)

    if use_coupling_weights:
        metric_values /= coupling_temperature
        positive_weights = -tf.gather_nd(metric_values, positive_idx)
        positive_logits += positive_weights
        # Negatives are weighted by log(1 - exp(-metric)); the positive
        # entries are overwritten with the positive weight instead.
        log_uncoupled = tf.math.log((1.0 - tf.exp(-metric_values)) + EPS)
        negative_logits += tf.tensor_scatter_nd_update(
            log_uncoupled, positive_idx, positive_weights)

    log_partition = tf.math.reduce_logsumexp(negative_logits, axis=1)
    return tf.reduce_mean(log_partition - positive_logits)
def argmin(a, axis=None):
    """Returns the indices of the minimum values along an axis.

    Args:
      a: Input accepted by `array_creation.asarray`; it is promoted to at
        least one dimension.
      axis: Optional axis to reduce over. When None the input is flattened
        first, as numpy does.

    Returns:
      The result of `tf.argmin` converted with `utils.tensor_to_ndarray`.
    """
    arr = atleast_1d(array_creation.asarray(a))
    # When axis is None numpy flattens the array.
    tensor = arr.data if axis is not None else tf.reshape(arr.data, [-1])
    return utils.tensor_to_ndarray(tf.argmin(input=tensor, axis=axis))
def visualize_nearest_neighbours(model, data, global_step, batch_size,
                                 num_steps, num_frames_per_step, split):
    """Writes image summaries of nearest neighbours in embedding space.

    The first sequence of the batch is the query. For each remaining sequence
    and each query step, the candidate step with the smallest mean squared
    embedding distance is looked up and its frame is shown. A softmax over
    the negated distances is stored as a similarity matrix.

    Args:
      model: Mapping with the entries 'cnn' and 'emb'.
      data: Mapping with a 'frames' entry; also passed to `get_cnn_feats`.
      global_step: Step value passed to `tf.summary.image`.
      batch_size: Number of sequences in the batch.
      num_steps: Number of embedded steps per sequence.
      num_frames_per_step: Number of frames per step; the last frame of each
        step is the one visualized.
      split: Prefix used in the summary tags.
    """
    # Set learning_phase to False to use models in inference mode.
    tf.keras.backend.set_learning_phase(0)

    cnn_model, emb_model = model['cnn'], model['emb']
    features = get_cnn_feats(cnn_model, data, training=False)
    per_step_embs = tf.split(emb_model(features, num_steps), num_steps, axis=0)
    embeddings = tf.stack(per_step_embs, axis=1)
    query_embs = embeddings[0]

    # Selects the last frame of every step.
    last_frame_of_step = slice(
        num_frames_per_step - 1, None, num_frames_per_step)

    videos = tf.unstack(data['frames'], num=batch_size, axis=0)
    rows = [videos[0][last_frame_of_step]]
    similarities = np.zeros((batch_size - 1, num_steps, num_steps),
                            dtype=np.float32)

    for cand in range(1, batch_size):
        cand_embs = embeddings[cand]
        cand_frames = tf.unstack(
            videos[cand],
            num=num_steps * num_frames_per_step,
            axis=0)[last_frame_of_step]

        neighbours = []
        for step in range(num_steps):
            tiled_query = tf.tile(query_embs[step:step + 1], [num_steps, 1])
            distances = tf.reduce_mean(
                tf.math.squared_difference(tiled_query, cand_embs), axis=1)
            similarities[cand - 1, step] = softmax(-1.0 * distances)
            neighbours.append(cand_frames[tf.argmin(distances)])

        rows.append(tf.stack(neighbours, axis=0))

    # Lay out the frames of each row along axis 1, then stack the rows along
    # axis 0 and add a leading batch dimension for the image summary.
    strips = [tf.concat(tf.unstack(row, num=num_steps), axis=1)
              for row in rows]
    nn_summary = tf.expand_dims(tf.concat(strips, axis=0), axis=0)
    tf.summary.image('%s/nn' % split, nn_summary, step=global_step)

    # Convert sim_matrix to float32 as summary_image doesn't take float64
    similarities = similarities.astype(np.float32)
    tf.summary.image('%s/similarity_matrix' % split,
                     np.expand_dims(similarities, axis=3),
                     step=global_step)
def _estimate_initial_position(log_moneyness, total_variance):
    """Provides a heuristic initial guess for the SVI parameters.

    The values for `rho` and `sigma` are predetermined. `rho = 0` enforces the
    symmetry of the initial skew. `sigma = 0.5` enforces certain smoothness at
    the vertex of the skew.

    The value for `m` is estimated as the position of the vertex of the skew:
    `m` is the log-moneyness corresponding to the smallest input variance.

    The values for `a`, `b` are computed using a simple linear regression,
    using the input data and the above estimates for `rho`, `sigma` and `m`.
    If the regressor has zero variance (e.g. a single strike per skew), the
    slope `b` is set to zero and `a` becomes the mean total variance, so the
    guess stays finite.

    Args:
      log_moneyness: A rank 2 real `Tensor` of shape [batch_size, num_strikes].
        The log-moneyness `k := log(K/F)` of the options.
      total_variance: A rank 2 real `Tensor` of shape [batch_size,
        num_strikes]. The target total variance to be approximated by the SVI
        model.

    Returns:
      A rank 2 real `Tensor` of shape [batch_size, 5], representing an initial
      guess `[a, b, rho, m, sigma]` for the SVI parameter optimization.
    """
    dtype = total_variance.dtype

    # Estimate `m` as the log-moneyness with the smallest target variance.
    minvol_index = tf.argmin(total_variance, axis=1)
    m = tf.gather(log_moneyness, minvol_index, axis=1, batch_dims=1)

    # The initial guess will be a reasonably smooth symmetric smile.
    sigma = 0.5 * tf.ones_like(minvol_index, dtype=dtype)
    rho = tf.zeros_like(minvol_index, dtype=dtype)

    # At this point, the SVI equation is reduced to `y = a + b * x`, where
    y = total_variance
    x = tf.sqrt((log_moneyness - m[:, None])**2 + sigma[:, None]**2)

    # Solve the simple regression for `a` and `b`, using the standard formulas,
    # cf. https://en.wikipedia.org/wiki/Simple_linear_regression
    e_x = tf.math.reduce_mean(x, axis=1)
    e_y = tf.math.reduce_mean(y, axis=1)
    e_xy = tf.math.reduce_mean(x * y, axis=1)
    var_x = tf.math.reduce_variance(x, axis=1)

    # `var_x` is exactly zero for degenerate skews; a plain division would
    # then yield NaN and poison the whole initial position.
    b = tf.math.divide_no_nan(e_xy - e_x * e_y, var_x)
    a = e_y - b * e_x

    initial_position = tf.stack([a, b, rho, m, sigma], axis=1)

    return initial_position
def argmin(a, axis=None):
    """Finds the indices of the minimum values along an array axis.

    Args:
      a: array_like. Could be an ndarray, a Tensor or any object that can be
        converted to a Tensor using `tf.convert_to_tensor`.
      axis: Optional. The axis along which to compute argmin. If None, index
        of the min element in the flattened array is returned.

    Returns:
      An ndarray with the same shape as `a` with `axis` removed if not None.
      If `axis` is None, a scalar array is returned.
    """
    arr = array_creation.asarray(a)
    if axis is not None and not utils.isscalar(arr):
        tensor = arr.data
    else:
        # When axis is None or the array is a scalar, numpy flattens the
        # array.
        tensor = tf.reshape(arr.data, [-1])
    return utils.tensor_to_ndarray(tf.argmin(input=tensor, axis=axis))
def call(self, x, training=False):
    """Quantizes `x` against the current centroids, one segment per head.

    Each input vector (last axis of `x`, reshaped to `self.depth`) is split
    into `self.num_heads` segments, and every segment is replaced by its
    nearest centroid. Centroids are derived from `self.sums` and
    `self.counts`. When `training` is true, rarely used centroids are
    restarted before quantization and the running counts and sums are
    updated afterwards.

    Args:
      x: Input tensor; it is reshaped to `(-1, self.depth)`.
      training: Whether to restart centroids and update `self.counts` and
        `self.sums`.

    Returns:
      A tuple `(z, c)`: `z` has the shape of `x` and carries the quantized
      values in the forward pass while gradients pass straight through to
      `x`; `c` holds the selected centroid indices, reshaped to the leading
      dimensions of `x` plus a final axis of size `self.num_heads`.
    """
    x_flat = tf.reshape(x, shape=(-1, self.depth))
    # Split each input vector into one segment per head.
    x_flat_split = tf.split(x_flat, self.num_heads, axis=1)
    # Segments of all heads are stacked along the row axis.
    x_flat = tf.concat(x_flat_split, axis=0)
    if training:
        # Figure out which centroids we want to keep, and which we want to
        # restart.
        # NOTE(review): `x_flat.shape[0]` is the static row count; presumably
        # it is always known here -- confirm for graph-mode callers.
        n = x_flat.shape[0]
        keep = self.counts * self.k > self.restart_threshold * n
        restart = tf.math.logical_not(keep)
        # Replace centroids to restart with elements from the batch, using
        # samples from a uniform distribution as a fallback in case we need
        # to restart more centroids than we have elements in the batch.
        restart_idx = tf.squeeze(tf.where(restart), -1)
        n_replace = tf.minimum(tf.shape(restart_idx)[0], x_flat.shape[0])
        e_restart = tf.tensor_scatter_nd_update(
            tf.random.uniform([self.k, self.depth // self.num_heads]),
            tf.expand_dims(restart_idx[:n_replace], 1),
            tf.random.shuffle(x_flat)[:n_replace])
        # Compute the values of the centroids we want to keep by dividing the
        # summed vectors by the corresponding counts.
        e = tf.where(
            tf.expand_dims(keep, 1),
            tf.math.divide_no_nan(self.sums, tf.expand_dims(self.counts, 1)),
            e_restart)
    else:
        # If not training, just use the centroids as is with no restarts.
        e = tf.math.divide_no_nan(self.sums, tf.expand_dims(self.counts, 1))
    # Compute distance between each input vector and each cluster center.
    # This is the expansion |x|^2 - 2 * x.e + |e|^2 of the squared distance.
    distances = (tf.expand_dims(tf.reduce_sum(x_flat**2, axis=1), 1) -
                 2 * tf.matmul(x_flat, tf.transpose(e)) +
                 tf.expand_dims(tf.reduce_sum(e**2, axis=1), 0))
    # Find nearest cluster center for each input vector.
    c = tf.argmin(distances, axis=1)
    # Quantize input vectors with straight-through estimator.
    z = tf.nn.embedding_lookup(e, c)
    # Undo the per-head stacking so that `z` lines up with `x` again.
    z_split = tf.split(z, self.num_heads, axis=0)
    z = tf.concat(z_split, axis=1)
    z = tf.reshape(z, tf.shape(x))
    z = x + tf.stop_gradient(z - x)
    if training:
        # Compute cluster counts and vector sums over the batch.
        oh = tf.one_hot(indices=c, depth=self.k)
        counts = tf.reduce_sum(oh, axis=0)
        sums = tf.matmul(oh, x_flat, transpose_a=True)
        # Apply exponential moving average to cluster counts and vector sums.
        self.counts.assign_sub((1 - self.gamma) * (self.counts - counts))
        self.sums.assign_sub((1 - self.gamma) * (self.sums - sums))
    # Regroup the indices so that each input vector gets one index per head.
    c_split = tf.split(c, self.num_heads, axis=0)
    c = tf.stack(c_split, axis=1)
    c = tf.reshape(c, tf.concat([tf.shape(x)[:-1], [self.num_heads]], axis=0))
    return z, c
def quantize(self, x):
    """Selects the index minimizing `rates + self.lmbda * distortions`.

    Args:
      x: Input forwarded unchanged to `self.all_rd`.

    Returns:
      A tuple `(self.codebook, rates, indexes)`, where `rates` is the first
      output of `self.all_rd(x)` and `indexes` is the int32 argmin of the
      rate-distortion objective along the last axis.
    """
    rates, distortions = self.all_rd(x)
    objective = rates + self.lmbda * distortions
    best = tf.argmin(objective, axis=-1, output_type=tf.int32)
    return self.codebook, rates, best
def calibrate(*,
              forwards,
              expiries,
              strikes,
              volatilities,
              initial_position=None,
              optimizer_fn=None,
              tolerance=1e-6,
              maximum_iterations=100,
              dtype=None,
              name=None):
    """Calibrates the SVI model parameters for a batch of volatility skews.

    This function optimizes the SVI model parameters to fit the given
    volatilities at various strikes. The loss function is the L2 norm of the
    differences in the volatility space.

    Each volatility skew in the batch corresponds to a fixed expiry for
    options on some underlying assets. Optimization is done independently for
    each skew.

    TODO(b/189458981): add flexibility to accept higher rank tensors as inputs.

    #### Example
    The example shows how to calibrate a single skew, loosely based on market
    prices for GOOG210820C* (GOOG calls with 2021-08-20 expiry) as of
    2021-05-27.
    https://finance.yahoo.com/quote/GOOG/options?p=GOOG&date=1629417600

    ```python
    import numpy as np
    import tensorflow.compat.v2 as tf
    import tf_quant_finance as tff

    forwards = np.array([2402.])
    expiries = np.array([0.23])

    strikes = np.array([[
        1700., 1800., 1900., 2000., 2050., 2100., 2200., 2250., 2350., 2400.,
        2450., 2500., 2550., 2600., 2650., 2700., 2750., 2800., 2850., 2900.,
        2950., 3000.
    ]])

    volatilities = np.array([[
        0.5335, 0.4882, 0.4389, 0.3937, 0.3749, 0.3569, 0.3259, 0.3135, 0.29,
        0.283, 0.2717, 0.2667, 0.2592, 0.2566, 0.2564, 0.2574, 0.2595, 0.2621,
        0.2669, 0.2732, 0.2826, 0.2967
    ]])

    tolerance=1e-4
    (svi_params, converged, _) = tff.experimental.svi.calibrate(
        forwards=forwards,
        expiries=expiries,
        strikes=strikes,
        volatilities=volatilities)

    # Expected results are tensors containing (up to numerical tolerance):
    # svi_params: [[-0.2978, 0.4212, 0.0415, 0.1282, 0.7436]]
    # converged: [True]
    ```

    Args:
      forwards: A rank 1 real `Tensor` of shape [batch_size]. The forward
        prices of the underlying asset for each skew in the batch.
      expiries: A rank 1 real `Tensor` of shape [batch_size]. The option
        expiries for each skew in the batch.
      strikes: A rank 2 real `Tensor` of shape [batch_size, num_strikes]. The
        strike prices of the options.
      volatilities: A rank 2 real `Tensor` of shape [batch_size, num_strikes].
        The market implied Black-Scholes volatilities to calibrate.
      initial_position: A rank 2 real `Tensor` of shape [batch_size, 5]. The
        SVI parameters to use as the initial values for the optimization. The
        default value is None, in which case the initial values are guessed
        heuristically and may lead to slower convergence.
      optimizer_fn: Optional Python callable which implements the algorithm
        used to minimize the objective function during calibration. It should
        have the following interface:
        result = optimizer_fn(value_and_gradients_function, initial_position,
          tolerance, max_iterations)
        `value_and_gradients_function` is a Python callable that accepts a
        point as a real `Tensor` and returns a tuple of `Tensor`s of real
        dtype containing the value of the function and its gradient at that
        point. `initial_position` is a real `Tensor` containing the starting
        point of the optimization, `tolerance` is a real scalar `Tensor` for
        stopping tolerance for the procedure and `max_iterations` specifies
        the maximum number of iterations.
        `optimizer_fn` should return a namedtuple containing the items:
        `position` (a tensor containing the optimal value), `converged` (a
        boolean indicating whether the optimizer converged according to the
        specified criteria), `failed` (a boolean indicating if the
        optimization resulted in a failure), `num_iterations` (the number of
        iterations used), and `objective_value` (the value of the objective
        function at the optimal value). The default value for `optimizer_fn`
        is None and conjugate gradient algorithm is used.
      tolerance: Scalar `Tensor` of real dtype. The absolute tolerance for
        terminating the iterations.
        Default value: 1e-6.
      maximum_iterations: Scalar positive int32 `Tensor`. The maximum number
        of iterations during the optimization.
        Default value: 100.
      dtype: The default dtype to use when converting values to `Tensor`s.
        Default value: `None`, uses the default dtypes inferred by TensorFlow.
      name: Python string. The name to give to the ops created by this
        function.
        Default value: `None`, maps to the default name
        `svi_skew_calibration`.

    Returns:
      A Tuple of three elements: (parameters, status, iterations)
      - parameters: a tensor of shape [batch_size, 5] representing raw
        parameters for the SVI model calibrated with given input
        Black-Scholes volatilities.
      - status: boolean, whether the optimization algorithm succeeded in
        finding the optimal point based on the specified convergence
        criteria.
      - iterations: the number of iterations performed during the
        optimization.
    """
    name = name or 'svi_skew_calibration'
    with tf.name_scope(name):
        volatilities = tf.convert_to_tensor(
            volatilities, dtype=dtype, name='volatilities')
        dtype = dtype or volatilities.dtype
        forwards = tf.convert_to_tensor(forwards, dtype=dtype, name='forwards')
        expiries = tf.convert_to_tensor(expiries, dtype=dtype, name='expiries')
        strikes = tf.convert_to_tensor(strikes, dtype=dtype, name='strikes')

        # the standard notation for log moneyness in the literature is
        # k:=log(K/F)
        log_moneyness = tf.math.log(strikes / forwards[:, None])

        if initial_position is None:
            # Flat initial smile anchored at the strike with the lowest
            # volatility: `b = 0`, `rho = 0`, `sigma = 0.5`.
            minvol_index = tf.argmin(volatilities, axis=1)
            a0 = tf.gather(
                volatilities, minvol_index, axis=1, batch_dims=1)**2
            b0 = tf.zeros_like(forwards, dtype=dtype)
            rho0 = tf.zeros_like(forwards, dtype=dtype)
            sigma0 = 0.5 * tf.ones_like(forwards, dtype=dtype)
            m0 = tf.gather(log_moneyness, minvol_index, axis=1, batch_dims=1)
            initial_position = tf.transpose([a0, b0, rho0, m0, sigma0])

        if optimizer_fn is None:
            optimizer_fn = optimizer.conjugate_gradient_minimize

        @make_val_and_grad_fn
        def loss_function(parameters):
            """Loss function for the optimization."""
            total_variance = parameterizations.total_variance_from_raw(
                parameters, log_moneyness)

            is_negative = total_variance < 0.
            # `tf.where` back-propagates through both branches. Taking the
            # square root of a negative variance in the unselected branch
            # would turn the gradient into NaN, so feed the root a harmless
            # positive placeholder wherever the variance is negative.
            safe_variance = tf.where(
                is_negative, tf.ones_like(total_variance), total_variance)
            model_vol = tf.where(
                is_negative,
                tf.zeros_like(total_variance),
                tf.sqrt(safe_variance / expiries[:, None]))

            # Negative total variance is penalized linearly so that the
            # optimizer is pushed back towards the admissible region.
            squared_difference = tf.where(
                is_negative,
                volatilities**2 - total_variance,
                tf.math.squared_difference(model_vol, volatilities))

            loss = tf.math.reduce_sum(squared_difference, axis=1)
            return loss

        optimization_result = optimizer_fn(
            loss_function,
            initial_position=initial_position,
            tolerance=tolerance,
            max_iterations=maximum_iterations)

        # The optimizer may converge negative SVI sigma; to enforce the
        # positivity convention, we take sigma by absolute value, which
        # yields the same model.
        calibrated_parameters = tf.concat([
            optimization_result.position[:, :-1],
            tf.math.abs(optimization_result.position[:, -1, None])
        ], axis=1)

        return (calibrated_parameters, optimization_result.converged,
                optimization_result.num_iterations)