def make_response_likelihood(self, w, x):
  """Builds a unit-scale Normal whose location is the product of `w` and `x`.

  Args:
    w: tensor multiplied on the left of `x`. A rank-1 `w` is treated as a
      single row; any other rank is passed to `tf.matmul` unchanged.
    x: tensor multiplied on the right of `w`.

  Returns:
    A `tfd.Normal` with `loc = matmul(w, x)` and a scale of ones shaped like
    `loc`.
  """
  w_is_vector = tensorshape_util.rank(w.shape) == 1
  if w_is_vector:
    # Promote `w` to a one-row matrix for the matmul, then drop that row axis.
    loc = tf.matmul(w[tf.newaxis], x)[0]
  else:
    loc = tf.matmul(w, x)
  unit_scale = tf.ones_like(loc)
  return tfd.Normal(loc=loc, scale=unit_scale)  # [n]
def _stddev(self):
  """Returns `scale * pi / sqrt(2)`, broadcast against the shape of `loc`."""
  # Multiplying by ones_like(loc) broadcasts `scale` against `loc`.
  broadcast_scale = self.scale * tf.ones_like(self.loc)
  return broadcast_scale * np.pi / np.sqrt(2.)
def reduce_weighted_logsumexp(logx,
                              w=None,
                              axis=None,
                              keep_dims=False,
                              return_sign=False,
                              name=None):
  """Computes `log(abs(sum(w * exp(logx))))` along the given axes.

  When every weight is known to be positive it is cheaper to call
  `tf.reduce_logsumexp(logx + tf.math.log(w))` directly.

  `logx` is reduced along the dimensions listed in `axis`. Unless `keep_dims`
  is true, each reduced dimension is dropped from the result; otherwise it is
  retained with length 1. With `axis=None` every dimension is reduced.

  The computation is more numerically stable than `log(sum(w * exp(logx)))`:
  the largest `logx + log|w|` term is subtracted before exponentiating and
  added back after taking the log.

  For example:

  ```python
  x = tf.constant([[0., 0, 0],
                   [0, 0, 0]])

  w = tf.constant([[-1., 1, 1],
                   [1, 1, 1]])

  du.reduce_weighted_logsumexp(x, w)
  # ==> log(-1*1 + 1*1 + 1*1 + 1*1 + 1*1 + 1*1) = log(4)

  du.reduce_weighted_logsumexp(x, w, axis=0)
  # ==> [log(-1+1), log(1+1), log(1+1)]

  du.reduce_weighted_logsumexp(x, w, axis=1)
  # ==> [log(-1+1+1), log(1+1+1)]

  du.reduce_weighted_logsumexp(x, w, axis=1, keep_dims=True)
  # ==> [[log(-1+1+1)], [log(1+1+1)]]

  du.reduce_weighted_logsumexp(x, w, axis=[0, 1])
  # ==> log(-1+5)
  ```

  Args:
    logx: The tensor to reduce. Should have numeric type.
    w: The weight tensor; converted to the dtype of `logx`. If `None`, the
      result is a plain `tf.reduce_logsumexp` and the sign is all ones.
    axis: The dimensions to reduce. If `None` (the default), reduces all
      dimensions.
    keep_dims: If true, retains reduced dimensions with length 1.
    return_sign: If `True`, also returns the sign of the result.
    name: A name for the operation (optional).

  Returns:
    lswe: The `log(abs(sum(w * exp(logx))))` reduced tensor.
    sign: (Only if `return_sign`) The sign of `sum(w * exp(logx))`.
  """
  with tf.name_scope(name or 'reduce_weighted_logsumexp'):
    logx = tf.convert_to_tensor(logx, name='logx')

    if w is None:
      # Unweighted: every term is positive, so the sign is identically one.
      unweighted = tf.reduce_logsumexp(logx, axis=axis, keepdims=keep_dims)
      if not return_sign:
        return unweighted
      return unweighted, tf.ones_like(unweighted)

    w = tf.convert_to_tensor(w, dtype=logx.dtype, name='w')
    log_abs_terms = logx + tf.math.log(tf.abs(w))

    # Shift by the largest term. If that maximum is `-inf` or `inf` we shift
    # by zero instead, since subtracting it would yield `inf - inf = NaN`.
    # Any shift is valid as long as it is added back after the
    # `log(sum(...))`.
    shift = tf.reduce_max(log_abs_terms, axis=axis, keepdims=True)
    shift = tf.where(
        tf.math.is_inf(shift), tf.zeros([], shift.dtype), shift)

    signed_terms = tf.sign(w) * tf.exp(log_abs_terms - shift)
    signed_sum = tf.reduce_sum(signed_terms, axis=axis, keepdims=keep_dims)

    if not keep_dims:
      # The shift was computed with keepdims=True so it could broadcast
      # above; drop those axes so it lines up with the reduced sum.
      shift = tf.squeeze(shift, axis)

    sgn = tf.sign(signed_sum)
    lswe = shift + tf.math.log(sgn * signed_sum)
    return (lswe, sgn) if return_sign else lswe
def cdf(x):
  """Evaluates a step-function CDF with jumps at 0, 1, 2 and 3.

  The value is 0 for `x < 0`, 0.1 on `[0, 1)`, 0.3 on `[1, 2)`, 0.6 on
  `[2, 3)` and 1 for everything else.
  """
  ones = tf.ones_like(x)
  answer = ones
  # Apply thresholds from largest to smallest so tighter bounds overwrite
  # the looser ones.
  for upper_bound, level in ((3, 0.6), (2, 0.3), (1, 0.1), (0, 0)):
    answer = tf1.where(x < upper_bound, level * ones, answer)
  return answer
def sample_lkj(num_samples,
               dimension,
               concentration,
               cholesky_space=False,
               seed=None,
               name=None):
  """Returns a Tensor of samples from an LKJ distribution.

  Args:
    num_samples: Python `int`. The number of samples to draw.
    dimension: Python `int`. The dimension of correlation matrices.
    concentration: `Tensor` representing the concentration of the LKJ
      distribution.
    cholesky_space: Python `bool`. Whether to take samples from LKJ or
      Chol(LKJ).
    seed: Python integer seed for RNG
    name: Python `str` name prefixed to Ops created by this function.
      Defaults to `'sample_lkj'` when `None`.

  Returns:
    samples: A Tensor of correlation matrices (or Cholesky factors of
      correlation matrices if `cholesky_space = True`) with shape
      `[n] + B + [D, D]`, where `B` is the shape of the `concentration`
      parameter, and `D` is the `dimension`.

  Raises:
    ValueError: If `dimension` is negative.
    TypeError: If `concentration` does not have a floating dtype.
  """
  if dimension < 0:
    raise ValueError(
        'Cannot sample negative-dimension correlation matrices.')
  # Notation below: B is the batch shape, i.e., tf.shape(concentration)
  seed = SeedStream(seed, 'sample_lkj')
  # Honor the caller-supplied `name`; fall back to the default only when it
  # is unset. (`'sample_lkj' or name` would always pick the literal.)
  with tf.name_scope(name or 'sample_lkj'):
    concentration = tf.convert_to_tensor(concentration)
    if not dtype_util.is_floating(concentration.dtype):
      raise TypeError(
          'The concentration argument should have floating type, not '
          '{}'.format(dtype_util.name(concentration.dtype)))

    concentration = _replicate(num_samples, concentration)
    concentration_shape = tf.shape(concentration)
    if dimension <= 1:
      # For any dimension <= 1, there is only one possible correlation matrix.
      shape = tf.concat([concentration_shape, [dimension, dimension]], axis=0)
      return tf.ones(shape=shape, dtype=concentration.dtype)
    beta_conc = concentration + (dimension - 2.) / 2.
    beta_dist = beta.Beta(concentration1=beta_conc, concentration0=beta_conc)

    # Note that the sampler below deviates from [1], by doing the sampling in
    # cholesky space. This does not change the fundamental logic of the
    # sampler, but does speed up the sampling.

    # This is the correlation coefficient between the first two dimensions.
    # This is also `r` in reference [1].
    corr12 = 2. * beta_dist.sample(seed=seed()) - 1.

    # Below we construct the Cholesky of the initial 2x2 correlation matrix,
    # which is of the form:
    # [[1, 0], [r, sqrt(1 - r**2)]], where r is the correlation between the
    # first two dimensions.
    # This is the top-left corner of the cholesky of the final sample.
    first_row = tf.concat([
        tf.ones_like(corr12)[..., tf.newaxis],
        tf.zeros_like(corr12)[..., tf.newaxis]
    ], axis=-1)
    second_row = tf.concat(
        [corr12[..., tf.newaxis], tf.sqrt(1 - corr12**2)[..., tf.newaxis]],
        axis=-1)

    chol_result = tf.concat(
        [first_row[..., tf.newaxis, :], second_row[..., tf.newaxis, :]],
        axis=-2)

    for n in range(2, dimension):
      # Loop invariant: on entry, result has shape B + [n, n]
      beta_conc = beta_conc - 0.5
      # norm is y in reference [1].
      norm = beta.Beta(
          concentration1=n / 2.,
          concentration0=beta_conc).sample(seed=seed())
      # distance shape: B + [1] for broadcast
      distance = tf.sqrt(norm)[..., tf.newaxis]
      # direction is u in reference [1].
      # direction shape: B + [n]
      direction = _uniform_unit_norm(n, concentration_shape,
                                     concentration.dtype, seed)
      # raw_correlation is w in reference [1].
      raw_correlation = distance * direction  # shape: B + [n]

      # This is the next row in the cholesky of the result,
      # which differs from the construction in reference [1].
      # In the reference, the new row `z` = chol_result @ raw_correlation^T
      # = C @ raw_correlation^T (where as short hand we use C = chol_result).
      # We prove that the below equation is the right row to add to the
      # cholesky, by showing equality with reference [1].
      # Let S be the sample constructed so far, and let `z` be as in
      # reference [1]. Then at this iteration, the new sample S' will be
      # [[S z^T]
      #  [z 1]]
      # In our case we have the cholesky decomposition factor C, so
      # we want our new row x (same size as z) to satisfy:
      #  [[S z^T]  [[C 0]    [[C^T  x^T]         [[CC^T  Cx^T]
      #   [z 1]] =  [x k]]    [0     k]]  =       [xC^t   xx^T + k**2]]
      # Since C @ raw_correlation^T = z = C @ x^T, and C is invertible,
      # we have that x = raw_correlation. Also 1 = xx^T + k**2, so k
      # = sqrt(1 - xx^T) = sqrt(1 - |raw_correlation|**2) = sqrt(1 -
      # distance**2).
      new_row = tf.concat(
          [raw_correlation, tf.sqrt(1. - norm[..., tf.newaxis])], axis=-1)

      # Finally add this new row, by growing the cholesky of the result.
      chol_result = tf.concat([
          chol_result,
          tf.zeros_like(chol_result[..., 0][..., tf.newaxis])
      ], axis=-1)

      chol_result = tf.concat([chol_result, new_row[..., tf.newaxis, :]],
                              axis=-2)

    if cholesky_space:
      return chol_result

    result = tf.matmul(chol_result, chol_result, transpose_b=True)
    # The diagonal for a correlation matrix should always be ones. Due to
    # numerical instability the matmul might not achieve that, so manually set
    # these to ones.
    result = tf.linalg.set_diag(
        result, tf.ones(shape=tf.shape(result)[:-1], dtype=result.dtype))
    # This sampling algorithm can produce near-PSD matrices on which standard
    # algorithms such as `tf.cholesky` or `tf.linalg.self_adjoint_eigvals`
    # fail. Specifically, as documented in b/116828694, around 2% of trials
    # of 900,000 5x5 matrices (distributed according to 9 different
    # concentration parameter values) contained at least one matrix on which
    # the Cholesky decomposition failed.
    return result
def _entropy(self):
  """Returns `0.5 + 0.5 * log(2 * pi) + log(scale)`, broadcast to `loc`."""
  half_log_two_pi = 0.5 * np.log(2. * np.pi)
  log_normalizer = half_log_two_pi + tf.math.log(self.scale)
  # Multiplying by ones_like(loc) broadcasts the result against `loc`.
  return (0.5 + log_normalizer) * tf.ones_like(self.loc)
def _generate_detections_per_image(boxes,
                                   scores,
                                   max_total_size=100,
                                   nms_iou_threshold=0.3,
                                   score_threshold=0.05,
                                   pre_nms_num_boxes=5000):
  """Runs per-class NMS for one image and keeps the best detections overall.

  For every class the top `pre_nms_num_boxes` scoring boxes are selected,
  padded non-max suppression is applied, and the per-class survivors are
  concatenated and re-ranked by score.

  Args:
    boxes: a tensor with shape [N, num_classes, 4] or [N, 1, 4], which box
      predictions on all feature levels. The N is the number of total anchors
      on all levels. When the second dimension is smaller than the number of
      classes, the last available box column is reused for the remaining
      classes.
    scores: a tensor with shape [N, num_classes], which stacks class
      probability on all feature levels. The N is the number of total anchors
      on all levels. The num_classes is the number of classes predicted by the
      model. Note that the class_outputs here is the raw score.
    max_total_size: a scalar representing maximum number of boxes retained
      over all classes.
    nms_iou_threshold: a float representing the threshold for deciding whether
      boxes overlap too much with respect to IOU.
    score_threshold: a float representing the threshold for deciding when to
      remove boxes based on score.
    pre_nms_num_boxes: an int number of top candidate detections per class
      before NMS.

  Returns:
    nms_boxes: `float` Tensor of shape [max_total_size, 4] representing top
      detected boxes in [y1, x1, y2, x2].
    nms_scores: `float` Tensor of shape [max_total_size] representing sorted
      confidence scores for detected boxes. Padding entries carry a score
      of -1.
    nms_classes: `int` Tensor of shape [max_total_size] representing classes
      for detected boxes.
    valid_detections: `int32` Tensor counting the entries of `nms_scores`
      that are greater than -1; only that many leading boxes are valid.
  """
  per_class_boxes = []
  per_class_scores = []
  per_class_labels = []
  box_class_dim = boxes.get_shape().as_list()[1]
  num_classes = scores.get_shape().as_list()[1]

  for class_id in range(num_classes):
    # Clamp the column index so class-agnostic boxes ([N, 1, 4]) are shared.
    box_column = min(box_class_dim - 1, class_id)
    class_boxes = boxes[:, box_column]
    class_scores = scores[:, class_id]

    # Keep only the best-scoring candidates before the (costly) NMS step.
    num_candidates = tf.minimum(
        tf.shape(input=class_scores)[-1], pre_nms_num_boxes)
    class_scores, candidate_idx = tf.nn.top_k(class_scores, k=num_candidates)
    class_boxes = tf.gather(class_boxes, candidate_idx)

    kept_idx, num_kept = tf.image.non_max_suppression_padded(
        tf.cast(class_boxes, tf.float32),
        tf.cast(class_scores, tf.float32),
        max_total_size,
        iou_threshold=nms_iou_threshold,
        score_threshold=score_threshold,
        pad_to_max_output_size=True,
        name='nms_detections_' + str(class_id))
    kept_boxes = tf.gather(class_boxes, kept_idx)
    kept_scores = tf.gather(class_scores, kept_idx)

    # Positions at or beyond `num_kept` are padding: mark them with -1.
    is_real = tf.less(tf.range(max_total_size), [num_kept])
    kept_scores = tf.where(is_real, kept_scores, -tf.ones_like(kept_scores))
    kept_labels = tf.fill([max_total_size], class_id)

    per_class_boxes.append(kept_boxes)
    per_class_scores.append(kept_scores)
    per_class_labels.append(kept_labels)

  # Merge all classes and keep the globally best `max_total_size` entries.
  all_boxes = tf.concat(per_class_boxes, axis=0)
  all_scores = tf.concat(per_class_scores, axis=0)
  all_labels = tf.concat(per_class_labels, axis=0)
  top_scores, top_idx = tf.nn.top_k(all_scores, k=max_total_size, sorted=True)
  top_boxes = tf.gather(all_boxes, top_idx)
  top_labels = tf.gather(all_labels, top_idx)
  is_valid = tf.cast(tf.greater(top_scores, -1), tf.int32)
  valid_detections = tf.reduce_sum(input_tensor=is_valid)
  return top_boxes, top_scores, top_labels, valid_detections
def __init__(self,
             level_scale_prior=None,
             slope_mean_prior=None,
             slope_scale_prior=None,
             autoregressive_coef_prior=None,
             initial_level_prior=None,
             initial_slope_prior=None,
             observed_time_series=None,
             constrain_ar_coef_stationary=True,
             constrain_ar_coef_positive=False,
             name=None):
  """Specify a semi-local linear trend model.

  Args:
    level_scale_prior: optional `tfd.Distribution` instance specifying a prior
      on the `level_scale` parameter. If `None`, a heuristic default prior is
      constructed based on the provided `observed_time_series`.
      Default value: `None`.
    slope_mean_prior: optional `tfd.Distribution` instance specifying a prior
      on the `slope_mean` parameter. If `None`, a heuristic default prior is
      constructed based on the provided `observed_time_series`.
      Default value: `None`.
    slope_scale_prior: optional `tfd.Distribution` instance specifying a prior
      on the `slope_scale` parameter. If `None`, a heuristic default prior is
      constructed based on the provided `observed_time_series`.
      Default value: `None`.
    autoregressive_coef_prior: optional `tfd.Distribution` instance specifying
      a prior on the `autoregressive_coef` parameter. If `None`, the default
      prior is a standard `Normal(0., 1.)`. Note that the prior may be
      implicitly truncated by `constrain_ar_coef_stationary` and/or
      `constrain_ar_coef_positive`.
      Default value: `None`.
    initial_level_prior: optional `tfd.Distribution` instance specifying a
      prior on the initial level. If `None`, a heuristic default prior is
      constructed based on the provided `observed_time_series`.
      Default value: `None`.
    initial_slope_prior: optional `tfd.Distribution` instance specifying a
      prior on the initial slope. If `None`, a heuristic default prior is
      constructed based on the provided `observed_time_series`.
      Default value: `None`.
    observed_time_series: optional `float` `Tensor` of shape
      `batch_shape + [T, 1]` (omitting the trailing unit dimension is also
      supported when `T > 1`), specifying an observed time series. Any priors
      not explicitly set will be given default values according to the scale
      of the observed time series (or batch of time series). May optionally
      be an instance of `tfp.sts.MaskedTimeSeries`, which includes a mask
      `Tensor` to specify timesteps with missing observations. When `None`,
      a standard deviation of 1 and an initial value of 0 are assumed.
      Default value: `None`.
    constrain_ar_coef_stationary: if `True`, perform inference using a
      parameterization that restricts `autoregressive_coef` to the interval
      `(-1, 1)`, or `(0, 1)` if `constrain_ar_coef_positive` is also `True`,
      corresponding to stationary processes. This will implicitly truncate
      the support of `autoregressive_coef_prior`.
      Default value: `True`.
    constrain_ar_coef_positive: if `True`, perform inference using a
      parameterization that restricts `autoregressive_coef` to be positive,
      or in `(0, 1)` if `constrain_ar_coef_stationary` is also `True`. This
      will implicitly truncate the support of `autoregressive_coef_prior`.
      Default value: `False`.
    name: the name of this model component.
      Default value: 'SemiLocalLinearTrend'.
  """
  with tf.name_scope(name or 'SemiLocalLinearTrend') as name:
    # Scale statistics that drive every heuristic default below.
    if observed_time_series is None:
      observed_stddev, observed_initial = 1., 0.
    else:
      _, observed_stddev, observed_initial = sts_util.empirical_statistics(
          observed_time_series)

    # Heuristic default priors. Overriding these may dramatically
    # change inference performance and results.
    if level_scale_prior is None:
      level_scale_prior = tfd.LogNormal(
          loc=tf.math.log(.01 * observed_stddev), scale=2.)
    if slope_mean_prior is None:
      slope_mean_prior = tfd.Normal(loc=0., scale=observed_stddev)
    if slope_scale_prior is None:
      slope_scale_prior = tfd.LogNormal(
          loc=tf.math.log(.01 * observed_stddev), scale=2.)
    if autoregressive_coef_prior is None:
      autoregressive_coef_prior = tfd.Normal(
          loc=0., scale=tf.ones_like(observed_initial))
    if initial_level_prior is None:
      initial_level_prior = tfd.Normal(
          loc=observed_initial,
          scale=tf.abs(observed_initial) + observed_stddev)
    if initial_slope_prior is None:
      initial_slope_prior = tfd.Normal(loc=0., scale=observed_stddev)

    # Joint prior over the two latent dimensions: [level, slope].
    initial_means = tf.stack(
        [initial_level_prior.mean(), initial_slope_prior.mean()], axis=-1)
    initial_stddevs = tf.stack(
        [initial_level_prior.stddev(), initial_slope_prior.stddev()],
        axis=-1)
    self._initial_state_prior = tfd.MultivariateNormalDiag(
        loc=initial_means, scale_diag=initial_stddevs)

    # Constrain the support of the autoregressive coefficient.
    if constrain_ar_coef_positive:
      if constrain_ar_coef_stationary:
        autoregressive_coef_bijector = tfb.Sigmoid()  # support in (0, 1)
      else:
        autoregressive_coef_bijector = tfb.Softplus()  # support in (0, infty)
    elif constrain_ar_coef_stationary:
      autoregressive_coef_bijector = tfb.Tanh()  # support in (-1, 1)
    else:
      autoregressive_coef_bijector = tfb.Identity()  # unconstrained

    stddev_preconditioner = tfb.Scale(scale=observed_stddev)
    scaled_softplus = tfb.Chain([stddev_preconditioner, tfb.Softplus()])
    model_parameters = [
        Parameter('level_scale', level_scale_prior, scaled_softplus),
        Parameter('slope_mean', slope_mean_prior, stddev_preconditioner),
        Parameter('slope_scale', slope_scale_prior, scaled_softplus),
        Parameter('autoregressive_coef',
                  autoregressive_coef_prior,
                  autoregressive_coef_bijector),
    ]
    super(SemiLocalLinearTrend, self).__init__(
        parameters=model_parameters, latent_size=2, name=name)
def make_response_likelihood(self, w, x):
  """Builds a unit-scale Normal whose location is the product of `w` and `x`.

  Args:
    w: tensor multiplied on the left of `x`. If its static rank is 1 it is
      treated as a single row; otherwise it is passed to `tf.matmul` as is.
    x: tensor multiplied on the right of `w`.

  Returns:
    A `tfd.Normal` with `loc = matmul(w, x)` and a scale of ones shaped like
    `loc`.
  """
  if w.shape.ndims != 1:
    mean = tf.matmul(w, x)
  else:
    # Promote `w` to a one-row matrix for the matmul, then drop that row axis.
    row = w[tf.newaxis]
    mean = tf.matmul(row, x)[0]
  return tfd.Normal(loc=mean, scale=tf.ones_like(mean))  # [n]
def _calculate_spline_coeffs(x_data, y_data):
  """Calculates the coefficients for the spline interpolation.

  These are the values of the second derivative of the spline at `x_data`.
  See p.548 of [1].

  Below is an outline of the function when the number of observations is
  equal to 7. The coefficients are obtained by building and solving a
  tridiagonal linear system of equations with symmetric matrix

     w2,  dx2,   0,   0,   0
     dx2,  w3, dx3,   0,   0
     0,   dx3,  w4, dx4,   0
     0,     0, dx4,  w5, dx5
     0,     0,   0, dx5,  w6

  where:
  wn = 2 * (x_data[n-2] + x_data[n-1])
  dxn = x_data[n-1] - x_data[n-2]

  and the right hand side of the equation is:
  [[3*( (d2-d1)/X1 - (d1-d0)/x0],
   [3*( (d3-d2)/X2 - (d2-d1)/x1],
   ...
  ]

  with di = y_data[..., i]

  Solve for `spline_coeffs`, so that  matrix * spline_coeffs = rhs

  the solution is the `spline_coeffs` parameter of the spline equation:

  y_pred = a(spline_coeffs) * t^3 + b(spline_coeffs) * t^2
           + c(spline_coeffs) * t + d(spline_coeffs)
  with t being the proportion of the difference between the x value of
  the spline used and the nx_value of the next spline:

  t = (x_values - x_data[:,n]) / (x_data[:,n+1]-x_data[:,n])

  and `a`, `b`, `c`, and `d` are functions of `spline_coeffs` and `x_data` and
  are provided in the `interpolate` function.

  Rows of the system that touch a zero-width interval (repeated x values) are
  replaced by the trivial equation `1 * coeff = 0`.

  ## References:
  [1]: R. Sedgewick, Algorithms in C, 1990, p. 545-550.
    Link: http://index-of.co.uk/Algorithms/Algorithms%20in%20C.pdf

  Args:
    x_data: A real `Tensor` of shape `[..., num_points]` containing
      X-coordinates of points to fit the splines to. The values have to
      be monotonically non-decreasing along the last dimension.
    y_data: A `Tensor` of the same shape and `dtype` as `x_data` containing
      Y-coordinates of points to fit the splines to.

  Returns:
     A `Tensor` of the same shape and `dtype` as `x_data`. Represents the
     spline coefficients for the cubic spline interpolation.
  """
  # Interval widths between consecutive x points (one element shorter than
  # `x_data`).
  dx = x_data[..., 1:] - x_data[..., :-1]

  # Main diagonal: 2 * (x_data[i+1] - x_data[i-1]); two elements shorter than
  # `x_data`.
  main_diag = 2.0 * (x_data[..., 2:] - x_data[..., :-2])
  upper_diag = dx[..., 1:]
  lower_diag = dx[..., :-1]

  # Rows adjacent to a zero-width interval are degenerate and get neutralised.
  degenerate_row = tf.logical_or(
      tf.equal(upper_diag, 0), tf.equal(lower_diag, 0))
  main_diag_fixed = tf.where(
      degenerate_row, tf.ones_like(main_diag), main_diag)
  upper_diag_fixed = tf.where(
      tf.equal(lower_diag, 0), tf.zeros_like(upper_diag), upper_diag)
  lower_diag_fixed = tf.where(
      tf.equal(upper_diag, 0), tf.zeros_like(lower_diag), lower_diag)
  diagonals = tf.stack(
      [upper_diag_fixed, main_diag_fixed, lower_diag_fixed], axis=-2)

  # Right-hand side: built from the divided differences of `y_data`, with
  # zero-width intervals contributing zero instead of a division by zero.
  slopes = (y_data[..., 1:] - y_data[..., :-1]) / dx
  slopes = tf.where(tf.equal(dx, 0), tf.zeros_like(slopes), slopes)
  # rhs[i] = -3 * (slopes[i] - slopes[i + 1])
  rhs = -3 * (slopes[..., :-1] - slopes[..., 1:])
  rhs = tf.where(degenerate_row, tf.zeros_like(rhs), rhs)

  # Partial pivoting is unnecessary since the matrix is diagonally dominant.
  interior_coeffs = tf.linalg.tridiagonal_solve(
      diagonals, rhs, partial_pivoting=False)

  # Pad a zero coefficient on both ends so the result matches the shape of
  # `x_data`.
  end_pad = tf.zeros_like(dx[..., :1], dtype=x_data.dtype)
  return tf.concat([end_pad, interior_coeffs, end_pad], axis=-1)
def _replicate(n, tensor):
  """Tiles `tensor` `n` times along a newly added leading dimension."""
  # TODO(axch) Does this already exist somewhere?  Should it get contributed?
  with_leading_axis = tf.expand_dims(tensor, axis=0)
  # Repeat `n` times on the new axis and once on every original axis.
  tile_counts = tf.concat([[n], tf.ones_like(tensor.shape)], axis=0)
  return tf.tile(with_leading_axis, tile_counts)
def collater_fn(batch: Dict[str, tf.Tensor]) -> Dict[str, tf.Tensor]:
  """Collates a batch and attaches retrieved memory entries for mentions.

  After running `mm_collater_fn`, this selects `max_retrieval_indices`
  mention targets, hashes each selected mention from its start position and
  text identifier, looks the hash up in `memory_hash_sorted`, and gathers the
  matching rows of `memory_table`. Mentions whose hash is not found are
  pointed at the last entry of `hash_sorted_idx` and masked out.

  Relies on names from the enclosing scope: `config`, `mm_collater_fn`,
  `max_retrieval_indices`, `memory_hash_sorted`, `hash_sorted_idx`,
  `memory_table`, `memory_entity_pattern` and `memory_entity_ids`.

  Args:
    batch: dictionary of tensors; must contain the `mention_*` and
      `text_identifiers` keys read below.

  Returns:
    The same dictionary, extended with `retrieval_mention_mask`,
    `retrieval_mention_values`, `retrieval_mention_scores`,
    `retrieval_mention_batch_positions`, `retrieval_mention_start_positions`,
    `retrieval_mention_end_positions`, `mention_retrieval_indices` and, when
    `memory_entity_pattern` is truthy, `entity_does_not_match`.
  """
  batch = mm_collater_fn(batch)

  retrieve_masked = config.get('retrieve_masked', False)

  # Subselect mentions for which to retrieve corresponding memory.
  # We want to sample mentions which are linked, not masked, and not padded.
  # Random scores in [0, 1) break ties; adding 2 * weight ranks every
  # weighted target above every unweighted one.
  scores = tf.random.uniform(
      tf.shape(batch['mention_target_is_masked'])) + 2 * tf.cast(
          batch['mention_target_weights'], tf.float32)

  if not retrieve_masked:
    # Push masked targets below unmasked targets of the same weight.
    scores -= tf.cast(batch['mention_target_is_masked'], tf.float32)

  _, mention_target_retrieval_indices = tf.math.top_k(
      scores, k=max_retrieval_indices)

  mention_retrieval_indices = tf.gather(
      batch['mention_target_indices'], mention_target_retrieval_indices)
  retrieval_mention_mask = tf.gather(
      batch['mention_target_weights'], mention_target_retrieval_indices)
  # set weight to 0 for masked retrievals if we do not want to include these
  if not retrieve_masked:
    retrieval_mention_mask *= tf.gather(
        1 - tf.cast(batch['mention_target_is_masked'], tf.int32),
        mention_target_retrieval_indices)

  retrieval_mention_start_positions = tf.gather(
      batch['mention_start_positions'], mention_retrieval_indices)
  retrieval_text_identifiers = tf.gather(batch['text_identifiers'],
                                         mention_retrieval_indices)
  # Combine start position and text identifier into one hash per mention.
  retrieval_mention_hash = mention_preprocess_utils.modified_cantor_pairing(
      tf.cast(retrieval_mention_start_positions, tf.int64),
      retrieval_text_identifiers)
  retrieval_mention_hash = tf.cast(retrieval_mention_hash, tf.int32)

  retrieval_mention_sort_ids = tf.searchsorted(
      memory_hash_sorted, retrieval_mention_hash)

  # Searchsorted does not check whether value is present in array, just
  # finds insertion point. Here we check and set to default retrieval if not
  # present.
  # NOTE(review): an insertion point equal to len(memory_hash_sorted) would be
  # out of range for the gather below — presumably the sorted hashes end with
  # a sentinel that prevents this; confirm against the caller.
  hash_not_present_mask = tf.not_equal(
      retrieval_mention_hash,
      tf.gather(memory_hash_sorted, retrieval_mention_sort_ids))
  hash_not_present = tf.where(hash_not_present_mask)
  # The default retrieval is the last position of `hash_sorted_idx`.
  update_values = tf.fill((tf.shape(hash_not_present)[0],),
                          tf.shape(hash_sorted_idx)[0] - 1)
  retrieval_mention_sort_ids = tf.tensor_scatter_nd_update(
      retrieval_mention_sort_ids, hash_not_present, update_values)

  # Set mask to 0 if no mention is found
  batch['retrieval_mention_mask'] = retrieval_mention_mask * (
      1 - tf.cast(hash_not_present_mask, tf.int32))

  # Map positions in the sorted hash array back to rows of the memory table.
  retrieval_mention_ids = tf.gather(hash_sorted_idx,
                                    retrieval_mention_sort_ids)
  retrieval_mention_values = tf.gather(memory_table, retrieval_mention_ids)
  # Match passage entity_ids with memory entity ids as sanity check.
  if memory_entity_pattern:
    retrieval_memory_entity_ids = tf.gather(
        memory_entity_ids, retrieval_mention_ids)
    retrieval_passage_entity_ids = tf.gather(
        tf.cast(batch['mention_target_ids'], tf.int32),
        mention_target_retrieval_indices)
    entity_does_not_match = tf.not_equal(
        retrieval_memory_entity_ids, retrieval_passage_entity_ids)

    # Only report mismatches for retrievals that are actually unmasked.
    batch['entity_does_not_match'] = tf.logical_and(
        entity_does_not_match,
        tf.cast(batch['retrieval_mention_mask'], tf.bool))

  batch['retrieval_mention_values'] = retrieval_mention_values
  # Every retrieval gets a score of one.
  batch['retrieval_mention_scores'] = tf.ones_like(
      batch['retrieval_mention_mask'])
  batch['retrieval_mention_batch_positions'] = tf.gather(
      batch['mention_batch_positions'], mention_retrieval_indices)
  batch['retrieval_mention_start_positions'] = retrieval_mention_start_positions  # pylint: disable=line-too-long
  batch['retrieval_mention_end_positions'] = tf.gather(
      batch['mention_end_positions'], mention_retrieval_indices)
  batch['mention_retrieval_indices'] = mention_retrieval_indices

  return batch
def l1norm(x):
  """Appends a 1e-6 entry on the last axis, then L1-normalizes that axis.

  The result has one more element along the last axis than the input. The
  appended constant keeps the norm non-zero when `x` is all zeros.
  """
  epsilon_column = tf.ones_like(x[..., :1]) * 1e-6
  padded = tf.concat([x, epsilon_column], axis=-1)
  l1 = tf.linalg.norm(padded, ord=1, axis=-1, keepdims=True)
  return padded / l1
if inspect.isclass(condition): condition = lambda distribution, cls=condition: isinstance( # pylint: disable=g-long-lambda distribution, cls) ASVI_SURROGATE_SUBSTITUTIONS[condition] = substitution_fn # Default substitutions attempt to express distributions using the most # flexible available parameterization. # pylint: disable=g-long-lambda register_asvi_substitution_rule( half_normal.HalfNormal, lambda dist: truncated_normal.TruncatedNormal( loc=0., scale=dist.scale, low=0., high=dist.scale * 10.)) register_asvi_substitution_rule( uniform.Uniform, lambda dist: shift.Shift(dist.low) (scale_lib.Scale(dist.high - dist.low) (beta.Beta(concentration0=tf.ones_like(dist.mean()), concentration1=1.)))) register_asvi_substitution_rule( exponential.Exponential, lambda dist: gamma.Gamma(concentration=1., rate=dist.rate)) register_asvi_substitution_rule( chi2.Chi2, lambda dist: gamma.Gamma(concentration=0.5 * dist.df, rate=0.5)) # pylint: enable=g-long-lambda # TODO(kateslin): Add support for models with prior+likelihood written as # a single JointDistribution. def build_asvi_surrogate_posterior(prior, mean_field=False, initial_prior_weight=0.5, seed=None,
def lossfun(x, alpha, scale, approximate=False, epsilon=1e-6): r"""Implements the general form of the loss. This implements the rho(x, \alpha, c) function described in "A General and Adaptive Robust Loss Function", Jonathan T. Barron, https://arxiv.org/abs/1701.03077. Args: x: The residual for which the loss is being computed. x can have any shape, and alpha and scale will be broadcasted to match x's shape if necessary. Must be a tensorflow tensor or numpy array of floats. alpha: The shape parameter of the loss (\alpha in the paper), where more negative values produce a loss with more robust behavior (outliers "cost" less), and more positive values produce a loss with less robust behavior (outliers are penalized more heavily). Alpha can be any value in [-infinity, infinity], but the gradient of the loss with respect to alpha is 0 at -infinity, infinity, 0, and 2. Must be a tensorflow tensor or numpy array of floats with the same precision as `x`. Varying alpha allows for smooth interpolation between a number of discrete robust losses: alpha=-Infinity: Welsch/Leclerc Loss. alpha=-2: Geman-McClure loss. alpha=0: Cauchy/Lortentzian loss. alpha=1: Charbonnier/pseudo-Huber loss. alpha=2: L2 loss. scale: The scale parameter of the loss. When |x| < scale, the loss is an L2-like quadratic bowl, and when |x| > scale the loss function takes on a different shape according to alpha. Must be a tensorflow tensor or numpy array of single-precision floats. approximate: a bool, where if True, this function returns an approximate and faster form of the loss, as described in the appendix of the paper. This approximation holds well everywhere except as x and alpha approach zero. epsilon: A float that determines how inaccurate the "approximate" version of the loss will be. Larger values are less accurate but more numerically stable. Must be great than single-precision machine epsilon. Returns: The losses for each element of x, in the same shape as x. 
This is returned as a TensorFlow graph node of single precision floats. """ # `scale` and `alpha` must have the same type as `x`. float_dtype = x.dtype tf.debugging.assert_type(scale, float_dtype) tf.debugging.assert_type(alpha, float_dtype) # `scale` must be > 0. assert_ops = [tf.Assert(tf.reduce_all(tf.greater(scale, 0.)), [scale])] with tf.control_dependencies(assert_ops): # Broadcast `alpha` and `scale` to have the same shape as `x`. alpha = tf.broadcast_to(alpha, tf.shape(x)) scale = tf.broadcast_to(scale, tf.shape(x)) if approximate: # `epsilon` must be greater than single-precision machine epsilon. assert epsilon > np.finfo(np.float32).eps # Compute an approximate form of the loss which is faster, but innacurate # when x and alpha are near zero. b = tf.abs(alpha - tf.cast(2., float_dtype)) + epsilon d = tf.where( tf.greater_equal(alpha, 0.), alpha + epsilon, alpha - epsilon) loss = (b / d) * (tf.pow(tf.square(x / scale) / b + 1., 0.5 * d) - 1.) else: # Compute the exact loss. # This will be used repeatedly. squared_scaled_x = tf.square(x / scale) # The loss when alpha == 2. loss_two = 0.5 * squared_scaled_x # The loss when alpha == 0. loss_zero = util.log1p_safe(0.5 * squared_scaled_x) # The loss when alpha == -infinity. loss_neginf = -tf.math.expm1(-0.5 * squared_scaled_x) # The loss when alpha == +infinity. loss_posinf = util.expm1_safe(0.5 * squared_scaled_x) # The loss when not in one of the above special cases. machine_epsilon = tf.cast(np.finfo(np.float32).eps, float_dtype) # Clamp |2-alpha| to be >= machine epsilon so that it's safe to divide by. beta_safe = tf.maximum(machine_epsilon, tf.abs(alpha - 2.)) # Clamp |alpha| to be >= machine epsilon so that it's safe to divide by. alpha_safe = tf.where( tf.greater_equal(alpha, 0.), tf.ones_like(alpha), -tf.ones_like(alpha)) * tf.maximum(machine_epsilon, tf.abs(alpha)) loss_otherwise = (beta_safe / alpha_safe) * ( tf.pow(squared_scaled_x / beta_safe + 1., 0.5 * alpha) - 1.) 
# Select which of the cases of the loss to return. loss = tf.where( tf.equal(alpha, -tf.cast(float('inf'), float_dtype)), loss_neginf, tf.where( tf.equal(alpha, 0.), loss_zero, tf.where( tf.equal(alpha, 2.), loss_two, tf.where( tf.equal(alpha, tf.cast(float('inf'), float_dtype)), loss_posinf, loss_otherwise)))) return loss
def draw_sample(num_samples, num_classes, logits, num_trials, dtype, seed):
  """Draws samples from a batch of multinomial distributions.

  The batch shape results from broadcasting `num_trials` against `logits` with
  its last (class) dimension removed.

  Args:
    num_samples: Python int or singleton integer Tensor: how many multinomial
      samples to draw.
    num_classes: Python int or singleton integer Tensor: number of classes.
    logits: Floating Tensor whose last dimension is k, holding (unnormalized)
      per-class logit probabilities.
    num_trials: Tensor with the number of categorical trials making up each
      multinomial. `num_trials[..., tf.newaxis]` must broadcast with `logits`.
    dtype: dtype of the emitted samples.
    seed: Random seed.

  Returns:
    samples: Tensor of the given dtype with shape [n] + batch_shape + [k].
  """

  def _draw_for_member(member):
    """Draws every sample for one flattened batch member."""
    member_logits, total_draws, member_seed = member  # [K], [], seed
    # Shape [1, total_draws], where total_draws = num_samples * num_trials.
    draws = samplers.categorical(
        member_logits[tf.newaxis, ...], total_draws, seed=member_seed)
    # [num_samples, num_trials]
    per_sample = tf.reshape(draws, shape=[num_samples, -1])
    # [num_samples, num_trials, num_classes]
    indicators = tf.one_hot(per_sample, depth=num_classes)
    # Summing out the trials axis gives counts: [num_samples, num_classes].
    counts = tf.reduce_sum(indicators, axis=-2)
    return tf.cast(counts, dtype=dtype)

  with tf.name_scope('draw_sample'):
    # Bring num_trials and logits to a common batch shape.
    trials_bcast = tf.ones_like(
        logits[..., 0], dtype=num_trials.dtype) * num_trials
    logits_bcast = tf.ones_like(
        trials_bcast[..., tf.newaxis], dtype=logits.dtype) * logits

    # Collapse all batch dimensions into one.
    # logits_2d: [B1B2...Bm, num_classes]; draws_1d: [B1B2...Bm].
    logits_2d = tf.reshape(logits_bcast, [-1, num_classes])
    draws_1d = num_samples * tf.reshape(trials_bcast, [-1])

    # Each batch member is handled separately through map_fn. A single batched
    # samplers.categorical call is not usable because it needs the same sample
    # count for every member of the batch (the count is part of its output
    # shape), while the multinomial sums that dimension away and therefore
    # allows per-member trial counts. Drawing max(num_trials) categoricals for
    # everyone and masking the surplus would work too, but can waste a lot of
    # memory when the trial counts differ widely.
    # TODO(b/123763054, b/112152209): Revisit the possibility of writing this
    # with a batch categorical followed by batch unsorted_segment_sum, once
    # both of those work and are memory-efficient enough.
    member_seeds = samplers.split_seed(
        seed, n=tf.shape(logits_2d)[0], salt='multinomial_draw_sample')
    stacked = tf.map_fn(
        _draw_for_member,
        [logits_2d, draws_1d, member_seeds],
        fn_output_signature=dtype)  # [B1B2...Bm, num_samples, num_classes]

    # Put the sample axis first, then restore the batch dimensions.
    sample_major = tf.transpose(a=stacked, perm=[1, 0, 2])
    output_shape = tf.concat(
        [[num_samples], tf.shape(trials_bcast), [num_classes]], axis=0)
    return tf.reshape(sample_major, output_shape)
def segment_diff(x,
                 segment_ids,
                 order=1,
                 exclusive=False,
                 dtype=None,
                 name=None):
  """Computes differences of successive elements within each segment.

  For a complete description of segment_* ops see documentation of
  `tf.segment_max`. This op is the segmented counterpart of `diff`: inside
  every segment it behaves like `diff`, and the overall result is effectively
  the concatenation of `diff` applied to each segment separately.

  ## Example

  ```python
    x = tf.constant([2, 5, 1, 7, 9] + [32, 10, 12, 3] + [4, 8, 5])
    segments = tf.constant([0, 0, 0, 0, 0] + [1, 1, 1, 1] + [2, 2, 2])
    # First order diff. Expected result: [3, -4, 6, 2, -22, 2, -9, 4, -3]
    dx1 = segment_diff(
        x, segment_ids=segments, order=1, exclusive=True)
    # Non-exclusive, second order diff.
    # Expected result: [2, 5, -1, 2, 8, 32, 10, -20, -7, 4, 8, 1]
    dx2 = segment_diff(
        x, segment_ids=segments, order=2, exclusive=False)
  ```

  Args:
    x: A rank 1 `Tensor` of any dtype for which arithmetic operations are
      permitted.
    segment_ids: A `Tensor`. Must be one of the following types: int32, int64.
      A 1-D tensor whose size is equal to the size of `x`. Values should be
      sorted and can be repeated. If None, the plain `diff` of `x` is returned.
    order: Positive Python int. The order of the difference to compute.
      `order = 1` corresponds to the difference between successive elements.
      Default value: 1
    exclusive: Python bool. If False, the output has the same size as `x` and
      the leading (up to `order`) positions of each segment after the first
      hold the corresponding values of `x`. If True, those positions are
      dropped from the output.
      Default value: False
    dtype: Optional `tf.Dtype`. If supplied, the dtype for `x` to use when
      converting to `Tensor`.
      Default value: None which maps to the default dtype inferred by TF.
    name: Python `str` name prefixed to Ops created by this class.
      Default value: None which is mapped to the default name 'segment_diff'.

  Returns:
    diffs: A `Tensor` of the same dtype as `x`. Assuming that each segment is
    of length greater than or equal to order, if `exclusive` is True,
    then the size is `n-order*k` where `n` is the size of x,
    `k` is the number of different segment ids supplied if `segment_ids` is not
    None or 1 if `segment_ids` is None. If any of the segments is of length
    less than the order, then the size is:
    `n-sum(min(order, length(segment_j)), j)` where the sum is over segments.
    If `exclusive` is False, then the size is `n`.
  """
  with tf.compat.v1.name_scope(name, default_name='segment_diff', values=[x]):
    x = tf.convert_to_tensor(x, dtype=dtype)
    unsegmented = diff_ops.diff(x, order=order, exclusive=exclusive)
    if segment_ids is None:
      return unsegmented

    # With segments present, `unsegmented` is wrong at positions
    # p, p+1, ..., min(p+order-1, m_p-1), where p is the first index of any
    # segment except the very first one (already correct) and m_p is that
    # segment's length. Start by locating where a new segment begins.
    id_steps = segment_ids[1:] - segment_ids[:-1]
    is_boundary = tf.concat([[False], tf.not_equal(id_steps, 0)], axis=0)
    # Shape [k, 1]
    starts = tf.cast(tf.where(is_boundary), dtype=tf.int32)
    # Each segment ends where the next starts; the last ends at the input end.
    ends_flat = tf.concat(
        [tf.reshape(starts, [-1])[1:], [tf.size(segment_ids)]], axis=0)
    ends = tf.reshape(ends_flat, [-1, 1])

    # Candidate positions p, p+1, ..., p+order-1 for every boundary.
    # Shape [num_segments-1, order]
    candidates = starts + tf.range(order, dtype=starts.dtype)
    # Discard candidates that spill over into the following segment.
    inside = tf.where(candidates < ends)
    bad_positions = tf.reshape(tf.gather_nd(candidates, inside), [-1, 1])

    # scatter_nd does not support bool on GPUs, so scatter int32 ones and
    # convert to bool afterwards.
    bad_flags = tf.scatter_nd(
        bad_positions,
        tf.reshape(tf.ones_like(bad_positions, dtype=tf.int32), [-1]),
        shape=tf.shape(x))
    is_bad = tf.cast(bad_flags, dtype=tf.bool)

    if exclusive:
      # The exclusive diff has already dropped the first `order` elements, so
      # drop the same prefix from the mask and then filter out whatever bad
      # positions remain.
      return tf.boolean_mask(unsegmented, tf.logical_not(is_bad[order:]))

    # Non-exclusive: bad positions simply take the value of `x`.
    return tf.where(is_bad, x, unsegmented)
def _make_black_objective_and_vega_func(prices, forwards, strikes, expiries,
                                        is_call_options, discount_factors):
  """Builds an objective-and-vega function for the Black Scholes model.

  The returned function maps volatilities to a tuple of objective function
  values and their gradients with respect to the volatilities. The objective is
  the difference between the Black Scholes price and the observed market price;
  its gradient is the option's vega. That is:

  ```
  g(s) = (f(s) - a, f'(s))
  ```

  where `g` is the returned function of the volatility `s`, `f` is the Black
  Scholes price with every other variable curried, `f'` is its derivative and
  `a` is the observed market price. `g` therefore supplies what first order
  methods need to find the volatility implied by observed option prices.

  All quantities are normalized internally by the larger of `strikes` and
  `forwards` (and by the discount factors), so the returned objective is in
  normalized units.

  #### References
  [1] Hull, J., 2018. Options, Futures, and Other Derivatives. Harlow, England.
  Pearson. (p.358 - 361)

  Args:
    prices: A real `Tensor` of any shape. The observed market prices of the
      assets.
    forwards: A real `Tensor` of the same shape and dtype as `prices`. The
      current forward prices to expiry.
    strikes: A real `Tensor` of the same shape and dtype as `prices`. The strike
      prices of the options.
    expiries: A real `Tensor` of same shape and dtype as `forwards`. The expiry
      for each option. The units should be such that `expiry * volatility**2` is
      dimensionless.
    is_call_options: A boolean `Tensor` (or value convertible to one) that
      broadcasts to the shape of the prices, or None. True where the option is
      a call, False where it is a put. If None, every option is priced as a
      call.
    discount_factors: A real `Tensor` of the same shape and dtype as
      `forwards`, or None. The total discount factors to apply. If None, a
      discount factor of one is used.

  Returns:
    A function from volatilities to a Black Scholes objective and its
    derivative (which is coincident with Vega).
  """
  dtype = prices.dtype
  std_normal = tfp.distributions.Normal(
      loc=tf.zeros(1, dtype=dtype), scale=tf.ones(1, dtype=dtype))

  # The normalization constant is whichever of strike / forward is larger.
  strike_dominates = strikes >= forwards
  scale = tf.where(strike_dominates, strikes, forwards)
  normalized_prices = prices / scale
  if discount_factors is None:
    discount_factors = tf.ones_like(normalized_prices)
  else:
    normalized_prices = normalized_prices / discount_factors

  ones = tf.ones_like(forwards)
  # forwards / strikes where strikes >= forwards, otherwise 1.
  norm_forwards = tf.where(strike_dominates, forwards / strikes, ones)
  # 1 where strikes >= forwards, otherwise strikes / forwards.
  norm_strikes = tf.where(strike_dominates, ones, strikes / forwards)

  log_moneyness = tf.math.log(forwards) - tf.math.log(strikes)
  sqrt_expiries = tf.sqrt(expiries)

  if is_call_options is not None:
    is_call_options = tf.convert_to_tensor(
        is_call_options, dtype=tf.bool, name='is_call_options')

  def _black_objective_and_vega(volatilities):
    """Evaluates the normalized Black Scholes objective and vega.

    Args:
      volatilities: A real `Tensor` of same shape and dtype as `forwards`. The
        volatility to expiry.

    Returns:
      A tuple (objective, vega): the normalized model price minus the
      normalized market price, and its derivative with respect to the
      volatility.
    """
    total_vol = volatilities * sqrt_expiries
    d1 = log_moneyness / total_vol + total_vol / 2
    d2 = d1 - total_vol
    model_prices = (norm_forwards * std_normal.cdf(d1) -
                    norm_strikes * std_normal.cdf(d2))
    if is_call_options is not None:
      # Derive put prices from the call prices computed above.
      put_prices = model_prices - norm_forwards + norm_strikes
      call_mask = tf.broadcast_to(is_call_options, tf.shape(put_prices))
      model_prices = tf.where(call_mask, model_prices, put_prices)
    vega = (norm_forwards * std_normal.prob(d1) * sqrt_expiries /
            discount_factors)
    return model_prices - normalized_prices, vega

  return _black_objective_and_vega
def _stddev(self):
  """Returns `scale` broadcast against the shape of `loc`."""
  broadcaster = tf.ones_like(self.loc)
  return self.scale * broadcaster
def _parse_train_data(self, data):
  """Parse data for ShapeMask training.

  Reads the image and groundtruth annotations from `data`, applies the
  configured filtering / augmentation / resizing, assigns anchor targets and
  samples a fixed number of groundtruth masks for the mask branch.

  Args:
    data: A dict-like object. The keys read here are `'image'`,
      `'groundtruth_classes'`, `'groundtruth_boxes'`,
      `'groundtruth_instance_masks'` and `'groundtruth_is_crowd'`.

  Returns:
    image: The normalized, resized-and-cropped image (cast to bfloat16 when
      `self._use_bfloat16` is set).
    labels: A dict with keys `'cls_targets'`, `'box_targets'`,
      `'anchor_boxes'`, `'num_positives'`, `'image_info'`, `'mask_boxes'`,
      `'mask_outer_boxes'`, `'mask_targets'`, `'fine_mask_targets'`,
      `'mask_classes'` and `'mask_is_valid'`.
  """
  classes = data['groundtruth_classes']
  boxes = data['groundtruth_boxes']
  masks = data['groundtruth_instance_masks']
  is_crowds = data['groundtruth_is_crowd']
  # Skips annotations with `is_crowd` = True.
  if self._skip_crowd_during_training and self._is_training:
    num_groundtrtuhs = tf.shape(classes)[0]
    with tf.control_dependencies([num_groundtrtuhs, is_crowds]):
      # When `is_crowds` is empty, keep every annotation; otherwise keep the
      # indices of the non-crowd ones.
      indices = tf.cond(
          tf.greater(tf.size(is_crowds), 0),
          lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
          lambda: tf.cast(tf.range(num_groundtrtuhs), tf.int64))
    classes = tf.gather(classes, indices)
    boxes = tf.gather(boxes, indices)
    masks = tf.gather(masks, indices)

  # Gets original image and its size.
  image = data['image']
  image_shape = tf.shape(image)[0:2]

  # If not using category, collapses the class ids to a binary float label:
  # 1.0 where the id is greater than 0, and 0.0 otherwise.
  if not self._use_category:
    classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)

  # Normalizes image with mean and std pixel values.
  image = input_utils.normalize_image(image)

  # Flips image (together with boxes and masks) randomly when enabled.
  if self._aug_rand_hflip:
    image, boxes, masks = input_utils.random_horizontal_flip(
        image, boxes, masks)

  # Converts boxes from normalized coordinates to pixel coordinates.
  boxes = box_utils.denormalize_boxes(boxes, image_shape)

  # Resizes and crops image.
  image, image_info = input_utils.resize_and_crop_image(
      image,
      self._output_size,
      self._output_size,
      aug_scale_min=self._aug_scale_min,
      aug_scale_max=self._aug_scale_max)
  # Rows 2 and 3 of `image_info` are used as the scale and offset of the
  # resize-and-crop transform.
  image_scale = image_info[2, :]
  offset = image_info[3, :]

  # Resizes and crops boxes. The masks are not transformed here; instead the
  # crop boxes are mapped back to the masks' own coordinates further below.
  boxes = input_utils.resize_and_crop_boxes(
      boxes, image_scale, self._output_size, offset)

  # Filters out ground truth boxes that are all zeros.
  indices = input_utils.get_non_empty_box_indices(boxes)
  boxes = tf.gather(boxes, indices)
  classes = tf.gather(classes, indices)
  masks = tf.gather(masks, indices)

  # Assigns anchors.
  input_anchor = anchor.Anchor(
      self._min_level, self._max_level, self._num_scales,
      self._aspect_ratios, self._anchor_size, self._output_size)
  anchor_labeler = anchor.AnchorLabeler(
      input_anchor, self._match_threshold, self._unmatched_threshold)
  (cls_targets,
   box_targets,
   num_positives) = anchor_labeler.label_anchors(
       boxes,
       tf.cast(tf.expand_dims(classes, axis=1), tf.float32))

  # Sample groundtruth masks/boxes/classes for mask branch.
  num_masks = tf.shape(masks)[0]
  mask_shape = tf.shape(masks)[1:3]

  # Pad sampled boxes/masks/classes to a constant batch size.
  padded_boxes = input_utils.pad_to_fixed_size(
      boxes, self._num_sampled_masks)
  padded_classes = input_utils.pad_to_fixed_size(
      classes, self._num_sampled_masks)
  padded_masks = input_utils.pad_to_fixed_size(
      masks, self._num_sampled_masks)

  # Randomly sample groundtruth masks for mask branch training. For the image
  # without groundtruth masks, it will sample the dummy padded tensors.
  # The shuffled range is wrapped modulo max(num_masks, 1) so every index
  # points at a real mask whenever at least one exists (index 0 otherwise).
  rand_indices = tf.random.shuffle(
      tf.range(tf.maximum(num_masks, self._num_sampled_masks)))
  rand_indices = tf.math.mod(rand_indices, tf.maximum(num_masks, 1))
  rand_indices = rand_indices[0:self._num_sampled_masks]
  rand_indices = tf.reshape(rand_indices, [self._num_sampled_masks])

  sampled_boxes = tf.gather(padded_boxes, rand_indices)
  sampled_classes = tf.gather(padded_classes, rand_indices)
  sampled_masks = tf.gather(padded_masks, rand_indices)
  # Jitter the sampled boxes to mimic the noisy detections.
  sampled_boxes = box_utils.jitter_boxes(
      sampled_boxes, noise_scale=self._box_jitter_scale)
  sampled_boxes = box_utils.clip_boxes(sampled_boxes, self._output_size)
  # Compute mask targets in feature crop. A feature crop fully contains a
  # sampled box.
  mask_outer_boxes = box_utils.compute_outer_boxes(
      sampled_boxes, tf.shape(image)[0:2], scale=self._outer_box_scale)
  mask_outer_boxes = box_utils.clip_boxes(mask_outer_boxes, self._output_size)
  # Compensate the offset of mask_outer_boxes to map it back to original image
  # scale. The [y, x] offset / scale pairs are tiled to cover all four box
  # coordinates.
  mask_outer_boxes_ori = mask_outer_boxes
  mask_outer_boxes_ori += tf.tile(tf.expand_dims(offset, axis=0), [1, 2])
  mask_outer_boxes_ori /= tf.tile(tf.expand_dims(image_scale, axis=0), [1, 2])
  # crop_and_resize takes normalized box coordinates, so normalize by the
  # spatial size of the masks.
  norm_mask_outer_boxes_ori = box_utils.normalize_boxes(
      mask_outer_boxes_ori, mask_shape)

  # Set sampled_masks shape to [batch_size, height, width, 1].
  sampled_masks = tf.cast(tf.expand_dims(sampled_masks, axis=-1), tf.float32)
  mask_targets = tf.image.crop_and_resize(
      sampled_masks,
      norm_mask_outer_boxes_ori,
      box_indices=tf.range(self._num_sampled_masks),
      crop_size=[self._mask_crop_size, self._mask_crop_size],
      method='bilinear',
      extrapolation_value=0,
      name='train_mask_targets')
  # Binarize the bilinearly interpolated crops at 0.5.
  mask_targets = tf.where(tf.greater_equal(mask_targets, 0.5),
                          tf.ones_like(mask_targets),
                          tf.zeros_like(mask_targets))
  mask_targets = tf.squeeze(mask_targets, axis=-1)
  if self._up_sample_factor > 1:
    # Same crop at `_up_sample_factor` times the resolution.
    # NOTE(review): this reuses the op name 'train_mask_targets' from the
    # coarse crop above; confirm that is intended.
    fine_mask_targets = tf.image.crop_and_resize(
        sampled_masks,
        norm_mask_outer_boxes_ori,
        box_indices=tf.range(self._num_sampled_masks),
        crop_size=[
            self._mask_crop_size * self._up_sample_factor,
            self._mask_crop_size * self._up_sample_factor
        ],
        method='bilinear',
        extrapolation_value=0,
        name='train_mask_targets')
    fine_mask_targets = tf.where(
        tf.greater_equal(fine_mask_targets, 0.5),
        tf.ones_like(fine_mask_targets),
        tf.zeros_like(fine_mask_targets))
    fine_mask_targets = tf.squeeze(fine_mask_targets, axis=-1)
  else:
    fine_mask_targets = mask_targets

  # If bfloat16 is used, casts input image to tf.bfloat16.
  if self._use_bfloat16:
    image = tf.cast(image, dtype=tf.bfloat16)

  # 1 when the image has at least one groundtruth mask, 0 otherwise.
  valid_image = tf.cast(tf.not_equal(num_masks, 0), tf.int32)
  if self._mask_train_class == 'all':
    mask_is_valid = valid_image * tf.ones_like(sampled_classes, tf.int32)
  else:
    # Get the intersection of sampled classes with training splits.
    mask_valid_classes = tf.cast(
        tf.expand_dims(
            class_utils.coco_split_class_ids(self._mask_train_class), 1),
        sampled_classes.dtype)
    # A sampled class is valid if it equals any class id of the split.
    match = tf.reduce_any(
        tf.equal(tf.expand_dims(sampled_classes, 0), mask_valid_classes), 0)
    mask_is_valid = valid_image * tf.cast(match, tf.int32)

  # Packs labels for model_fn outputs.
  labels = {
      'cls_targets': cls_targets,
      'box_targets': box_targets,
      'anchor_boxes': input_anchor.multilevel_boxes,
      'num_positives': num_positives,
      'image_info': image_info,
      # For ShapeMask.
      'mask_boxes': sampled_boxes,
      'mask_outer_boxes': mask_outer_boxes,
      'mask_targets': mask_targets,
      'fine_mask_targets': fine_mask_targets,
      'mask_classes': sampled_classes,
      'mask_is_valid': mask_is_valid,
  }
  return image, labels
def box_matching(boxes, gt_boxes, gt_classes):
  """Match boxes to groundtruth boxes.

  Given the proposal boxes and the groundtruth boxes and classes, perform the
  groundtruth matching by taking the argmax of the IoU between boxes and
  groundtruth boxes.

  Args:
    boxes: a tensor of shape of [batch_size, N, 4] representing the box
      coordinates to be matched to groundtruth boxes.
    gt_boxes: a tensor of shape of [batch_size, MAX_INSTANCES, 4] representing
      the groundtruth box coordinates. It is padded with -1s to indicate the
      invalid boxes.
    gt_classes: [batch_size, MAX_INSTANCES] representing the groundtruth box
      classes. It is padded with -1s to indicate the invalid classes.

  Returns:
    matched_gt_boxes: a tensor of shape of [batch_size, N, 4], representing
      the matched groundtruth box coordinates for each input box. If the box
      does not overlap with any groundtruth boxes, the matched boxes of it
      will be set to all 0s. It has the same dtype as `gt_boxes`.
    matched_gt_classes: a tensor of shape of [batch_size, N], representing
      the matched groundtruth classes for each input box. If the box does not
      overlap with any groundtruth boxes, the matched box classes of it will
      be set to 0, which corresponds to the background class.
    matched_gt_indices: a tensor of shape of [batch_size, N], representing
      the indices of the matched groundtruth boxes in the original gt_boxes
      tensor. If the box does not overlap with any groundtruth boxes, the
      index of the matched groundtruth will be set to -1.
    matched_iou: a tensor of shape of [batch_size, N], representing the IoU
      between the box and its matched groundtruth box. The matched IoU is the
      maximum IoU of the box and all the groundtruth boxes.
    iou: a tensor of shape of [batch_size, N, K], representing the IoU matrix
      between boxes and the groundtruth boxes. The IoU between a box and the
      invalid groundtruth boxes whose coordinates are [-1, -1, -1, -1] is -1.
  """
  # Compute IoU between boxes and gt_boxes.
  # iou <- [batch_size, N, K]
  iou = box_utils.bbox_overlap(boxes, gt_boxes)

  # max_iou <- [batch_size, N]
  # 0.0 -> no match to gt, or -1.0 match to no gt
  matched_iou = tf.reduce_max(iou, axis=-1)

  # background_box_mask <- bool, [batch_size, N]
  background_box_mask = tf.less_equal(matched_iou, 0.0)

  argmax_iou_indices = tf.argmax(iou, axis=-1, output_type=tf.int32)

  # Pair every argmax index with its batch index so gather_nd can pick the
  # matched groundtruth entry per (batch, box).
  argmax_iou_indices_shape = tf.shape(argmax_iou_indices)
  batch_indices = (
      tf.expand_dims(tf.range(argmax_iou_indices_shape[0]), axis=-1) *
      tf.ones([1, argmax_iou_indices_shape[-1]], dtype=tf.int32))
  gather_nd_indices = tf.stack([batch_indices, argmax_iou_indices], axis=-1)

  matched_gt_boxes = tf.gather_nd(gt_boxes, gather_nd_indices)
  # The zero fill must share the dtype of the gathered boxes: tf.where needs
  # both value operands to have the same dtype, so forcing float32 here would
  # fail for groundtruth boxes of any other dtype.
  matched_gt_boxes = tf.where(
      tf.tile(tf.expand_dims(background_box_mask, axis=-1), [1, 1, 4]),
      tf.zeros_like(matched_gt_boxes),
      matched_gt_boxes)

  matched_gt_classes = tf.gather_nd(gt_classes, gather_nd_indices)
  matched_gt_classes = tf.where(
      background_box_mask,
      tf.zeros_like(matched_gt_classes),
      matched_gt_classes)

  matched_gt_indices = tf.where(
      background_box_mask,
      -tf.ones_like(argmax_iou_indices),
      argmax_iou_indices)

  return (matched_gt_boxes, matched_gt_classes, matched_gt_indices,
          matched_iou, iou)
def ood_logits(logits, ood_log_density):
  """Appends `ood_log_density` as one extra entry on the last axis of `logits`.

  Args:
    logits: Tensor whose last axis indexes the existing logits.
    ood_log_density: Value multiplied into a ones tensor shaped like
      `logits[..., :1]` to form the appended entry.

  Returns:
    A tensor equal to `logits` with one additional trailing element along the
    last axis.
  """
  extra_column = tf.ones_like(logits[..., :1]) * ood_log_density
  return tf.concat([logits, extra_column], axis=-1)
def left_continuous_cdf(x):
  """Evaluates a step function with levels 0, 0.1, 0.3, 0.6 and 1.

  The value is 0 for x <= 0, 0.1 for 0 < x <= 1, 0.3 for 1 < x <= 2,
  0.6 for 2 < x <= 3 and 1 for x > 3.

  Args:
    x: Tensor of evaluation points.

  Returns:
    A tensor shaped like `x` holding the step-function values.
  """
  ones = tf.ones_like(x)
  # Apply the thresholds from largest to smallest so that each smaller bound
  # overrides the levels set by the larger ones.
  steps = ((3, 0.6), (2, 0.3), (1, 0.1), (0, 0))
  result = ones
  for upper_bound, level in steps:
    result = tf1.where(x <= upper_bound, level * ones, result)
  return result
def get_loss_tensors(self, f0_candidates, freqs, amps):
  """Get traces of loss to estimate fundamental frequency.

  Args:
    f0_candidates: Frequencies of candidates in hertz. [batch, time, freq].
    freqs: Frequencies of sinusoids in hertz. [batch, time, freq].
    amps: Amplitudes of sinusoids, greater than 0. [batch, time, freq].

  Returns:
    sinusoids_loss: -log p(sinusoids|harmonics), [batch, time, f0_candidate].
    harmonics_loss: - log p(harmonics|sinusoids), [batch, time, f0_candidate].
  """
  # ==========================================================================
  # P(sinusoids | candidate_harmonics).
  # ==========================================================================
  sinusoid_dist = self.get_p_sinusoids_given_harmonics()

  # Every partial is scored against every candidate through their frequency
  # ratio.
  # -> [batch, time, candidate, partial]
  partial_axis_freqs = freqs[:, :, tf.newaxis, :]
  candidate_axis_f0 = f0_candidates[:, :, :, tf.newaxis]
  ratios = safe_divide(partial_axis_freqs, candidate_axis_f0)
  sinusoid_nll = - sinusoid_dist.log_prob(ratios)

  weights = tf.convert_to_tensor(amps[:, :, tf.newaxis, :])

  # # Don't count sinusoids that are less than 1 std > mean.
  # a_mean, a_var = tf.nn.moments(a, axes=-1, keepdims=True)
  # a = tf.where(a > a_mean + 0.5 * a_var**0.5, a, tf.zeros_like(a))

  # Amplitude-weighted average over partials.
  # -> [batch, time, candidate]
  weighted_nll_sum = tf.reduce_sum(sinusoid_nll * weights, axis=-1)
  weight_sum = tf.reduce_sum(weights, axis=-1)
  sinusoids_loss = safe_divide(weighted_nll_sum, weight_sum)

  # ==========================================================================
  # P(candidate_harmonics | sinusoids)
  # ==========================================================================
  harmonic_dist = self.get_p_harmonics_given_sinusoids(freqs, amps)
  harmonics = self.get_candidate_harmonics(f0_candidates, as_midi=True)

  # tfp expects [sample_sh, batch_sh, event_sh], so move the candidate and
  # harmonic axes to the front before scoring, then move them back.
  # -> [candidate, harmonic, batch, time]
  harmonics_front = tf.transpose(harmonics, [2, 3, 0, 1])
  harmonic_nll_front = - harmonic_dist.log_prob(harmonics_front)
  # -> [batch, time, candidate, harm]
  harmonic_nll = tf.transpose(harmonic_nll_front, [2, 3, 0, 1])

  # Prior decreasing importance of upper harmonics.
  n_points = self.n_harmonic_points
  harmonic_prior = tf.linspace(1.0, 1.0 / n_points, n_points)
  prior_4d = harmonic_prior[tf.newaxis, tf.newaxis, tf.newaxis, :]
  weighted_nll = harmonic_nll * prior_4d

  # Don't count loss for harmonics above nyquist.
  # Reweight by the number of harmonics below nyquist,
  # (so it doesn't just pick the highest frequency possible).
  nyquist_midi = hz_to_midi(self.sample_rate / 2.0)
  below_nyquist = tf.where(
      harmonics < nyquist_midi,
      tf.ones_like(weighted_nll),
      tf.zeros_like(weighted_nll))
  fraction_below = tf.reduce_mean(below_nyquist, axis=-1, keepdims=True)
  weighted_nll = weighted_nll * safe_divide(below_nyquist, fraction_below)

  # Average over harmonics.
  harmonics_loss = tf.reduce_mean(weighted_nll, axis=-1)

  return sinusoids_loss, harmonics_loss
def _entropy(self):
  """Returns 0.5 * (1 + 2 * log(scale) + euler_gamma + log(4 * pi)).

  `scale` is first broadcast against `loc`, so the result has the full
  broadcast shape of the two parameters.
  """
  # Multiplying by ones shaped like `loc` applies the broadcasting rules.
  full_scale = self.scale * tf.ones_like(self.loc)
  log_scale = tf.math.log(full_scale)
  return 0.5 * (
      1. + 2 * log_scale + np.euler_gamma + np.log(4. * np.pi))
def _call(self, r):
  """Returns `(mean, variance, grad_mean)` for the input `r`.

  The mean is `r` itself (through `tf.identity`); the variance and the
  gradient of the mean are the same all-ones tensor shaped like `r`.
  """
  mean = tf.identity(r)
  ones = tf.ones_like(r)
  return mean, ones, ones
def _mode(self):
  """Returns `loc` broadcast against the shape of `scale`."""
  broadcaster = tf.ones_like(self.scale)
  return self.loc * broadcaster
def _call(self, r):
  """Returns `(mean, variance, grad_mean)` for the input `r`.

  The mean is `1 / r`, the variance is all ones shaped like `r`, and the
  gradient of the mean with respect to `r` is `-1 / r**2`.
  """
  reciprocal = 1. / r
  unit_variance = tf.ones_like(r)
  reciprocal_grad = -1. / r**2
  return reciprocal, unit_variance, reciprocal_grad
def _grad_and_hessian_loss_fn(x):
  """Returns the loss gradient, the Cholesky factor of its Hessian, and ones.

  The loss is `_neg_log_likelihood(x)`.

  Args:
    x: Tensor at which the loss is evaluated and differentiated.

  Returns:
    A tuple `(grad_loss, hessian_chol, ones)`: the gradient of the loss with
    respect to `x`, the Cholesky factor of the Hessian of the loss with respect
    to `x`, and a tensor of ones shaped like the gradient.
  """
  nll = _neg_log_likelihood(x)
  # Both calls return one entry per element of `xs`; there is exactly one.
  (gradient,) = tf.gradients(ys=nll, xs=[x])
  (hessian,) = tf.hessians(ys=nll, xs=[x])
  return gradient, tf.linalg.cholesky(hessian), tf.ones_like(gradient)
def corr_matrix(t):
  """Builds the symmetric 2x2 matrix [[1, t/2], [t/2, 1]] for each `t`.

  Args:
    t: Tensor of any shape; every element yields one 2x2 matrix.

  Returns:
    A tensor of shape `t.shape + [2, 2]` with ones on the diagonal and
    `0.5 * t` off the diagonal.
  """
  one = tf.ones_like(t)
  off_diag = 0.5 * t
  row1 = tf.stack([one, off_diag], axis=-1)
  # Build the second row explicitly rather than reversing `row1` along axis 0:
  # for batched `t`, axis 0 is a batch axis, so reversing it would pair rows
  # coming from different batch members. For scalar `t` both forms agree.
  row2 = tf.stack([off_diag, one], axis=-1)
  return tf.stack([row1, row2], axis=-1)