def _prob(self, y):
  """Computes likelihoods of `y`; invoked by the base class.

  Each likelihood is the absolute difference between the sigmoid of the
  cumulative logits evaluated at `y + .5` and at `y - .5`.

  Args:
    y: Tensor that is broadcast against `self.batch_shape_tensor()`.

  Returns:
    Tensor of likelihoods with the shape of the broadcasted `y`.
  """
  # Broadcast against the batch shape and remember the resulting shape so
  # that it can be restored at the very end.
  target_shape = tf.broadcast_dynamic_shape(
      tf.shape(y), self.batch_shape_tensor())
  y = tf.broadcast_to(y, target_shape)
  shape = tf.shape(y)

  # Collapse the leading dimensions, then commute channels to the front to
  # obtain the (channels, 1, batch) layout.
  num_channels = self.batch_shape.num_elements()
  y = tf.transpose(tf.reshape(y, (-1, 1, num_channels)), (2, 1, 0))

  # Evaluate densities.
  # sigmoid(x) saturates at 1 for large x and at 0 for small x. A difference
  # of two numbers close to 0 is far more precise than a difference of two
  # numbers close to 1, so the signs are flipped whenever that moves both
  # arguments towards the left tail of the sigmoid.
  lower_logits = self._logits_cumulative(y - .5)
  upper_logits = self._logits_cumulative(y + .5)
  flip = tf.stop_gradient(-tf.math.sign(lower_logits + upper_logits))
  likelihood = abs(
      tf.sigmoid(flip * upper_logits) - tf.sigmoid(flip * lower_logits))
  likelihood = math_ops.lower_bound(likelihood, 0.)

  # Undo the layout change to return to the (broadcasted) input shape.
  return tf.reshape(tf.transpose(likelihood, (2, 1, 0)), shape)
def _decode(self, rel_codes, anchors):
  """Decodes anchor-relative codes into boxes.

  Box sizes are the anchor sizes scaled by `exp` of the size codes; box
  centers are the anchor centers offset by the sigmoid of the center codes.

  Args:
    rel_codes: a tensor representing N anchor-encoded boxes.
    anchors: BoxList of anchors.

  Returns:
    boxes: BoxList holding N bounding boxes.
  """
  ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
  ty, tx, th, tw = tf.unstack(tf.transpose(a=rel_codes))

  # Undo the optional per-coordinate scaling applied at encoding time.
  scale_factors = self._scale_factors
  if scale_factors:
    ty = ty / scale_factors[0]
    tx = tx / scale_factors[1]
    th = th / scale_factors[2]
    tw = tw / scale_factors[3]

  width = tf.exp(tw) * wa
  height = tf.exp(th) * ha
  center_y = tf.sigmoid(ty) + ycenter_a
  center_x = tf.sigmoid(tx) + xcenter_a

  half_height = height / 2.
  half_width = width / 2.
  corners = tf.stack([
      center_y - half_height,
      center_x - half_width,
      center_y + half_height,
      center_x + half_width,
  ])
  return box_list.BoxList(tf.transpose(a=corners))
def mlp(self, x):
  """Applies three sigmoid-activated affine layers to `x`.

  Args:
    x: Input tensor, multiplied on the right by `self.h1_weights`.

  Returns:
    The sigmoid output of the final (`out_weights` / `out_bias`) layer.
  """
  layer_params = (
      (self.h1_weights, self.h1_bias),
      (self.h2_weights, self.h2_bias),
      (self.out_weights, self.out_bias),
  )
  activation = x
  for weights, bias in layer_params:
    activation = tf.sigmoid(tf.add(tf.matmul(activation, weights), bias))
  return activation
def __call__(self, x, carry):
  """Computes the next carry from input `x` and the previous `carry`.

  An update gate and a reset gate are computed from `x` and `carry`. The new
  carry blends the old carry with a tanh candidate built from `x` and the
  reset-gated carry.

  Args:
    x: Input tensor for this step.
    carry: Carry tensor from the previous step.

  Returns:
    The new carry tensor.
  """
  update_gate = tf.sigmoid(
      x @ self.W_update_x + carry @ self.W_update_c + self.b_update)
  reset_gate = tf.sigmoid(
      x @ self.W_reset_x + carry @ self.W_reset_c + self.b_reset)
  candidate = tf.tanh(
      self.next_x_net(x) + self.next_c_net(reset_gate * carry) + self.b_next)
  # The update gate keeps the old carry; its complement admits the candidate.
  return update_gate * carry + (1. - update_gate) * candidate
def get_discriminator_loss(learner_agent_output, env_output,
                           actor_agent_output, actor_action, reward_clipping,
                           discounting, baseline_cost, entropy_cost,
                           num_steps):
  """Computes the weighted cross-entropy loss of the discriminator.

  Logits and labels are restricted to the entries selected by the first-true
  column of the `disc_mask` observation. Several histogram and scalar
  summaries are written along the way.

  Args:
    learner_agent_output: Provides `policy_logits` and a `baseline` pair of
      affine parameters `(a, b)`.
    env_output: Provides `observation['disc_mask']` and
      `observation['label']`.
    actor_agent_output: Unused.
    actor_action: Unused.
    reward_clipping: Unused.
    discounting: Unused.
    baseline_cost: Unused.
    entropy_cost: Unused.
    num_steps: Step value passed to every summary.

  Returns:
    The per-entry weighted cross-entropy (`pos_weight=5`).
  """
  del (actor_agent_output, actor_action, reward_clipping, discounting,
       baseline_cost, entropy_cost)

  observation = env_output.observation
  first_true = utils.get_first_true_column(observation['disc_mask'])

  # Drop axis 1 of the logits, then keep only the first-true entries.
  output_logits = tf.squeeze(learner_agent_output.policy_logits, axis=1)
  output_logits = tf.boolean_mask(output_logits, first_true)
  output_affine_a, output_affine_b = learner_agent_output.baseline

  # Labels get the same masking as the logits.
  labels = tf.boolean_mask(
      tf.cast(observation['label'], tf.float32), first_true)

  is_positive = tf.equal(labels, tf.constant(1.0))
  logits_of_positives = tf.boolean_mask(output_logits, is_positive)
  tf.summary.histogram(
      'distribution/sigmoid_positive_logits',
      tf.sigmoid(logits_of_positives),
      step=num_steps)
  tf.summary.histogram(
      'distribution/positive_logits', logits_of_positives, step=num_steps)

  is_negative = tf.equal(labels, tf.constant(0.0))
  logits_of_negatives = tf.boolean_mask(output_logits, is_negative)
  tf.summary.histogram(
      'distribution/sigmoid_negative_logits',
      tf.sigmoid(logits_of_negatives),
      step=num_steps)
  tf.summary.histogram(
      'distribution/negative_logits', logits_of_negatives, step=num_steps)

  tf.summary.scalar(
      'labels/positive_label',
      tf.reduce_mean(tf.cast(is_positive, tf.float32)),
      step=num_steps)
  tf.summary.scalar('labels/labels', tf.reduce_mean(labels), step=num_steps)
  tf.summary.scalar(
      'affine_transform/a', tf.reduce_mean(output_affine_a), step=num_steps)
  tf.summary.scalar(
      'affine_transform/b', tf.reduce_mean(output_affine_b), step=num_steps)

  return tf.nn.weighted_cross_entropy_with_logits(
      labels=labels, logits=output_logits, pos_weight=5)
def _get_discriminator_logits(learner_agent_output, env_output,
                              actor_agent_output, actor_action,
                              reward_clipping, discounting, baseline_cost,
                              entropy_cost, num_steps):
  """Extracts masked discriminator labels and logits, and logs summaries.

  Logits and labels are restricted to the entries selected by the first-true
  column of the `disc_mask` observation. No loss is computed here.

  Args:
    learner_agent_output: Provides `policy_logits` and a `baseline` pair of
      affine parameters `(a, b)`.
    env_output: Provides `observation['disc_mask']` and
      `observation['label']`.
    actor_agent_output: Unused.
    actor_action: Unused.
    reward_clipping: Unused.
    discounting: Unused.
    baseline_cost: Unused.
    entropy_cost: Unused.
    num_steps: Step value passed to every summary.

  Returns:
    A `(labels, output_logits)` tuple, both masked by the first-true column.
  """
  del (actor_agent_output, actor_action, reward_clipping, discounting,
       baseline_cost, entropy_cost)

  observation = env_output.observation
  first_true = utils.get_first_true_column(observation['disc_mask'])

  # Logits go from [time, batch] to [batch] through the first-true mask.
  output_logits = tf.boolean_mask(
      learner_agent_output.policy_logits, first_true)
  output_affine_a, output_affine_b = learner_agent_output.baseline

  labels = tf.cast(observation['label'], tf.float32)
  tf.summary.scalar(
      'labels/mean_labels before masking',
      tf.reduce_mean(labels),
      step=num_steps)
  # Shape of labels after masking: [batch].
  labels = tf.boolean_mask(labels, first_true)

  is_positive = tf.equal(labels, tf.constant(1.0))
  logits_of_positives = tf.boolean_mask(output_logits, is_positive)
  tf.summary.histogram(
      'distribution/sigmoid_positive_logits',
      tf.sigmoid(logits_of_positives),
      step=num_steps)
  tf.summary.histogram(
      'distribution/positive_logits', logits_of_positives, step=num_steps)

  is_negative = tf.equal(labels, tf.constant(0.0))
  logits_of_negatives = tf.boolean_mask(output_logits, is_negative)
  tf.summary.histogram(
      'distribution/sigmoid_negative_logits',
      tf.sigmoid(logits_of_negatives),
      step=num_steps)
  tf.summary.histogram(
      'distribution/negative_logits', logits_of_negatives, step=num_steps)

  tf.summary.scalar(
      'labels/positive_label_ratio',
      tf.reduce_mean(tf.cast(is_positive, tf.float32)),
      step=num_steps)
  tf.summary.scalar(
      'affine_transform/a', tf.reduce_mean(output_affine_a), step=num_steps)
  tf.summary.scalar(
      'affine_transform/b', tf.reduce_mean(output_affine_b), step=num_steps)

  # Both have shape [batch].
  return labels, output_logits
def get_score_label_v2(self, action_list, env_output_list, agent_output,
                       environment):
  """Gets the probability score and GT labels for DiscriminatorAgentV2.

  Args:
    action_list: Unused.
    env_output_list: Only consulted in 'predict' mode, to look up the
      instruction ids.
    agent_output: Provides `policy_logits['labels']` and `baseline` (the
      logits), each with a leading axis of size one that gets squeezed away.
    environment: Unused.

  Returns:
    A single-element list holding `(scores, labels)`, or
    `(scores, labels, instruction_ids)` when `self._mode == 'predict'`.
  """
  del action_list, environment
  # Remove the unused timestep dimension.
  labels = tf.squeeze(agent_output.policy_logits['labels'], axis=0)
  squeezed_logits = tf.squeeze(agent_output.baseline, axis=0)

  if self._mode != 'predict':
    return [(tf.sigmoid(squeezed_logits), labels)]

  instruction_ids = self._get_instruction_ids(env_output_list)
  return [(tf.sigmoid(squeezed_logits), labels, instruction_ids)]
def evaluate_binary_classification(self, predictions, weights):
  """Evaluates the softmax loss on the given predictions.

  For every example `i` the result is

  ```python
  softmax_loss[i] = (
      weights[i, 0] * sigmoid(predictions[i]) +
      weights[i, 1] * sigmoid(-predictions[i])
  )
  ```

  i.e. `weights[:, 0]` are the per-example costs of making a positive
  prediction and `weights[:, 1]` those of making a negative prediction.

  Args:
    predictions: a `Tensor` of shape (n,), where n is the number of examples.
    weights: a `Tensor` of shape (m, 2), where m is broadcastable to n. This
      `Tensor` is *not* necessarily non-negative.

  Returns:
    A `Tensor` with dtype `predictions.dtype`, containing the softmax loss of
    each example.

  Raises:
    ValueError: if "weights" does not have exactly two columns. The
      conversion and column-count helpers may raise further validation
      errors of their own.
  """
  predictions = _convert_to_binary_classification_predictions(predictions)
  num_columns = helpers.get_num_columns_of_2d_tensor(weights, name="weights")
  if num_columns != 2:
    raise ValueError("weights must have two columns")

  dtype = predictions.dtype.base_dtype
  positive_weights = tf.cast(weights[:, 0], dtype=dtype)
  negative_weights = tf.cast(weights[:, 1], dtype=dtype)

  positive_term = positive_weights * tf.sigmoid(predictions)
  negative_term = negative_weights * tf.sigmoid(-predictions)
  return positive_term + negative_term
def call(self, inputs, training=True, survival_prob=None):
  """Runs the block: expand, depthwise conv, optional SE, project, residual.

  Args:
    inputs: the inputs tensor.
    training: boolean, whether the model is constructed for training.
    survival_prob: float, between 0 to 1, drop connect rate.

  Returns:
    A output tensor.
  """
  block_args = self._block_args
  x = inputs

  # The expansion stage is skipped when the expand ratio is 1.
  if block_args.expand_ratio != 1:
    expanded = self._bn0(self._expand_conv(x), training=training)
    x = self._relu_fn(expanded)

  depthwise = self._bn1(self._depthwise_conv(x), training=training)
  x = self._relu_fn(depthwise)

  if self._has_se:
    # Pool over the spatial dims, then reduce -> activation -> expand to get
    # the gating logits.
    pooled = tf.reduce_mean(x, self._spatial_dims, keepdims=True)
    gate_logits = self._se_expand(self._relu_fn(self._se_reduce(pooled)))
    x = tf.sigmoid(gate_logits) * x

  x = self._bn2(self._project_conv(x), training=training)
  # Add identity so that quantization-aware training can insert quantization
  # ops correctly.
  x = tf.identity(x)
  if self._clip_projection_output:
    x = tf.clip_by_value(x, -6, 6)

  # The skip connection only applies when every stride is 1 and the filter
  # counts of input and output agree.
  unit_strides = all(s == 1 for s in block_args.strides)
  if unit_strides and block_args.input_filters == block_args.output_filters:
    if survival_prob:
      x = utils.drop_connect(x, training, survival_prob)
    x = tf.add(x, inputs)
  return x
def _cdf(self, x):
  """Evaluates the CDF at `x` through the regularized incomplete beta.

  Args:
    x: Tensor of points at which to evaluate the CDF.

  Returns:
    `betainc(total_count, 1 + x, sigmoid(-logits))` for `x >= 0`, extended
    outside the support (lower bound 0) by
    `distribution_util.extend_cdf_outside_support`.
  """
  logits = self._logits_parameter_no_checks()
  total_count = tf.convert_to_tensor(self.total_count)
  # Negative inputs are swapped for 0 so that `betainc` only ever sees valid
  # arguments; their results are overwritten by the extension below.
  clamped_x = tf.where(x >= 0, x, 0.)
  inside_support = tfp_math.betainc(
      total_count, 1. + clamped_x, tf.sigmoid(-logits))
  return distribution_util.extend_cdf_outside_support(
      x, inside_support, low=0)
def sigmoid(x):
  """Sigmoid activation function, `sigmoid(x) = 1 / (1 + exp(-x))`.

  The result always lies between 0 and 1: it is close to zero for small
  inputs (<-5) and close to one for large inputs (>5). Sigmoid is equivalent
  to a 2-element Softmax whose second element is assumed to be zero.

  For example:

  >>> a = tf.constant([-20, -1.0, 0.0, 1.0, 20], dtype = tf.float32)
  >>> b = tf.keras.activations.sigmoid(a)
  >>> b.numpy()
  array([2.0611537e-09, 2.6894143e-01, 5.0000000e-01, 7.3105860e-01,
           1.0000000e+00], dtype=float32)

  Args:
      x: Input tensor.

  Returns:
      Tensor with the sigmoid activation: `1 / (1 + exp(-x))`. The input is
      attached to the result as `_keras_logits`.
  """
  activated = tf.sigmoid(x)
  # Keep the pre-activation values on the result so that a crossentropy loss
  # can use the logits directly.
  activated._keras_logits = x  # pylint: disable=protected-access
  return activated
def latent_encoder(self, x, y):
  """Encodes the inputs into one representation.

  Args:
    x: Tensor of shape [batch_size, observations, d_x]. For the prior, these
      are context x-values. For the posterior, these are target x-values.
    y: Tensor of shape [batch_size, observations, d_y]. For the prior, these
      are context y-values. For the posterior, these are target y-values.

  Returns:
    A normal distribution over tensors of shape [batch_size, num_latents].
  """
  # Embed every (x, y) pair, then average over the observations axis.
  pairs = tf.concat([x, y], axis=-1)
  embeddings = batch_mlp(pairs, self._latent_encoder_sizes)
  pooled = tf.reduce_mean(embeddings, axis=1)

  # The hidden width is halfway between the embedding size and the latent
  # size.
  hidden_units = (self._latent_encoder_sizes[-1] + self._num_latents)//2
  hidden = tf.keras.layers.Dense(
      hidden_units, activation=tf.nn.relu)(pooled)

  loc = tf.keras.layers.Dense(self._num_latents, activation=None)(hidden)
  raw_scale = tf.keras.layers.Dense(
      self._num_latents, activation=None)(hidden)
  # Constraint scale following Garnelo et al. (2018): keeps it in (0.1, 1).
  scale_diag = 0.1 + 0.9 * tf.sigmoid(raw_scale)
  return generated_random_variables.MultivariateNormalDiag(
      loc=loc, scale_diag=scale_diag)
def _apply_score_activation(logits, num_classes, activation):
  """Applies activation to logits and removes the background class.

  Note that it is assumed that the background class has index 0, which is
  sliced away after the score transformation.

  Args:
    logits: the raw logit tensor. It is reshaped to
      `[batch_size, -1, num_classes]`, where `batch_size` is its leading
      dimension.
    num_classes: the total number of classes including one background class.
    activation: the score activation type, one of 'SIGMOID', 'SOFTMAX' and
      'IDENTITY'.

  Returns:
    scores: the tensor after applying score transformation and background
      class removal.

  Raises:
    ValueError: if `activation` is not one of the supported types.
  """
  batch_size = tf.shape(input=logits)[0]
  logits = tf.reshape(logits, [batch_size, -1, num_classes])
  if activation == 'SIGMOID':
    scores = tf.sigmoid(logits)
  elif activation == 'SOFTMAX':
    # The softmax op lives under `tf.nn`; there is no top-level `tf.softmax`.
    scores = tf.nn.softmax(logits)
  elif activation == 'IDENTITY':
    # Pass the reshaped logits through unchanged so that `scores` is always
    # bound before the background class is sliced off.
    scores = logits
  else:
    raise ValueError(
        'The score activation should be SIGMOID, SOFTMAX or IDENTITY')
  return scores[..., 1:]
def _kl_bernoulli_bernoulli(a, b, name=None):
  """Calculate the batched KL divergence KL(a || b) with a and b Bernoulli.

  Args:
    a: instance of a Bernoulli distribution object.
    b: instance of a Bernoulli distribution object.
    name: (optional) Name to use for created operations.
      default is "kl_bernoulli_bernoulli".

  Returns:
    Batchwise KL(a || b)
  """
  with tf.name_scope(name or "kl_bernoulli_bernoulli"):
    # softplus(-logits) = -log(p) and softplus(logits) = -log(1 - p), so each
    # difference is a log-ratio of the outcome probabilities under a and b.
    log_ratio_one = tf.nn.softplus(-b.logits) - tf.nn.softplus(-a.logits)
    log_ratio_zero = tf.nn.softplus(b.logits) - tf.nn.softplus(a.logits)
    weighted_one = tf.sigmoid(a.logits) * log_ratio_one
    weighted_zero = tf.sigmoid(-a.logits) * log_ratio_zero
    return weighted_one + weighted_zero
def logit_normal_variance_trapezoid(loc, scale):
  """Brute-force the variance of LogitNormal(loc, scale) by quadrature.

  Args:
    loc: Location of the underlying normal distribution.
    scale: Scale of the underlying normal distribution.

  Returns:
    The quadrature estimate of `Var[sigmoid(X)]` for `X ~ Normal(loc, scale)`.
  """
  grid, integrate = logit_normal_trapezoid_rule(loc, scale)
  density = tfd.Normal(loc, scale).prob(grid)
  squashed = tf.sigmoid(grid)
  # First moment, then the second central moment around it.
  mean = integrate(squashed * density)
  return integrate((squashed - mean)**2 * density)
def _cdf(self, x):
  """Evaluates the CDF at `x` through the regularized incomplete beta.

  Args:
    x: Tensor of points at which to evaluate the CDF.

  Returns:
    `betainc(total_count, 1 + x, sigmoid(-logits))`, with every argument
    broadcast to the batch shape first.
  """
  logits = self._logits_parameter_no_checks()
  total_count = tf.convert_to_tensor(self.total_count)
  batch_shape = self._batch_shape_tensor(
      logits_or_probs=logits, total_count=total_count)
  # All three arguments are explicitly broadcast to a common shape before
  # being handed to `betainc`.
  first_arg = tf.broadcast_to(total_count, batch_shape)
  second_arg = tf.broadcast_to(1. + x, batch_shape)
  upper_limit = tf.broadcast_to(tf.sigmoid(-logits), batch_shape)
  return tf.math.betainc(first_arg, second_arg, upper_limit)
def _forward(self, x):
  """Maps `x` through a sigmoid rescaled to the interval (low, high).

  Args:
    x: Input tensor.

  Returns:
    `sigmoid(x)` for the standard sigmoid; otherwise the sigmoid shifted and
    scaled to lie between `self.low` and `self.high`.
  """
  if self._is_standard_sigmoid:
    return tf.sigmoid(x)

  low = tf.convert_to_tensor(self.low)  # Concretize only once
  high = tf.convert_to_tensor(self.high)
  width = high - low
  # Two algebraically equal forms: one anchored at `low` is used for negative
  # inputs, the other anchored at `high` for the rest.
  from_low = low + width * tf.math.sigmoid(x)
  from_high = high - width * tf.math.sigmoid(-x)
  return tf.where(x < 0, from_low, from_high)
def step(cell_inputs, cell_states):
  """Step function that will be used by Keras RNN backend.

  Args:
    cell_inputs: Input tensor for the current step.
    cell_states: Sequence holding the previous memory state and the previous
      carry state, in that order.

  Returns:
    A `(h, [h, c])` tuple: the step output and the new states.
  """
  prev_h, prev_c = cell_states[0], cell_states[1]

  # One fused projection for all four gates, split along axis 1 afterwards.
  gates = backend.dot(cell_inputs, kernel)
  gates = gates + backend.dot(prev_h, recurrent_kernel)
  gates = backend.bias_add(gates, bias)
  z_input, z_forget, z_candidate, z_output = tf.split(gates, 4, axis=1)

  input_gate = tf.sigmoid(z_input)
  forget_gate = tf.sigmoid(z_forget)
  c = forget_gate * prev_c + input_gate * tf.tanh(z_candidate)
  output_gate = tf.sigmoid(z_output)

  h = output_gate * tf.tanh(c)
  return h, [h, c]
def __call__(self, logits, scaled_labels, classes, category_loss=True,
             mse_loss=False):
  """Compute instance segmentation loss.

  Args:
    logits: A Tensor of shape [batch_size * num_points, height, width,
      num_classes]. The logits are not necessarily between 0 and 1.
    scaled_labels: A float16 Tensor of shape [batch_size, num_instances,
      mask_size, mask_size], where mask_size = mask_crop_size *
      gt_upsample_scale for fine mask, or mask_crop_size for coarse masks
      and shape priors.
    classes: A int tensor of shape [batch_size, num_instances].
    category_loss: use class specific mask prediction or not.
    mse_loss: use mean square error for mask loss or not

  Returns:
    mask_loss: an float tensor representing total mask classification loss.
    iou: a float tensor representing the IoU between target and prediction.
  """
  classes = tf.reshape(classes, [-1])
  _, _, height, width = scaled_labels.get_shape().as_list()
  scaled_labels = tf.reshape(scaled_labels, [-1, height, width])

  if category_loss:
    # Move the class axis next to the instance axis, then pick each
    # instance's own class channel (class ids are 1-based here).
    logits = tf.transpose(a=logits, perm=(0, 3, 1, 2))
    instance_ids = tf.range(tf.size(input=classes))
    gather_idx = tf.stack([instance_ids, classes - 1], axis=1)
    logits = tf.gather_nd(logits, gather_idx)
  else:
    logits = logits[:, :, :, 0]

  # Ignore loss on empty mask targets.
  valid_labels = tf.reduce_any(
      input_tensor=tf.greater(scaled_labels, 0), axis=[1, 2])

  if mse_loss:
    # Logits are probabilities in the case of shape prior prediction.
    logits *= tf.reshape(tf.cast(valid_labels, logits.dtype), [-1, 1, 1])
    weighted_loss = tf.nn.l2_loss(scaled_labels - logits)
    probs = logits
  else:
    weighted_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=scaled_labels, logits=logits)
    probs = tf.sigmoid(logits)
    valid_mask = tf.reshape(
        tf.cast(valid_labels, weighted_loss.dtype), [-1, 1, 1])
    weighted_loss *= valid_mask

  # Soft IoU: sum of elementwise minima over sum of elementwise maxima.
  intersection = tf.reduce_sum(input_tensor=tf.minimum(scaled_labels, probs))
  union = tf.reduce_sum(input_tensor=tf.maximum(scaled_labels, probs))
  iou = intersection / union

  # The summed loss is normalized by the total label mass.
  total_loss = tf.reduce_sum(input_tensor=weighted_loss)
  label_mass = tf.reduce_sum(input_tensor=scaled_labels)
  mask_loss = total_loss / label_mass
  return tf.cast(mask_loss, tf.float32), tf.cast(iou, tf.float32)
def _cdf(self, x):
  """Evaluates the CDF at `x` through the regularized incomplete beta.

  Args:
    x: Tensor of points at which to evaluate the CDF.

  Returns:
    `betainc(total_count, 1 + x, sigmoid(-logits))` for `x >= 0`, with all
    arguments broadcast to the batch shape, extended outside the support
    (lower bound 0) by `distribution_util.extend_cdf_outside_support`.
  """
  logits = self._logits_parameter_no_checks()
  total_count = tf.convert_to_tensor(self.total_count)
  batch_shape = self._batch_shape_tensor(
      logits=logits, total_count=total_count)
  # Negative inputs are swapped for 0 so that `betainc` only ever sees valid
  # arguments; their results are overwritten by the extension below.
  clamped_x = tf.where(x >= 0, x, 0.)
  first_arg = tf.broadcast_to(total_count, batch_shape)
  second_arg = tf.broadcast_to(1. + clamped_x, batch_shape)
  upper_limit = tf.broadcast_to(tf.sigmoid(-logits), batch_shape)
  inside_support = tf.math.betainc(first_arg, second_arg, upper_limit)
  return distribution_util.extend_cdf_outside_support(
      x, inside_support, low=0)
def _kl_bernoulli_bernoulli(a, b, name=None):
  """Calculate the batched KL divergence KL(a || b) with a and b Bernoulli.

  Args:
    a: instance of a Bernoulli distribution object.
    b: instance of a Bernoulli distribution object.
    name: Python `str` name to use for created operations.
      Default value: `None` (i.e., `'kl_bernoulli_bernoulli'`).

  Returns:
    Batchwise KL(a || b)
  """
  with tf.name_scope(name or 'kl_bernoulli_bernoulli'):
    a_logits = a.logits_parameter()
    b_logits = b.logits_parameter()
    # softplus(-logits) = -log(p) and softplus(logits) = -log(1 - p), so each
    # difference is a log-ratio of the outcome probabilities under a and b.
    log_ratio_one = tf.math.softplus(-b_logits) - tf.math.softplus(-a_logits)
    log_ratio_zero = tf.math.softplus(b_logits) - tf.math.softplus(a_logits)
    weighted_one = tf.sigmoid(a_logits) * log_ratio_one
    weighted_zero = tf.sigmoid(-a_logits) * log_ratio_zero
    return weighted_one + weighted_zero
def _sample_helper(self, value, eps=None):
  """Draws a reparameterized sample and its negative-KL term.

  Args:
    value: Tensor whose last axis is split in two halves: the mean and the
      pre-sigmoid scale.
    eps: Optional noise tensor. When `None`, normal noise with stddev
      `self._eps_std` and the shape of the scale is drawn.

  Returns:
    A `(sample, neg_kl, eps)` tuple. `sample` and `neg_kl` have their last
    axis squeezed away; `eps` is the noise that was used.
  """
  mu, raw_sigma = tf.split(value, num_or_size_splits=2, axis=-1)
  sigma = tf.sigmoid(raw_sigma)
  if eps is None:
    eps = tf.random.normal(
        shape=tf.shape(sigma),
        mean=0.,
        stddev=self._eps_std,
        dtype=tf.float32)
  sample = mu + sigma * eps
  # Negative KL from N(mu, sigma) to a standard normal; the 1e-8 keeps the
  # log finite when sigma underflows to zero.
  log_sigma = tf.math.log(sigma + 1e-8)
  neg_kl = 0.5 + log_sigma - 0.5 * (sigma**2 + mu**2)
  return tf.squeeze(sample, axis=-1), tf.squeeze(neg_kl, axis=-1), eps
def logit_normal_variance_gh(loc, scale, deg):
  """Approximates `Var_{N(m,s)}[sigmoid(X)]` by Gauss-Hermite quadrature.

  Args:
    loc: Tensor of locations of the underlying normal distribution.
    scale: Tensor of scales of the underlying normal distribution.
    deg: Number of quadrature points.

  Returns:
    Tensor of variance estimates, with the quadrature axis reduced away.
  """
  # The sigmoids are needed for the variance anyway, so the mean is computed
  # inline by Gauss-Hermite quadrature on the very same grid of points.
  nodes, node_weights = onp.polynomial.hermite_e.hermegauss(deg)
  nodes = tf.cast(nodes, dtype=loc.dtype)
  node_weights = tf.cast(node_weights, dtype=loc.dtype)
  normalizer = tf.constant(onp.sqrt(2 * onp.pi), dtype=loc.dtype)

  # A trailing quadrature axis is added to the parameters.
  points = nodes * scale[..., tf.newaxis] + loc[..., tf.newaxis]
  sigmoids = tf.sigmoid(points)
  mean = tf.reduce_sum(sigmoids * node_weights, axis=-1) / normalizer
  squared_deviation = (sigmoids - mean[..., tf.newaxis])**2
  return tf.reduce_sum(squared_deviation * node_weights, axis=-1) / normalizer
def logit_normal_mean_gh(loc, scale, deg):
  """Approximates `E_{N(m,s)}[sigmoid(X)]` by Gauss-Hermite quadrature.

  Args:
    loc: Tensor of locations of the underlying normal distribution.
    scale: Tensor of scales of the underlying normal distribution.
    deg: Number of quadrature points.

  Returns:
    Tensor of mean estimates, with the quadrature axis reduced away.
  """
  # The target integral is
  #   A = \int_-inf^inf sigmoid(x) * Normal(loc, scale).pdf(x) dx
  # Substituting y = (x - loc) / scale brings it into Gauss-Hermite form:
  #   A = (1/sqrt(2*pi)) * \int_-inf^inf [
  #       sigmoid(y * scale + loc) * exp(-1/2 y**2) dy]
  nodes, node_weights = onp.polynomial.hermite_e.hermegauss(deg)
  nodes = tf.cast(nodes, dtype=loc.dtype)
  node_weights = tf.cast(node_weights, dtype=loc.dtype)
  normalizer = tf.constant(onp.sqrt(2 * onp.pi), dtype=loc.dtype)

  # A trailing quadrature axis is added to the parameters.
  points = nodes * scale[..., tf.newaxis] + loc[..., tf.newaxis]
  integrand = tf.sigmoid(points)
  return tf.reduce_sum(integrand * node_weights, axis=-1) / normalizer
def testVarianceWhenProbCloseToOne(self):
  """Checks that the variance stays positive and accurate when p is near 1."""
  # With a probability this close to 1.0 a naive `1 - p` is numerically 0,
  # which would make the variance zero. The main point is to verify that the
  # variance is > 0; its value is checked as well.

  # Pick logits such that sigmoid(logits) is less than float32 eps away from
  # 1.0, so that the naive 1 - sigmoid(logits) loses all precision.
  float32_eps = np.finfo(np.float32).eps
  one_minus_prob_64 = np.float64(float32_eps) / 2
  odds = (1. - one_minus_prob_64) / one_minus_prob_64
  logits_32 = np.float32(np.log(odds))

  # The naive formula indeed collapses to exactly zero for these logits,
  # which is what justifies computing sigmoid(-logits) instead.
  self.assertAllEqual(0., 1 - tf.sigmoid(logits_32))

  # Negating the logits keeps the tiny probability representable. The
  # assertions further down check that the distribution relies on this.
  self.assertGreater(self.evaluate(tf.sigmoid(-logits_32)), 0.)

  dist = tfd.Bernoulli(logits=logits_32)
  expected_variance = np.float32(
      one_minus_prob_64 * (1 - one_minus_prob_64))
  self.assertGreater(expected_variance, 0.)

  # Equivalent to atol=0, rtol=1e-6, but less likely to confuse which element
  # is being used for the "r" in rtol. This also ensures
  # dist.variance() > 0, which a naive implementation could not achieve.
  self.assertAllClose(
      dist.variance(),
      expected_variance,
      atol=expected_variance * 1e-6,
      rtol=0,
  )
def step(cell_inputs, cell_states):
  """Step function that will be used by Keras RNN backend.

  Args:
    cell_inputs: Input tensor for the current step.
    cell_states: Sequence whose first element is the previous hidden state.

  Returns:
    A `(h, [h])` tuple: the step output and the new state.
  """
  prev_h = cell_states[0]

  # Project the inputs by all gate matrices at once.
  input_proj = backend.dot(cell_inputs, kernel)
  input_proj = backend.bias_add(input_proj, input_bias)
  in_update, in_reset, in_candidate = tf.split(input_proj, 3, axis=1)

  # Project the hidden state by all gate matrices at once.
  hidden_proj = backend.dot(prev_h, recurrent_kernel)
  hidden_proj = backend.bias_add(hidden_proj, recurrent_bias)
  rec_update, rec_reset, rec_candidate = tf.split(hidden_proj, 3, axis=1)

  update_gate = tf.sigmoid(in_update + rec_update)
  reset_gate = tf.sigmoid(in_reset + rec_reset)
  candidate = tf.tanh(in_candidate + reset_gate * rec_candidate)

  # The update gate mixes the previous state with the candidate state.
  h = update_gate * prev_h + (1 - update_gate) * candidate
  return h, [h]
def make_precision_matrix_update_op(self, gp_feature, logits,
                                    precision_matrix):
  """Defines update op for the precision matrix of feature weights.

  Args:
    gp_feature: Feature tensor whose leading dimension is the batch size.
    logits: Model logits. Required, with a last dimension of 1, whenever
      `self.likelihood` is not 'gaussian'.
    precision_matrix: Variable holding the current precision matrix. It is
      overwritten through `assign`.

  Returns:
    The result of assigning the updated matrix to `precision_matrix`.

  Raises:
    ValueError: if the likelihood is not 'gaussian' and `logits` is `None`
      or is not univariate.
  """
  if self.likelihood != 'gaussian':
    if logits is None:
      raise ValueError(
          f'"logits" cannot be None when likelihood={self.likelihood}')
    if logits.shape[-1] != 1:
      # Note the trailing space: adjacent f-strings are concatenated as-is.
      raise ValueError(
          f'likelihood={self.likelihood} only support univariate logits. '
          f'Got logits dimension: {logits.shape[-1]}')

  batch_size = tf.shape(gp_feature)[0]
  batch_size = tf.cast(batch_size, dtype=gp_feature.dtype)

  # Computes batch-specific normalized precision matrix. Each example's
  # feature row is weighted by the square root of a likelihood-dependent
  # multiplier.
  if self.likelihood == 'binary_logistic':
    prob = tf.sigmoid(logits)
    prob_multiplier = prob * (1. - prob)
  elif self.likelihood == 'poisson':
    prob_multiplier = tf.exp(logits)
  else:
    prob_multiplier = 1.

  gp_feature_adjusted = tf.sqrt(prob_multiplier) * gp_feature
  precision_matrix_minibatch = tf.matmul(
      gp_feature_adjusted, gp_feature_adjusted, transpose_a=True)

  # Updates the population-wise precision matrix.
  if self.momentum > 0:
    # Use moving-average updates to accumulate batch-specific precision
    # matrices.
    precision_matrix_minibatch = precision_matrix_minibatch / batch_size
    precision_matrix_new = (
        self.momentum * precision_matrix +
        (1. - self.momentum) * precision_matrix_minibatch)
  else:
    # Compute exact population-wise covariance without momentum.
    # If use this option, make sure to pass through data only once.
    precision_matrix_new = precision_matrix + precision_matrix_minibatch

  # Returns the update op.
  return precision_matrix.assign(precision_matrix_new)
def sigmoid_cross_entropy_focal_loss(logits, labels, alpha=0.25, gamma=2.0):
  """Focal loss for binary (sigmoid) logistic loss.

  Args:
    logits: Tensor of logits.
    labels: Tensor of labels; cast to the dtype of `logits` and reshaped to
      its shape.
    alpha: Weight of the positives (negatives get `1 - alpha`). `None`
      disables the class balancing.
    gamma: Exponent of the modulating factor. `None` or 0 disables the
      modulation.

  Returns:
    The loss summed over the last axis.
  """
  labels = tf.reshape(tf.cast(labels, logits.dtype), logits.shape)

  # The numerically-stable way to compute
  # log(p) for positives;
  # log(1 - p) for negatives.
  loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)

  use_modulation = gamma is not None and gamma != 0
  if use_modulation:
    # The modulating factor. `1 - labels * 2` is -1 for positives and +1 for
    # negatives, so the sigmoid below is sigmoid(-x) for positives and
    # sigmoid(x) for negatives.
    signed_logits = logits * (1 - labels * 2)
    loss *= tf.pow(tf.sigmoid(signed_logits), gamma)

  if alpha is not None:
    # [1] Eq (3)
    class_weight = alpha * labels + (1 - alpha) * (1 - labels)
    loss *= class_weight

  return tf.reduce_sum(loss, axis=-1)
def posterior_mode(self, K, return_temporaries=False):
  """Iterates towards the mode of the latent values `f` for kernel `K`.

  Starts from the cached solution when warm starting (zeros otherwise) and
  repeats the update step up to `self.max_iter_predict` times, stopping
  early once the log marginal likelihood improves by less than 1e-10. The
  final `f` is cached on `self.f_cached`.

  NOTE(review): the update looks like the Newton iteration used for Laplace
  approximation in GP classification -- confirm against the reference
  implementation. `lml` and the iteration index are only bound if the loop
  runs at least once.

  Args:
    K: Kernel matrix of the training inputs.
    return_temporaries: Whether to return the intermediate quantities of the
      last iteration instead of the iteration index.

  Returns:
    `(f, lml, (pi, W_sr, L, b, a))` if `return_temporaries` is set, else
    `(f, lml, i)` where `i` is the index of the last iteration performed.
  """
  num_train = self.X_train_.shape[0]
  can_warm_start = self.warm_start and hasattr(self, "f_cached")
  if can_warm_start:
    f = self.f_cached
  else:
    f = tf.zeros(num_train, dtype=np.float64)

  best_lml = tf.constant(-np.inf, dtype='float64')
  for i in range(self.max_iter_predict):
    pi = tf.sigmoid(f)
    W = pi * (1 - pi)
    W_sr = tf.sqrt(W)
    W_sr_K = tf.reshape(W_sr, [-1, 1]) * K
    B = tf.eye(W.shape[0], dtype='float64') + W_sr_K * W_sr
    L = tf.linalg.cholesky(B)
    b = W * f + (self.y - pi)

    # Solve against the Cholesky factor of B to obtain the update `a`.
    rhs = tf.matmul(W_sr_K, tf.reshape(b, (-1, 1)))
    solved = tf.linalg.cholesky_solve(L, rhs)
    a = b - W_sr * tf.reshape(solved, [-1])
    f = tf.matmul(K, tf.reshape(a, [-1, 1]))

    # Log marginal likelihood: quadratic term, minus the logistic
    # log-likelihood penalty (labels mapped to -1/+1), minus the log of the
    # Cholesky diagonal.
    quadratic = -0.5 * tf.matmul(tf.reshape(a, [1, -1]), f)
    logistic = tf.reduce_sum(
        tf.math.log(
            1 + tf.math.exp(-(self.y * 2.0 - 1.0) * tf.reshape(f, [-1]))))
    log_diag = tf.reduce_sum(tf.math.log(tf.linalg.tensor_diag_part(L)))
    lml = quadratic - logistic - log_diag

    # Flatten `f` back to a vector for the next iteration.
    f = np.reshape(f, [-1])
    if lml[0, 0] - best_lml < 1e-10:
      break
    best_lml = lml

  self.f_cached = f
  if return_temporaries:
    return f, lml, (pi, W_sr, L, b, a)
  return f, lml, i
def __call__(self, box_outputs, class_outputs, anchor_boxes, image_shape):
  """Decodes per-level outputs into final detections.

  Args:
    box_outputs: Per-level box regression outputs, indexable by level.
    class_outputs: Per-level class logits, indexable by level.
    anchor_boxes: Per-level anchor boxes, indexable by level.
    image_shape: Image shape used to clip the decoded boxes.

  Returns:
    A `(nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections)` tuple
    as produced by `self._generate_detections`, with the classes shifted by
    one to account for the background class.
  """
  # Collects outputs from all levels into a list.
  all_boxes = []
  all_scores = []
  for level in range(self._min_level, self._max_level + 1):
    level_box_shape = tf.shape(box_outputs[level])
    batch_size = level_box_shape[0]
    anchors_per_location = level_box_shape[-1] // 4
    num_classes = tf.shape(class_outputs[level])[-1] // anchors_per_location

    # Applies score transformation and remove the implicit background class.
    level_logits = tf.reshape(
        class_outputs[level], [batch_size, -1, num_classes])
    level_scores = tf.sigmoid(level_logits)
    level_scores = tf.slice(level_scores, [0, 0, 1], [-1, -1, -1])

    # Box decoding.
    # The anchor boxes are shared for all data in a batch.
    # One stage detector only supports class agnostic box regression.
    level_anchors = tf.reshape(anchor_boxes[level], [batch_size, -1, 4])
    level_box_codes = tf.reshape(box_outputs[level], [batch_size, -1, 4])
    level_boxes = box_utils.decode_boxes(level_box_codes, level_anchors)

    # Box clipping.
    level_boxes = box_utils.clip_boxes(level_boxes, image_shape)

    all_boxes.append(level_boxes)
    all_scores.append(level_scores)

  boxes = tf.concat(all_boxes, axis=1)
  scores = tf.concat(all_scores, axis=1)

  detections = self._generate_detections(
      tf.expand_dims(boxes, axis=2), scores)
  nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = detections

  # Adds 1 to offset the background class which has index 0.
  nmsed_classes += 1
  return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections