def __init__(self, idx, mean, var, **kwargs):
    """
    :param mean: Tensor of shape [W] containing the mean at each parameter vertex
    :param var: Tensor of shape [W] containing the variance at each parameter vertex
    """
    Posterior.__init__(self, idx, **kwargs)
    self.nvertices = tf.shape(mean)[0]
    self.name = kwargs.get("name", "NormPost")

    mean, var = self._get_mean_var(mean, var, kwargs.get("init", None))
    mean = tf.cast(mean, tf.float32)
    var = tf.cast(var, tf.float32)
    mean = self.log_tf(tf.where(tf.is_finite(mean), mean, tf.zeros_like(mean)))
    var = tf.where(tf.is_nan(var), tf.ones_like(var), var)

    self.mean_variable = self.log_tf(tf.Variable(mean, validate_shape=False,
                                                 name="%s_mean" % self.name))
    self.log_var = self.log_tf(tf.Variable(tf.log(var), validate_shape=False,
                                           name="%s_log_var" % self.name))
    self.var_variable = self.log_tf(tf.exp(self.log_var, name="%s_var" % self.name))

    if kwargs.get("suppress_nan", True):
        #self.mean = tf.where(tf.is_nan(self.mean_variable), tf.ones_like(self.mean_variable), self.mean_variable)
        #self.var = tf.where(tf.is_nan(self.var_variable), tf.ones_like(self.var_variable), self.var_variable)
        self.mean = tf.where(tf.is_nan(self.mean_variable), mean, self.mean_variable)
        self.var = tf.where(tf.is_nan(self.var_variable), var, self.var_variable)
    else:
        self.mean = self.mean_variable
        self.var = self.var_variable
    self.std = self.log_tf(tf.sqrt(self.var, name="%s_std" % self.name))
def __init__(self, idx, mean, var, **kwargs):
    """
    :param mean: Tensor of shape [W] containing the mean at each parameter vertex
    :param var: Tensor of shape [W] containing the variance at each parameter vertex
    """
    Posterior.__init__(self, idx, **kwargs)
    self.nvertices = tf.shape(mean)[0]
    self.name = kwargs.get("name", "GlobalPost")

    mean, var = self._get_mean_var(mean, var, kwargs.get("init", None))

    # Take the mean of the mean and variance across vertices as the initial value
    # in case there is a vertexwise initialization function
    initial_mean_global = tf.reshape(tf.reduce_mean(mean), [1])
    initial_var_global = tf.reshape(tf.reduce_mean(var), [1])
    self.mean_variable = tf.Variable(initial_mean_global, dtype=tf.float32,
                                     validate_shape=False,
                                     name="%s_mean" % self.name)
    self.log_var = tf.Variable(tf.log(tf.cast(initial_var_global, dtype=tf.float32)),
                               validate_shape=False,
                               name="%s_log_var" % self.name)
    self.var_variable = self.log_tf(tf.exp(self.log_var, name="%s_var" % self.name))

    if kwargs.get("suppress_nan", True):
        self.mean_global = tf.where(tf.is_nan(self.mean_variable),
                                    initial_mean_global, self.mean_variable)
        self.var_global = tf.where(tf.is_nan(self.var_variable),
                                   initial_var_global, self.var_variable)
    else:
        self.mean_global = self.mean_variable
        self.var_global = self.var_variable

    self.mean = self.log_tf(tf.tile(self.mean_global, [self.nvertices]),
                            name="%s_meang" % self.name)
    self.var = tf.tile(self.var_global, [self.nvertices])
    self.std = self.log_tf(tf.sqrt(self.var, name="%s_std" % self.name))
def replace_nan_groundtruth_label_scores_with_ones(label_scores):
    """Replaces nan label scores with 1.0.

    Args:
      label_scores: a tensor containing object annotation label scores.

    Returns:
      a tensor where NaN label scores have been replaced by ones.
    """
    return tf.where(
        tf.is_nan(label_scores), tf.ones(tf.shape(label_scores)), label_scores)
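# Hypothetical usage sketch (not part of the original snippet): assuming TensorFlow 1.x
# graph mode and the helper above in scope. The input values are made up for
# illustration; the NaN entry is mapped to 1.0, the finite scores pass through.
import numpy as np
import tensorflow as tf

label_scores = tf.constant([0.3, np.nan, 0.9], dtype=tf.float32)
cleaned_scores = replace_nan_groundtruth_label_scores_with_ones(label_scores)
with tf.Session() as sess:
    print(sess.run(cleaned_scores))  # expected: [0.3, 1.0, 0.9]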
def _clamp_and_filter_result(pixel_x, pixel_y, z):
    """Clamps and masks out out-of-bounds pixel coordinates.

    Args:
      pixel_x: a tf.Tensor containing x pixel coordinates in an image.
      pixel_y: a tf.Tensor containing y pixel coordinates in an image.
      z: a tf.Tensor containing the depth at each (pixel_y, pixel_x). All shapes
        are [B, H, W].

    Returns:
      pixel_x, pixel_y, mask, where pixel_x and pixel_y are the original ones,
      except:
      - Values that fall out of the image bounds, which are [0, W-1) in x and
        [0, H-1) in y, are clamped to the bounds.
      - NaN values in pixel_x, pixel_y are replaced by zeros.
      mask is False at all points where:
      - Clamping in pixel_x or pixel_y was performed.
      - NaNs were replaced by zeros.
      - z is non-positive,
      and True everywhere else, that is, where pixel_x, pixel_y are finite and
      fall within the frame.
    """
    with tf.name_scope("Clamp", values=[pixel_x, pixel_y, z]):
        _, height, width = tf.unstack(tf.shape(pixel_x))

        def _tensor(x):
            return tf.to_float(tf.convert_to_tensor(x))

        x_not_underflow = pixel_x >= 0.0
        y_not_underflow = pixel_y >= 0.0
        x_not_overflow = pixel_x < _tensor(width - 1)
        y_not_overflow = pixel_y < _tensor(height - 1)
        z_positive = z > 0.0
        x_not_nan = tf.math.logical_not(tf.is_nan(pixel_x))
        y_not_nan = tf.math.logical_not(tf.is_nan(pixel_y))
        not_nan = tf.logical_and(x_not_nan, y_not_nan)
        not_nan_mask = tf.to_float(not_nan)
        pixel_x *= not_nan_mask
        pixel_y *= not_nan_mask
        pixel_x = tf.clip_by_value(pixel_x, 0.0, _tensor(width - 1))
        pixel_y = tf.clip_by_value(pixel_y, 0.0, _tensor(height - 1))
        mask_stack = tf.stack(
            [
                x_not_underflow,
                y_not_underflow,
                x_not_overflow,
                y_not_overflow,
                z_positive,
                not_nan,
            ],
            axis=0,
        )
        mask = tf.reduce_all(mask_stack, axis=0)
        return pixel_x, pixel_y, mask
def create_topk_unique(inputs, k):
    height = inputs.shape[0]
    width = inputs.shape[1]
    neg_inf_r0 = tf.constant(-np.inf, dtype=tf.float32)
    ones = tf.ones([height, width], dtype=tf.float32)
    neg_inf_r2 = ones * neg_inf_r0
    inputs = tf.where(tf.is_nan(inputs), neg_inf_r2, inputs)

    tmp = inputs
    topk_r2 = tf.zeros([height, k], dtype=tf.float32)
    for i in range(k):
        kth_order_statistic = tf.reduce_max(tmp, axis=1, keepdims=True)
        k_mask = tf.tile(
            tf.expand_dims(tf.equal(tf.range(k), tf.fill([k], i)), 0),
            [height, 1])
        topk_r2 = tf.where(k_mask, tf.tile(kth_order_statistic, [1, k]), topk_r2)
        ge_r2 = tf.greater_equal(inputs, tf.tile(kth_order_statistic, [1, width]))
        tmp = tf.where(ge_r2, neg_inf_r2, inputs)

    log2_ceiling = int(math.ceil(math.log(float(int(width)), 2)))
    next_power_of_two = 1 << log2_ceiling
    count_mask = next_power_of_two - 1
    mask_r0 = tf.constant(count_mask)
    mask_r2 = tf.fill([height, k], mask_r0)
    topk_r2_s32 = tf.bitcast(topk_r2, tf.int32)
    topk_indices_r2 = tf.bitwise.bitwise_and(topk_r2_s32, mask_r2)
    return topk_r2, topk_indices_r2
def _select_columns(self, mode, features):
    input_mask = features["input_mask"]
    column_ids = features["column_ids"]
    with tf.variable_scope("bert"):
        with tf.variable_scope("embeddings", reuse=tf.compat.v1.AUTO_REUSE):
            input_embeddings, _ = modeling.embedding_lookup(
                input_ids=features["input_ids"],
                vocab_size=self._vocab_size,
                embedding_size=self._hidden_size,
                initializer_range=self._initializer_range,
                word_embedding_name="word_embeddings")
            if self._use_positional_embeddings:
                token_type_ids = []
                token_type_features = [
                    "segment_ids", "column_ids", "row_ids", "prev_label_ids",
                    "column_ranks", "inv_column_ranks", "numeric_relations"
                ]
                for key in token_type_features:
                    if self._disabled_features is not None and key in self._disabled_features:
                        token_type_ids.append(tf.zeros_like(features[key]))
                    else:
                        token_type_ids.append(features[key])
                input_embeddings = modeling.embedding_postprocessor(
                    input_tensor=input_embeddings,
                    use_token_type=True,
                    token_type_ids=token_type_ids,
                    token_type_vocab_size=self._type_vocab_size,
                    token_type_embedding_name="token_type_embeddings",
                    use_position_embeddings=self._use_position_embeddings,
                    position_embedding_name="position_embeddings",
                    initializer_range=self._initializer_range,
                    max_position_embeddings=self._max_position_embeddings,
                    extra_embeddings=None,
                    dropout_prob=0.0)

        # Indexes all the zero values from the input_mask by (max_num_columns+1).
        # The index 0 is for the question and from 1 to max_num_columns included
        # is for the columns.
        masked_col_ids = column_ids * input_mask + (1 - input_mask) * (
            self._max_num_columns + 1)
        col_index = segmented_tensor.IndexMap(
            indices=masked_col_ids,
            num_segments=self._max_num_columns + 2,
            batch_dims=1)
        average_embeddings, _ = segmented_tensor.reduce_mean(
            input_embeddings, col_index)
        # Removes the last index as it contains the avg of non selected values
        average_embeddings = average_embeddings[:, :-1]
        normalize_average_embeddings = tf.math.l2_normalize(
            average_embeddings, axis=2)
        questions_embeddings = normalize_average_embeddings[:, :1]
        columns_embeddings = normalize_average_embeddings[:, 1:]
        multiply = columns_embeddings * questions_embeddings
        multiply = tf.where(tf.is_nan(multiply), tf.zeros_like(multiply),
                            multiply)
        column_scores = tf.math.reduce_sum(
            multiply, axis=-1, name="column_scores")
        return column_scores
def gradient_summaries(gvs, suppress_inf_and_nans=False):
    """Creates summaries for norm, mean and var of gradients."""
    gs = [gv[0] for gv in gvs]
    grad_global_norm = tf.global_norm(gs, 'gradient_global_norm')

    if suppress_inf_and_nans:
        is_nan_or_inf = tf.logical_or(tf.is_nan(grad_global_norm),
                                      tf.is_inf(grad_global_norm))
        grad_global_norm = tf.where(is_nan_or_inf,
                                    tf.zeros_like(grad_global_norm) - 1.,
                                    grad_global_norm)

    grad_abs_max, grad_abs_mean, grad_mean, grad_var = [0.] * 4
    n_grads = 1e-8
    for g, _ in gvs:
        if isinstance(g, tf.IndexedSlices):
            g = g.values

        if g is not None:
            current_n_grads = np.prod(g.shape.as_list())
            abs_g = abs(g)
            mean, var = tf.nn.moments(g, list(range(len(g.shape))))
            grad_abs_max = tf.maximum(grad_abs_max, tf.reduce_max(abs_g))
            grad_abs_mean += tf.reduce_sum(abs_g)
            grad_mean += mean * current_n_grads
            grad_var += var
            n_grads += current_n_grads

    tf.summary.scalar('grad/abs_max', grad_abs_max)
    tf.summary.scalar('grad/abs_mean', grad_abs_mean / n_grads)
    tf.summary.scalar('grad/mean', grad_mean / n_grads)
    tf.summary.scalar('grad/var', grad_var / n_grads)

    return dict(grad_global_norm=grad_global_norm)
def filter_groundtruth_with_nan_box_coordinates(tensor_dict):
    """Filters out groundtruth with NaN bounding box coordinates.

    Args:
      tensor_dict: a dictionary of following groundtruth tensors -
        fields.InputDataFields.groundtruth_boxes
        fields.InputDataFields.groundtruth_classes
        fields.InputDataFields.groundtruth_confidences
        fields.InputDataFields.groundtruth_keypoints
        fields.InputDataFields.groundtruth_instance_masks
        fields.InputDataFields.groundtruth_is_crowd
        fields.InputDataFields.groundtruth_area
        fields.InputDataFields.groundtruth_label_types

    Returns:
      a dictionary of tensors containing only the groundtruth with valid
      (non-NaN) bounding boxes.
    """
    groundtruth_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
    nan_indicator_vector = tf.greater(
        tf.reduce_sum(
            tf.cast(tf.is_nan(groundtruth_boxes), dtype=tf.int32),
            reduction_indices=[1]), 0)
    valid_indicator_vector = tf.logical_not(nan_indicator_vector)
    valid_indices = tf.where(valid_indicator_vector)

    return retain_groundtruth(tensor_dict, valid_indices)
def _calculate_regression_loss(answer, aggregate_mask, dist_per_cell,
                               numeric_values, numeric_values_scale,
                               input_mask_float, logits_aggregation, config):
    """Calculates the regression loss per example.

    Args:
      answer: <float32>[batch_size]
      aggregate_mask: <float32>[batch_size]
      dist_per_cell: Cell selection distribution for each cell.
      numeric_values: <float32>[batch_size, seq_length]
      numeric_values_scale: <float32>[batch_size, seq_length]
      input_mask_float: <float32>[batch_size, seq_length]
      logits_aggregation: <float32>[batch_size, num_aggregation_labels]
        probabilities.
      config: Configuration for Tapas model.

    Returns:
      per_example_answer_loss_scaled: <float32>[batch_size]. Scales answer loss
        for each example in the batch.
      large_answer_loss_mask: <float32>[batch_size]. A mask which is 1 for
        examples for which their answer loss is larger than the
        answer_loss_cutoff.
    """
    # <float32>[batch_size]
    expected_result = _calculate_expected_result(dist_per_cell, numeric_values,
                                                 numeric_values_scale,
                                                 input_mask_float,
                                                 logits_aggregation, config)
    # <float32>[batch_size]
    answer_masked = tf.where(tf.is_nan(answer), tf.zeros_like(answer), answer)

    if config.use_normalized_answer_loss:
        normalizer = tf.stop_gradient(
            tf.math.maximum(tf.math.abs(expected_result),
                            tf.math.abs(answer_masked)) +
            _EPSILON_ZERO_DIVISION)
        normalized_answer_masked = answer_masked / normalizer
        normalized_expected_result = expected_result / normalizer
        per_example_answer_loss = tf.losses.huber_loss(
            normalized_answer_masked * aggregate_mask,
            normalized_expected_result * aggregate_mask,
            delta=tf.cast(config.huber_loss_delta, tf.float32),
            reduction=tf.losses.Reduction.NONE)
    else:
        per_example_answer_loss = tf.losses.huber_loss(
            answer_masked * aggregate_mask,
            expected_result * aggregate_mask,
            delta=tf.cast(config.huber_loss_delta, tf.float32),
            reduction=tf.losses.Reduction.NONE)
    if config.answer_loss_cutoff is None:
        large_answer_loss_mask = tf.ones_like(
            per_example_answer_loss, dtype=tf.float32)
    else:
        large_answer_loss_mask = tf.where(
            per_example_answer_loss > config.answer_loss_cutoff,
            tf.zeros_like(per_example_answer_loss, dtype=tf.float32),
            tf.ones_like(per_example_answer_loss, dtype=tf.float32))
    per_example_answer_loss_scaled = config.answer_loss_importance * (
        per_example_answer_loss * aggregate_mask)
    return per_example_answer_loss_scaled, large_answer_loss_mask
def _compile_POPLINP_cost(self, weight_input, cem_type, tf_data_dict):
    """
    @brief: The input is the noise in the weight space.

    @weight_input: size [pop_size, plan_hor, weight_size]
    """
    policy_network = tf_data_dict['policy_network']

    # nopt is the number of candidate solutions
    t, nopt = tf.constant(0), tf.shape(weight_input)[0]
    init_costs = tf.zeros([nopt, self.npart])
    init_obs = tf.tile(self.sy_cur_obs[None], [nopt * self.npart, 1])

    weight_input = tf.reshape(
        tf.tile(
            tf.transpose(weight_input, [1, 0, 2])[:, :, None],
            [1, 1, self.npart, 1]  # hor, popsize, npart, dU
        ),
        [self.plan_hor, -1, tf_data_dict['weight_size']])

    def limit_action(action):
        return tf.minimum(tf.maximum(action, self.ac_lb[0]), self.ac_ub[0])

    if cem_type in ['POPLINP-SEP', 'POPLINP-UNI']:
        # step 2: cem on top of the @proposed_act_seqs
        def iteration(t, total_cost, cur_obs):
            cur_acs = policy_network.forward_network(cur_obs, weight_input[t])
            cur_acs = limit_action(cur_acs)
            next_obs = self._predict_next_obs(cur_obs, cur_acs)
            if self.obs_ac_cost_fn is not None:
                delta_cost = tf.reshape(
                    self.obs_ac_cost_fn(next_obs, cur_acs), [-1, self.npart])
            else:
                delta_cost = tf.reshape(
                    self.obs_cost_fn(next_obs) + self.ac_cost_fn(cur_acs),
                    [-1, self.npart])
            return t + 1, total_cost + delta_cost, \
                self.obs_postproc2(next_obs), cur_acs
    else:
        raise NotImplementedError

    total_cost, cur_obs = init_costs, init_obs
    for t in range(self.plan_hor):
        _, total_cost, cur_obs, cur_acs = iteration(t, total_cost, cur_obs)
    costs = total_cost

    # replace nan costs with very high cost
    return tf.reduce_mean(
        tf.where(tf.is_nan(costs), 1e6 * tf.ones_like(costs), costs), axis=1)
def f1_loss(y_true, y_pred):
    tp = K.sum(K.cast(y_true * y_pred, 'float'), axis=0)
    fp = K.sum(K.cast((1 - y_true) * y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true * (1 - y_pred), 'float'), axis=0)

    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    f1 = 2 * p * r / (p + r + K.epsilon())
    f1 = tf.where(tf.is_nan(f1), tf.zeros_like(f1), f1)
    return 1 - K.mean(f1)
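# Hypothetical usage sketch (not part of the original snippet): assuming TensorFlow 1.x
# with the Keras backend imported as K, as the function above requires. The second
# class below never occurs and is never predicted; the K.epsilon() terms already keep
# its precision/recall finite, and the tf.where guard maps any residual NaN per-class
# F1 to 0 before the mean is taken.
import tensorflow as tf
from tensorflow.keras import backend as K

y_true = tf.constant([[1., 0.], [1., 0.]])
y_pred = tf.constant([[0.9, 0.], [0.8, 0.]])
loss = f1_loss(y_true, y_pred)
with tf.Session() as sess:
    print(sess.run(loss))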
def get_gen_loss(args, xfake, ll_fake, score_func, z_outer):
    opt_gen = tf.train.AdamOptimizer(
        learning_rate=args.learning_rate, beta1=args.beta1, beta2=args.beta2)
    f_sampled_x = score_func(xfake, z_outer)
    loss = -tf.reduce_mean(f_sampled_x) + args.ent_lam * tf.reduce_mean(ll_fake)
    gvs = opt_gen.compute_gradients(
        loss, var_list=tf.trainable_variables(scope='generator'))
    gvs = [(tf.where(tf.is_nan(grad), tf.zeros_like(grad), grad), val)
           for grad, val in gvs if grad is not None]
    train_gen = opt_gen.apply_gradients(gvs)
    return loss, train_gen
def _get_cubic_root(self):
    """Get the cubic root."""
    # We have the equation x^2 D^2 + (1-x)^4 * C / h_min^2
    # where x = sqrt(mu).
    # We substitute x, which is sqrt(mu), with x = y + 1.
    # It gives y^3 + py = q
    # where p = (D^2 h_min^2)/(2*C) and q = -p.
    # We use the Vieta's substitution to compute the root.
    # There is only one real solution y (which is in [0, 1]).
    # http://mathworld.wolfram.com/VietasSubstitution.html
    assert_array = [
        tf.Assert(
            tf.logical_not(tf.is_nan(self._dist_to_opt_avg)),
            [self._dist_to_opt_avg,]),
        tf.Assert(
            tf.logical_not(tf.is_nan(self._h_min)),
            [self._h_min,]),
        tf.Assert(
            tf.logical_not(tf.is_nan(self._grad_var)),
            [self._grad_var,]),
        tf.Assert(
            tf.logical_not(tf.is_inf(self._dist_to_opt_avg)),
            [self._dist_to_opt_avg,]),
        tf.Assert(
            tf.logical_not(tf.is_inf(self._h_min)),
            [self._h_min,]),
        tf.Assert(
            tf.logical_not(tf.is_inf(self._grad_var)),
            [self._grad_var,])
    ]
    with tf.control_dependencies(assert_array):
        p = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var
        w3 = (-tf.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0
        w = tf.sign(w3) * tf.pow(tf.abs(w3), 1.0 / 3.0)
        y = w - p / 3.0 / w
        x = y + 1
    return x
def __call__(self,
             prediction_tensor,
             target_tensor,
             ignore_nan_targets=False,
             losses_mask=None,
             scope=None,
             **params):
    """Call the loss function.

    Args:
      prediction_tensor: an N-d tensor of shape [batch, anchors, ...]
        representing predicted quantities.
      target_tensor: an N-d tensor of shape [batch, anchors, ...] representing
        regression or classification targets.
      ignore_nan_targets: whether to ignore nan targets in the loss computation.
        E.g. can be used if the target tensor is missing groundtruth data that
        shouldn't be factored into the loss.
      losses_mask: A [batch] boolean tensor that indicates whether losses should
        be applied to individual images in the batch. For elements that are
        False, corresponding prediction, target, and weight tensors will not
        contribute to loss computation. If None, no filtering will take place
        prior to loss computation.
      scope: Op scope name. Defaults to 'Loss' if None.
      **params: Additional keyword arguments for specific implementations of
        the Loss.

    Returns:
      loss: a tensor representing the value of the loss function.
    """
    with tf.name_scope(scope, 'Loss',
                       [prediction_tensor, target_tensor, params]) as scope:
        if ignore_nan_targets:
            target_tensor = tf.where(tf.is_nan(target_tensor),
                                     prediction_tensor,
                                     target_tensor)
        if losses_mask is not None:
            tensor_multiplier = self._get_loss_multiplier_for_tensor(
                prediction_tensor, losses_mask)
            prediction_tensor *= tensor_multiplier
            target_tensor *= tensor_multiplier

            if 'weights' in params:
                params['weights'] = tf.convert_to_tensor(params['weights'])
                weights_multiplier = self._get_loss_multiplier_for_tensor(
                    params['weights'], losses_mask)
                params['weights'] *= weights_multiplier
        return self._compute_loss(prediction_tensor, target_tensor, **params)
def _calculate_aggregate_mask(answer, output_layer_aggregation, output_bias_agg,
                              output_weights_agg, cell_select_pref, label_ids):
    """Finds examples where the model should select cells with no aggregation.

    Returns a mask that determines for which examples the model should select
    answers directly from the table, without any aggregation function. If the
    answer is a piece of text the case is unambiguous as aggregation functions
    only apply to numbers. If the answer is a number but does not appear in the
    table then we must use some aggregation case. The ambiguous case is when the
    answer is a number that also appears in the table. In this case we use the
    aggregation function probabilities predicted by the model to decide whether
    to select or aggregate. The threshold for this is a hyperparameter
    `cell_select_pref`.

    Args:
      answer: <float32>[batch_size]
      output_layer_aggregation: <float32>[batch_size, hidden_size]
      output_bias_agg: <float32>[num_aggregation_labels]
      output_weights_agg: <float32>[num_aggregation_labels, hidden_size_agg]
      cell_select_pref: Preference for cell selection in ambiguous cases.
      label_ids: <int32>[batch_size, seq_length]

    Returns:
      aggregate_mask: <float32>[batch_size] A mask set to 1 for examples that
        should use aggregation functions.
    """
    # <float32>[batch_size]
    aggregate_mask_init = tf.cast(tf.logical_not(tf.is_nan(answer)), tf.float32)
    logits_aggregation = _calculate_aggregation_logits(
        output_layer_aggregation, output_weights_agg, output_bias_agg)
    dist_aggregation = tfp.distributions.Categorical(logits=logits_aggregation)
    aggregation_ops_total_mass = tf.reduce_sum(
        _get_probs(dist_aggregation)[:, 1:], axis=1)
    # Cell selection examples according to current model.
    is_pred_cell_selection = aggregation_ops_total_mass <= cell_select_pref
    # Examples with non-empty cell selection supervision.
    is_cell_supervision_available = tf.reduce_sum(label_ids, axis=1) > 0
    aggregate_mask = tf.where(
        tf.logical_and(is_pred_cell_selection, is_cell_supervision_available),
        tf.zeros_like(aggregate_mask_init, dtype=tf.float32),
        aggregate_mask_init)
    aggregate_mask = tf.stop_gradient(aggregate_mask)
    return aggregate_mask
def reduce_nanmean(tensor, axes=None, keepdims=False, name=None):
    """Take the mean of a tensor, skipping NaNs.

    Args:
      tensor: tensor to reduce.
      axes: optional list of axes to reduce.
      keepdims: optional boolean indicating whether to keep dimensions or not.
      name: optional op name.

    Returns:
      tf.Tensor with reduced values.
    """
    masked = tf.is_nan(tensor)
    valid_tensor = tf.where(masked, tf.zeros_like(tensor), tensor)
    total = tf.reduce_sum(valid_tensor, axes, keepdims=keepdims)
    counts = tf.reduce_sum(tf.cast(tf.logical_not(masked), tensor.dtype),
                           axes, keepdims=keepdims)
    return tf.div(total, counts, name=name)
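# Hypothetical usage sketch (not part of the original snippet): assuming TensorFlow 1.x
# graph mode. NaN entries are excluded from both the sum and the count, so the result
# is the mean of the finite values only, here (1 + 3) / 2 = 2.
import numpy as np
import tensorflow as tf

x = tf.constant([1.0, np.nan, 3.0])
nanmean = reduce_nanmean(x)
with tf.Session() as sess:
    print(sess.run(nanmean))  # expected: 2.0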
def _create_topk_unique(inputs, k):
    """Creates the top k values in sorted order with indices.

    Args:
      inputs: A tensor with rank of 2. [batch_size, original_size].
      k: An integer, number of top elements to select.

    Returns:
      topk_r2: A tensor, the k largest elements. [batch_size, k].
      topk_indices_r2: A tensor, indices of the top k values. [batch_size, k].
    """
    height = inputs.shape[0]
    width = inputs.shape[1]
    neg_inf_r0 = tf.constant(-np.inf, dtype=tf.float32)
    ones = tf.ones([height, width], dtype=tf.float32)
    neg_inf_r2 = ones * neg_inf_r0
    inputs = tf.where(tf.is_nan(inputs), neg_inf_r2, inputs)

    # Select the current largest value k times and keep them in topk_r2. The
    # selected largest values are marked as the smallest value to avoid being
    # selected again.
    tmp = inputs
    topk_r2 = tf.zeros([height, k], dtype=tf.float32)
    for i in range(k):
        kth_order_statistic = tf.reduce_max(tmp, axis=1, keepdims=True)
        k_mask = tf.tile(
            tf.expand_dims(tf.equal(tf.range(k), tf.fill([k], i)), 0),
            [height, 1])
        topk_r2 = tf.where(k_mask, tf.tile(kth_order_statistic, [1, k]), topk_r2)
        ge_r2 = tf.greater_equal(inputs, tf.tile(kth_order_statistic, [1, width]))
        tmp = tf.where(ge_r2, neg_inf_r2, inputs)

    log2_ceiling = int(math.ceil(math.log(float(int(width)), 2)))
    next_power_of_two = 1 << log2_ceiling
    count_mask = next_power_of_two - 1
    mask_r0 = tf.constant(count_mask)
    mask_r2 = tf.fill([height, k], mask_r0)
    topk_r2_s32 = tf.bitcast(topk_r2, tf.int32)
    topk_indices_r2 = tf.bitwise.bitwise_and(topk_r2_s32, mask_r2)
    return topk_r2, topk_indices_r2
def __init__(self, posts, **kwargs):
    FactorisedPosterior.__init__(self, posts, **kwargs)

    # The full covariance matrix is formed from the Cholesky decomposition
    # to ensure that it remains positive definite.
    #
    # To achieve this, we have to create PxP tensor variables for
    # each parameter vertex, but we then extract only the lower triangular
    # elements and train only on these. The diagonal elements
    # are constructed by the FactorisedPosterior
    if kwargs.get("init", None):
        # We are initializing from an existing posterior.
        # The FactorisedPosterior will already have extracted the mean and
        # diagonal of the covariance matrix - we need the Cholesky decomposition
        # of the covariance to initialize the off-diagonal terms
        self.log.info(" - Initializing posterior covariance from input posterior")
        _mean, cov = kwargs["init"]
        covar_init = tf.cholesky(cov)
    else:
        covar_init = tf.zeros([self.nvertices, self.nparams, self.nparams],
                              dtype=tf.float32)

    self.off_diag_vars_base = self.log_tf(tf.Variable(
        covar_init, validate_shape=False,
        name='%s_off_diag_vars' % self.name))
    if kwargs.get("suppress_nan", True):
        self.off_diag_vars = tf.where(tf.is_nan(self.off_diag_vars_base),
                                      tf.zeros_like(self.off_diag_vars_base),
                                      self.off_diag_vars_base)
    else:
        self.off_diag_vars = self.off_diag_vars_base
    self.off_diag_cov_chol = tf.matrix_set_diag(
        tf.matrix_band_part(self.off_diag_vars, -1, 0),
        tf.zeros([self.nvertices, self.nparams]),
        name='%s_off_diag_cov_chol' % self.name)

    # Combine diagonal and off-diagonal elements into full matrix
    self.cov_chol = tf.add(tf.matrix_diag(self.std), self.off_diag_cov_chol,
                           name='%s_cov_chol' % self.name)

    # Form the covariance matrix from the chol decomposition
    self.cov = tf.matmul(tf.transpose(self.cov_chol, perm=(0, 2, 1)),
                         self.cov_chol,
                         name='%s_cov' % self.name)

    self.cov_chol = self.log_tf(self.cov_chol)
    self.cov = self.log_tf(self.cov)
def fun_w(self, x, low, up):
    I1 = 0.110987
    x_list = tf.split(x, self.dim, 1)

    #**************************************************
    x_scale_list = []
    h_len = (up - low) / 2.0
    for i in range(self.dim):
        x_scale = (x_list[i] - low - h_len) / h_len
        x_scale_list.append(x_scale)

    #************************************************
    z_x_list = []
    for i in range(self.dim):
        supp_x = tf.greater(1 - tf.abs(x_scale_list[i]), 0)
        z_x = tf.where(supp_x,
                       tf.exp(1 / (tf.pow(x_scale_list[i], 2) - 1)) / I1,
                       tf.zeros_like(x_scale_list[i]))
        z_x_list.append(z_x)

    #***************************************************
    w_val = tf.constant(1.0)
    for i in range(self.dim):
        w_val = tf.multiply(w_val, z_x_list[i])

    dw = tf.gradients(w_val, x, unconnected_gradients='zero')[0]
    dw = tf.where(tf.is_nan(dw), tf.zeros_like(dw), dw)
    return (w_val, dw)
def get_disc_loss(args, x, x_fake, score_func, z_outer, neg_kl_outer):
    opt_disc = tf.train.AdamOptimizer(
        learning_rate=args.learning_rate, beta1=args.beta1, beta2=args.beta2)
    fx = score_func(x, z_outer)
    f_fake_x = score_func(x_fake, z_outer)
    f_loss = tf.reduce_mean(-fx) + tf.reduce_mean(f_fake_x)
    loss = f_loss + tf.reduce_mean(-neg_kl_outer)
    if args.gp_lambda > 0:  # add gradient penalty
        alpha = tf.random.uniform(shape=(tf.shape(x)[0], 1, 1))
        x_hat = alpha * x + (1 - alpha) * x_fake
        d_hat = score_func(x_hat, tf.stop_gradient(z_outer))
        ddx = tf.gradients(d_hat, x_hat)[0]
        ddx = tf.sqrt(tf.reduce_sum(tf.square(ddx), axis=[1, 2]))
        ddx = tf.reduce_mean(tf.square(ddx - 1.0)) * args.gp_lambda
        loss = loss + ddx
    gvs = opt_disc.compute_gradients(
        loss, var_list=tf.trainable_variables(scope='score_func'))
    gvs = [(tf.where(tf.is_nan(grad), tf.zeros_like(grad), grad), val)
           for grad, val in gvs if grad is not None]
    train_disc = opt_disc.apply_gradients(gvs)
    return f_loss, train_disc
def replace_nan_with_value(tensor, value):
    return tf.where(tf.is_nan(tensor), value * tf.ones_like(tensor), tensor)
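# Hypothetical usage sketch (not part of the original snippet): assuming TensorFlow 1.x.
# A typical use is imputing a sentinel value for missing entries before further math.
import numpy as np
import tensorflow as tf

features = tf.constant([[0.5, np.nan], [np.nan, 2.0]])
imputed = replace_nan_with_value(features, -1.0)
with tf.Session() as sess:
    print(sess.run(imputed))  # expected: [[0.5, -1.0], [-1.0, 2.0]]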
def maybe_gen_fake_data_based_on_real_data(image, label, reso,
                                           min_fake_lesion_ratio,
                                           gen_fake_probability):
    """Remove real lesion and synthesize lesion."""
    # TODO(lehou): Replace magic numbers with flag variables.
    gen_prob_indicator = tf.random_uniform(
        shape=[], minval=0.0, maxval=1.0, dtype=tf.float32)

    background_mask = tf.less(label, 0.5)
    lesion_mask = tf.greater(label, 1.5)
    liver_mask = tf.logical_not(tf.logical_or(background_mask, lesion_mask))

    liver_intensity = tf.boolean_mask(image, liver_mask)
    lesion_intensity = tf.boolean_mask(image, lesion_mask)

    intensity_diff = tf.reduce_mean(liver_intensity) - (
        tf.reduce_mean(lesion_intensity))
    intensity_diff *= 1.15
    intensity_diff = tf.cond(tf.is_nan(intensity_diff),
                             lambda: 0.0, lambda: intensity_diff)

    lesion_liver_ratio = 0.0
    lesion_liver_ratio += tf.random.normal(shape=[], mean=0.01, stddev=0.01)
    lesion_liver_ratio += tf.random.normal(shape=[], mean=0.0, stddev=0.05)
    lesion_liver_ratio = tf.clip_by_value(lesion_liver_ratio,
                                          min_fake_lesion_ratio,
                                          min_fake_lesion_ratio + 0.20)

    fake_lesion_mask = tf.logical_and(
        _gen_rand_mask(ratio_mean=lesion_liver_ratio, ratio_stddev=0.0,
                       scale=reso // 32, shape=label.shape,
                       smoothness=reso // 32),
        tf.logical_not(background_mask))
    liver_mask = tf.logical_not(
        tf.logical_or(background_mask, fake_lesion_mask))

    # Blur the masks
    lesion_mask_blur = tf.squeeze(
        tf.nn.conv3d(
            tf.expand_dims(tf.expand_dims(tf.cast(lesion_mask, tf.float32), -1), 0),
            filter=tf.ones([reso // 32] * 3 + [1, 1], tf.float32) / (reso // 32)**3,
            strides=[1, 1, 1, 1, 1],
            padding='SAME'))
    fake_lesion_mask_blur = tf.squeeze(
        tf.nn.conv3d(
            tf.expand_dims(tf.expand_dims(tf.cast(fake_lesion_mask, tf.float32), -1), 0),
            filter=tf.ones([reso // 32] * 3 + [1, 1], tf.float32) / (reso // 32)**3,
            strides=[1, 1, 1, 1, 1],
            padding='SAME'))

    # Remove real lesion and add fake lesion.
    # If the intensity difference is too small (maybe no liver or lesion region
    # labeled), do not generate fake data.
    gen_prob_indicator = tf.cond(tf.greater(intensity_diff, 0.0001),
                                 lambda: gen_prob_indicator, lambda: 0.0)
    # pylint: disable=g-long-lambda
    image = tf.cond(
        tf.greater(gen_prob_indicator, 1 - gen_fake_probability),
        lambda: image + intensity_diff * lesion_mask_blur \
                - intensity_diff * fake_lesion_mask_blur,
        lambda: image)
    label = tf.cond(
        tf.greater(gen_prob_indicator, 1 - gen_fake_probability),
        lambda: tf.cast(background_mask, tf.float32) * 0 + \
                tf.cast(liver_mask, tf.float32) * 1 + \
                tf.cast(fake_lesion_mask, tf.float32) * 2,
        lambda: label)
    # pylint: enable=g-long-lambda

    return image, label
def _deal_with_nan(grad):
    # Reference: https://stackoverflow.com/questions/33712178/tensorflow-nan-bug
    assert isinstance(grad, tf.Tensor)
    return tf.where(tf.is_nan(grad), tf.zeros(grad.shape), grad)
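# Hypothetical usage sketch (not part of the original snippet): assuming TensorFlow 1.x.
# The helper is typically applied per gradient before apply_gradients, mirroring the
# NaN-suppressing gradient pipelines elsewhere in this collection. The loss and
# variable here are made up for illustration.
import tensorflow as tf

w = tf.Variable([1.0, 2.0])
loss = tf.reduce_sum(tf.square(w - 2.0))
opt = tf.train.GradientDescentOptimizer(learning_rate=0.1)
grads_and_vars = opt.compute_gradients(loss, var_list=[w])
grads_and_vars = [(_deal_with_nan(g), v) for g, v in grads_and_vars if g is not None]
train_op = opt.apply_gradients(grads_and_vars)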
def normalize_each_feature(observation_values, obs_code, vocab_size, mode,
                           momentum):
    """Combines SparseTensors of observation codes and values into a Tensor.

    Args:
      observation_values: A SparseTensor of type float with the observation
        values of dense shape [batch_size, max_sequence_length, 1]. There may
        be no time gaps in between codes.
      obs_code: A Tensor of shape [?, 3] of type int32 with the ids that go
        along with the observation_values. We will do the normalization
        separately for each lab test.
      vocab_size: The range of the values in obs_code is from 0 to vocab_size.
      mode: The execution mode, as defined in tf.estimator.ModeKeys.
      momentum: Mean and variance will be updated as
        momentum * old_value + (1 - momentum) * new_value.

    Returns:
      observation_values as in the input only with normalized values.
    """
    with tf.variable_scope('batch_normalization'):
        new_indices = []
        new_values = []

        for i in range(vocab_size):
            with tf.variable_scope('bn' + str(i)):
                positions_of_feature_i = tf.where(tf.equal(obs_code, i))
                values_of_feature_i = tf.gather_nd(observation_values.values,
                                                   positions_of_feature_i)
                if mode == tf.estimator.ModeKeys.TRAIN:
                    tf.summary.scalar('avg_observation_values/' + str(i),
                                      tf.reduce_mean(values_of_feature_i))
                    tf.summary.histogram('observation_values/' + str(i),
                                         values_of_feature_i)
                batchnorm_layer = tf.layers.BatchNormalization(
                    axis=1, momentum=momentum, epsilon=0.01, trainable=True)
                normalized_values = tf.squeeze(
                    batchnorm_layer.apply(
                        tf.expand_dims(values_of_feature_i, axis=1),
                        training=(mode == tf.estimator.ModeKeys.TRAIN)),
                    axis=1,
                    name='squeeze_normalized_values')
                if mode == tf.estimator.ModeKeys.TRAIN:
                    tf.summary.scalar('batchnorm_layer/moving_mean/' + str(i),
                                      tf.squeeze(batchnorm_layer.moving_mean))
                    tf.summary.scalar('batchnorm_layer/moving_variance/' + str(i),
                                      tf.squeeze(batchnorm_layer.moving_variance))
                    tf.summary.scalar('avg_normalized_values/' + str(i),
                                      tf.reduce_mean(normalized_values))
                    tf.summary.histogram('normalized_observation_values/' + str(i),
                                         normalized_values)
                indices_i = tf.gather_nd(observation_values.indices,
                                         positions_of_feature_i)
                new_indices += [indices_i]
                normalized_values = tf.where(tf.is_nan(normalized_values),
                                             tf.zeros_like(normalized_values),
                                             normalized_values)
                new_values += [normalized_values]

        normalized_sp_tensor = tf.SparseTensor(
            indices=tf.concat(new_indices, axis=0),
            values=tf.concat(new_values, axis=0),
            dense_shape=observation_values.dense_shape)
        normalized_sp_tensor = tf.sparse_reorder(normalized_sp_tensor)
        return normalized_sp_tensor
def _calculate_expected_result(dist_per_cell, numeric_values,
                               numeric_values_scale, input_mask_float,
                               logits_aggregation, config):
    """Calculate the expected result given cell and aggregation probabilities."""
    if config.use_gumbel_for_cells:
        gumbel_dist = tfp.distributions.RelaxedBernoulli(
            # The token logits were already divided by the temperature and used
            # for computing cell selection errors, so we need to multiply it
            # again here.
            config.temperature,
            logits=dist_per_cell.logits_parameter() * config.temperature)
        scaled_probability_per_cell = gumbel_dist.sample()
    else:
        scaled_probability_per_cell = _get_probs(dist_per_cell)

    # <float32>[batch_size, seq_length]
    scaled_probability_per_cell = (scaled_probability_per_cell /
                                   numeric_values_scale) * input_mask_float
    count_result = tf.reduce_sum(scaled_probability_per_cell, axis=1)
    numeric_values_masked = tf.where(
        tf.is_nan(numeric_values), tf.zeros_like(numeric_values),
        numeric_values)  # Mask non-numeric table values to zero.
    sum_result = tf.reduce_sum(
        scaled_probability_per_cell * numeric_values_masked, axis=1)
    avg_approximation = config.average_approximation_function
    if avg_approximation == AverageApproximationFunction.RATIO:
        average_result = sum_result / (count_result + _EPSILON_ZERO_DIVISION)
    elif avg_approximation == AverageApproximationFunction.FIRST_ORDER:
        # The sum of all probabilities except those that correspond to other cells
        ex = (
            tf.reduce_sum(scaled_probability_per_cell, axis=1, keepdims=True) -
            scaled_probability_per_cell + 1)
        average_result = tf.reduce_sum(
            numeric_values_masked * scaled_probability_per_cell / ex, axis=1)
    elif avg_approximation == AverageApproximationFunction.SECOND_ORDER:
        # The sum of all probabilities except those that correspond to other cells
        ex = (
            tf.reduce_sum(scaled_probability_per_cell, axis=1, keepdims=True) -
            scaled_probability_per_cell + 1)
        pointwise_var = (
            scaled_probability_per_cell * (1 - scaled_probability_per_cell))
        var = tf.reduce_sum(pointwise_var, axis=1, keepdims=True) - pointwise_var
        multiplier = (var / tf.math.square(ex) + 1) / ex
        average_result = tf.reduce_sum(
            numeric_values_masked * scaled_probability_per_cell * multiplier,
            axis=1)
    else:
        tf.logging.error("Invalid average_approximation_function: %s",
                         config.average_approximation_function)

    if config.use_gumbel_for_agg:
        gumbel_dist = tfp.distributions.RelaxedOneHotCategorical(
            config.agg_temperature, logits=logits_aggregation[:, 1:])
        # <float32>[batch_size, num_aggregation_labels - 1]
        aggregation_op_only_probs = gumbel_dist.sample()
    else:
        # <float32>[batch_size, num_aggregation_labels - 1]
        aggregation_op_only_probs = tf.nn.softmax(
            logits_aggregation[:, 1:] / config.agg_temperature, axis=-1)
    all_results = tf.concat([
        tf.expand_dims(sum_result, axis=1),
        tf.expand_dims(average_result, axis=1),
        tf.expand_dims(count_result, axis=1)
    ], axis=1)
    expected_result = tf.reduce_sum(
        all_results * aggregation_op_only_probs, axis=1)
    return expected_result
def SanitizedAutoCorrelation(x, axis, *args, **kwargs):
    res = tfp.stats.auto_correlation(x, axis, *args, **kwargs)
    res = tf.where(tf.is_nan(res), tf.ones_like(res), res)
    res = tf.where(tf.is_inf(res), tf.ones_like(res), res)
    return res
def replace_nan(tensor, default):
    return where(is_nan(tensor), ones_like(tensor) * default, tensor)
def _apply_gradients(self, grads_and_vars, learning_rate):
    """See base class."""
    print('_apply_gradients is called!!!')

    # Create slot variables
    var_list = []
    for (grad, param) in grads_and_vars:
        if grad is None or param is None:
            continue
        var_list.append(param)
    with ops.init_scope():
        self._create_slots(var_list)

    # Build training operations
    assignments = []
    check_values = []
    for (grad, param) in grads_and_vars:
        if grad is None or param is None:
            continue

        param_name = self._get_variable_name(param.name)
        #m, v = self.mv_lookup[param_name]
        m = self.get_slot(param, param_name + "/adam_m")
        v = self.get_slot(param, param_name + "/adam_v")

        # Standard Adam update.
        next_m = (
            tf.multiply(self.beta_1, m) + tf.multiply(1.0 - self.beta_1, grad))
        next_v = (
            tf.multiply(self.beta_2, v) +
            tf.multiply(1.0 - self.beta_2, tf.square(grad)))
        update = next_m / (tf.sqrt(next_v) + self.epsilon)

        check_update_nan = tf.Assert(
            tf.logical_not(tf.reduce_all(tf.is_nan(update))),
            [param_name, 'NAN update', update])
        check_update_inf = tf.Assert(
            tf.logical_not(tf.reduce_all(tf.is_inf(update))),
            [param_name, 'INF update', update])
        check_values.append(check_update_nan)
        check_values.append(check_update_inf)
        #update = 0

        # Just adding the square of the weights to the loss function is *not*
        # the correct way of using L2 regularization/weight decay with Adam,
        # since that will interact with the m and v parameters in strange ways.
        #
        # Instead we want to decay the weights in a manner that doesn't interact
        # with the m/v parameters. This is equivalent to adding the square
        # of the weights to the loss with plain (non-momentum) SGD.
        if self.weight_decay_rate > 0:
            if self._do_use_weight_decay(param_name):
                update += self.weight_decay_rate * param

        update_with_lr = learning_rate * update
        # update_with_lr = tf.Print(update_with_lr, ['\nupdate_with_lr', param_name, tf.shape(update_with_lr), update_with_lr], summarize=32)
        max_update_with_lr = tf.reduce_max(update_with_lr)
        min_update_with_lr = tf.reduce_min(update_with_lr)
        # update_with_lr = tf.Print(update_with_lr, ['\nupdate_with_lr', param_name, tf.shape(update_with_lr), min_update_with_lr, max_update_with_lr], summarize=32)

        check_update_with_lr_nan = tf.Assert(
            tf.logical_not(tf.reduce_all(tf.is_nan(update_with_lr))),
            [param_name, 'NAN update_with_lr', update_with_lr])
        check_update_with_lr_inf = tf.Assert(
            tf.logical_not(tf.reduce_all(tf.is_inf(update_with_lr))),
            [param_name, 'INF update_with_lr', update_with_lr])
        check_values.append(check_update_with_lr_nan)
        check_values.append(check_update_with_lr_inf)

        next_param = param - update_with_lr

        check_next_param_nan = tf.Assert(
            tf.logical_not(tf.reduce_all(tf.is_nan(next_param))),
            [param_name, 'NAN next_param', next_param])
        check_next_param_inf = tf.Assert(
            tf.logical_not(tf.reduce_all(tf.is_inf(next_param))),
            [param_name, 'INF next_param', next_param])
        check_values.append(check_next_param_nan)
        check_values.append(check_next_param_inf)

        # Ensure that the debug operations are executed.
        for op in check_values:
            op.mark_used()

        '''
        assignments.extend(
            [param.assign(next_param),]
        )
        '''
        assignments.extend(
            [param.assign(next_param),
             m.assign(next_m),
             v.assign(next_v)])

    assignments.extend(check_values)
    return assignments
def _build_graph(
        Npartitions,
        voc_size,
        batch_size,
        gamma_regularizer,
        reg2,
        optimizer_param,
        optimizer_type,
        init_std_dev=.05,
):
    graph = tf.Graph()
    with graph.as_default():
        chosen_index_1 = tf.placeholder(dtype=tf.int32, shape=(batch_size))
        chosen_index_2 = tf.placeholder(dtype=tf.int32, shape=(batch_size))
        is_corrections_pl = tf.placeholder_with_default(
            tf.ones(batch_size, dtype=tf.float32), shape=(batch_size))
        learning_rate_pl = tf.placeholder(dtype=tf.float32)

        t_weights_free = tf.Variable(
            tf.truncated_normal([Npartitions, Npartitions],
                                mean=0., stddev=init_std_dev),
            dtype=tf.float32)
        t_weights_free_sym = t_weights_free + tf.transpose(t_weights_free)
        t_weights = tf.reshape(
            tf.nn.softmax(
                tf.reshape(t_weights_free_sym, [Npartitions * Npartitions])),
            [Npartitions, Npartitions])

        t_topics_free = tf.Variable(
            tf.truncated_normal([Npartitions, voc_size],
                                mean=0., stddev=init_std_dev),
            dtype=tf.float32)
        t_topics = tf.nn.softmax(t_topics_free)  # default axis is (-1)

        t_topics_free_pl = tf.placeholder(tf.float32, shape=[Npartitions, voc_size])
        t_weights_free_pl = tf.placeholder(tf.float32, shape=[Npartitions, Npartitions])
        t_weights_free_assign_op = tf.assign(t_weights_free, t_weights_free_pl)
        t_topics_free_assign_op = tf.assign(t_topics_free, t_topics_free_pl)

        t_gamma = gamma_regularizer
        t_gamma2 = reg2

        pre_target = tf.log((tf.reduce_sum(
            (tf.matmul(
                tf.expand_dims(
                    tf.transpose(tf.gather(t_topics, chosen_index_1, axis=1)), -1),
                tf.expand_dims(
                    tf.transpose(tf.gather(t_topics, chosen_index_2, axis=1)), 1))
             * t_weights),
            axis=[1, 2])))

        target = tf.reduce_mean(is_corrections_pl * tf.where(
            tf.is_nan(pre_target), tf.zeros_like(pre_target), pre_target
        )) + t_gamma * tf.reduce_sum(
            tf.diag_part(t_weights)) + t_gamma2 * tf.reduce_sum(
                tf.diag_part(t_weights) / tf.reduce_sum(t_weights, axis=1))

        # now optimizer
        t_loss = -target
        #t_optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        #t_optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=.9)
        #t_optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate)
        if optimizer_type == 'adam':
            t_optimizer = tf.train.AdamOptimizer(
                learning_rate=learning_rate_pl, **optimizer_param)
        elif optimizer_type == 'rmsprop':
            t_optimizer = tf.train.RMSPropOptimizer(
                learning_rate=learning_rate_pl, **optimizer_param)
        else:
            raise ValueError('Unknown optimizer')

        opt_vars = t_optimizer.variables()
        opt_vars_pls = [
            tf.placeholder(dtype=v.dtype, shape=v.shape) for v in opt_vars
        ]
        opt_vars_assigns = [
            tf.assign(v, pl) for v, pl in zip(opt_vars, opt_vars_pls)
        ]

        t_train_op = t_optimizer.minimize(t_loss)
        t_tfinit = tf.global_variables_initializer()
        saver = tf.train.Saver(max_to_keep=2)

        t_loss_to_display = -(target - (
            t_gamma * tf.reduce_sum(tf.diag_part(t_weights)) +
            t_gamma2 * tf.reduce_sum(
                tf.diag_part(t_weights) / tf.reduce_sum(t_weights, axis=1))))

    return (graph, t_tfinit, t_loss_to_display, t_topics, t_train_op, t_weights,
            chosen_index_1, chosen_index_2, is_corrections_pl, saver,
            t_topics_free_pl, t_weights_free_pl, t_weights_free_assign_op,
            t_topics_free_assign_op, pre_target, learning_rate_pl,
            t_weights_free, t_topics_free, opt_vars, opt_vars_pls,
            opt_vars_assigns)
def denan(x):
    return tf.where(tf.is_nan(x), tf.zeros_like(x), x)