Example #1
    def __init__(self, idx, mean, var, **kwargs):
        """
        :param mean: Tensor of shape [W] containing the mean at each parameter vertex
        :param var: Tensor of shape [W] containing the variance at each parameter vertex
        """
        Posterior.__init__(self, idx, **kwargs)
        self.nvertices = tf.shape(mean)[0]
        self.name = kwargs.get("name", "NormPost")
        
        mean, var = self._get_mean_var(mean, var, kwargs.get("init", None))
        mean = tf.cast(mean, tf.float32)
        var = tf.cast(var, tf.float32)
        mean = self.log_tf(tf.where(tf.is_finite(mean), mean, tf.zeros_like(mean)))
        var = tf.where(tf.is_nan(var), tf.ones_like(var), var)

        self.mean_variable = self.log_tf(tf.Variable(mean, validate_shape=False,
                                                     name="%s_mean" % self.name))
        self.log_var = self.log_tf(tf.Variable(tf.log(var), validate_shape=False,
                                   name="%s_log_var" % self.name))
        self.var_variable = self.log_tf(tf.exp(self.log_var, name="%s_var" % self.name))
        if kwargs.get("suppress_nan", True):
            #self.mean = tf.where(tf.is_nan(self.mean_variable), tf.ones_like(self.mean_variable), self.mean_variable)
            #self.var = tf.where(tf.is_nan(self.var_variable), tf.ones_like(self.var_variable), self.var_variable)
            self.mean = tf.where(tf.is_nan(self.mean_variable), mean, self.mean_variable)
            self.var = tf.where(tf.is_nan(self.var_variable), var, self.var_variable)
        else:
            self.mean = self.mean_variable
            self.var = self.var_variable
        self.std = self.log_tf(tf.sqrt(self.var, name="%s_std" % self.name))
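
The suppress_nan option above resets any posterior values that turn NaN during optimization back to their initial values. A minimal, self-contained sketch of that pattern using TF 2.x names (tf.math.is_nan replaces the TF 1.x tf.is_nan; the values are illustrative):

import tensorflow as tf

mean_variable = tf.Variable([0.5, float("nan")])   # second entry has gone NaN during training
initial_mean = tf.constant([0.0, 0.0])             # fall-back values

# Wherever the trained variable is NaN, fall back to the initial value.
safe_mean = tf.where(tf.math.is_nan(mean_variable), initial_mean, mean_variable)
# safe_mean -> [0.5, 0.0]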
Example #2
    def __init__(self, idx, mean, var, **kwargs):
        """
        :param mean: Tensor of shape [W] containing the mean at each parameter vertex
        :param var: Tensor of shape [W] containing the variance at each parameter vertex
        """
        Posterior.__init__(self, idx, **kwargs)
        self.nvertices = tf.shape(mean)[0]
        self.name = kwargs.get("name", "GlobalPost")

        mean, var = self._get_mean_var(mean, var, kwargs.get("init", None))
        
        # Take the mean of the mean and variance across vertices as the initial value
        # in case there is a vertexwise initialization function
        initial_mean_global = tf.reshape(tf.reduce_mean(mean), [1])
        initial_var_global = tf.reshape(tf.reduce_mean(var), [1])
        self.mean_variable = tf.Variable(initial_mean_global, 
                                         dtype=tf.float32, validate_shape=False,
                                         name="%s_mean" % self.name)
        self.log_var = tf.Variable(tf.log(tf.cast(initial_var_global, dtype=tf.float32)), validate_shape=False,
                                   name="%s_log_var" % self.name)
        self.var_variable = self.log_tf(tf.exp(self.log_var, name="%s_var" % self.name))
        if kwargs.get("suppress_nan", True):
            self.mean_global = tf.where(tf.is_nan(self.mean_variable), initial_mean_global, self.mean_variable)
            self.var_global = tf.where(tf.is_nan(self.var_variable), initial_var_global, self.var_variable)
        else:
            self.mean_global = self.mean_variable
            self.var_global = self.var_variable

        self.mean = self.log_tf(tf.tile(self.mean_global, [self.nvertices]), name="%s_meang" % self.name)
        self.var = tf.tile(self.var_global, [self.nvertices])
        self.std = self.log_tf(tf.sqrt(self.var, name="%s_std" % self.name))
Example #3
def replace_nan_groundtruth_label_scores_with_ones(label_scores):
  """Replaces nan label scores with 1.0.

  Args:
    label_scores: a tensor containing object annotation label scores.

  Returns:
    a tensor where NaN label scores have been replaced by ones.
  """
  return tf.where(
      tf.is_nan(label_scores), tf.ones(tf.shape(label_scores)), label_scores)
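
The same NaN-to-one replacement as a stand-alone TF 2.x sketch (eager execution assumed; tf.is_nan is tf.math.is_nan in the 2.x API):

import tensorflow as tf

label_scores = tf.constant([0.7, float("nan"), 0.2])
# NaN scores are treated as fully confident (1.0); real scores pass through.
clean_scores = tf.where(tf.math.is_nan(label_scores),
                        tf.ones_like(label_scores), label_scores)
# clean_scores -> [0.7, 1.0, 0.2]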
Example #4
def _clamp_and_filter_result(pixel_x, pixel_y, z):
    """Clamps and masks out out-of-bounds pixel coordinates.
    Args:
      pixel_x: a tf.Tensor containing x pixel coordinates in an image.
      pixel_y: a tf.Tensor containing y pixel coordinates in an image.
      z: a tf.Tensor containing the depth at each (pixel_y, pixel_x). All shapes
        are [B, H, W].
    Returns:
      pixel_x, pixel_y, mask, where pixel_x and pixel_y are the original ones,
      except:
      - Values that fall out of the image bounds, which are [0, W-1) in x and
        [0, H-1) in y, are clamped to the bounds
      - NaN values in pixel_x, pixel_y are replaced by zeros
      mask is False at all points where:
      - Clamping in pixel_x or pixel_y was performed
      - NaNs were replaced by zeros
      - z is non-positive,
      and True everywhere else, that is, where pixel_x, pixel_y are finite and
      fall within the frame.
    """
    with tf.name_scope("Clamp", values=[pixel_x, pixel_y, z]):
        _, height, width = tf.unstack(tf.shape(pixel_x))

        def _tensor(x):
            return tf.to_float(tf.convert_to_tensor(x))

        x_not_underflow = pixel_x >= 0.0
        y_not_underflow = pixel_y >= 0.0
        x_not_overflow = pixel_x < _tensor(width - 1)
        y_not_overflow = pixel_y < _tensor(height - 1)
        z_positive = z > 0.0
        x_not_nan = tf.math.logical_not(tf.is_nan(pixel_x))
        y_not_nan = tf.math.logical_not(tf.is_nan(pixel_y))
        not_nan = tf.logical_and(x_not_nan, y_not_nan)
        not_nan_mask = tf.to_float(not_nan)
        pixel_x *= not_nan_mask
        pixel_y *= not_nan_mask
        pixel_x = tf.clip_by_value(pixel_x, 0.0, _tensor(width - 1))
        pixel_y = tf.clip_by_value(pixel_y, 0.0, _tensor(height - 1))
        mask_stack = tf.stack(
            [
                x_not_underflow,
                y_not_underflow,
                x_not_overflow,
                y_not_overflow,
                z_positive,
                not_nan,
            ],
            axis=0,
        )
        mask = tf.reduce_all(mask_stack, axis=0)
        return pixel_x, pixel_y, mask
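
A reduced, self-contained sketch of the same clamp-and-mask idea for a single coordinate vector (TF 2.x names and illustrative values; the original operates on [B, H, W] tensors and zeroes NaNs by multiplying with a mask):

import tensorflow as tf

pixel_x = tf.constant([3.0, float("nan"), 250.0])
width = 200

not_nan = tf.logical_not(tf.math.is_nan(pixel_x))
in_bounds = tf.logical_and(pixel_x >= 0.0, pixel_x < float(width - 1))
# Points are kept only where the coordinate is finite and inside the frame.
mask = tf.logical_and(not_nan, in_bounds)

# NaNs are replaced by zeros, then everything is clamped to the valid range.
pixel_x = tf.where(not_nan, pixel_x, tf.zeros_like(pixel_x))
pixel_x = tf.clip_by_value(pixel_x, 0.0, float(width - 1))
# pixel_x -> [3.0, 0.0, 199.0], mask -> [True, False, False]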
Example #5
def create_topk_unique(inputs, k):
  height = inputs.shape[0]
  width = inputs.shape[1]
  neg_inf_r0 = tf.constant(-np.inf, dtype=tf.float32)
  ones = tf.ones([height, width], dtype=tf.float32)
  neg_inf_r2 = ones * neg_inf_r0
  inputs = tf.where(tf.is_nan(inputs), neg_inf_r2, inputs)

  tmp = inputs
  topk_r2 = tf.zeros([height, k], dtype=tf.float32)
  for i in range(k):
    kth_order_statistic = tf.reduce_max(tmp, axis=1, keepdims=True)
    k_mask = tf.tile(tf.expand_dims(tf.equal(tf.range(k), tf.fill([k], i)), 0),
                     [height, 1])
    topk_r2 = tf.where(k_mask, tf.tile(kth_order_statistic, [1, k]), topk_r2)
    ge_r2 = tf.greater_equal(inputs, tf.tile(kth_order_statistic, [1, width]))
    tmp = tf.where(ge_r2, neg_inf_r2, inputs)

  log2_ceiling = int(math.ceil(math.log(float(int(width)), 2)))
  next_power_of_two = 1 << log2_ceiling
  count_mask = next_power_of_two - 1
  mask_r0 = tf.constant(count_mask)
  mask_r2 = tf.fill([height, k], mask_r0)
  topk_r2_s32 = tf.bitcast(topk_r2, tf.int32)
  topk_indices_r2 = tf.bitwise.bitwise_and(topk_r2_s32, mask_r2)
  return topk_r2, topk_indices_r2
Example #6
 def _select_columns(self, mode, features):
     input_mask = features["input_mask"]
     column_ids = features["column_ids"]
     with tf.variable_scope("bert"):
         with tf.variable_scope("embeddings",
                                reuse=tf.compat.v1.AUTO_REUSE):
             input_embeddings, _ = modeling.embedding_lookup(
                 input_ids=features["input_ids"],
                 vocab_size=self._vocab_size,
                 embedding_size=self._hidden_size,
                 initializer_range=self._initializer_range,
                 word_embedding_name="word_embeddings")
             if self._use_positional_embeddings:
                 token_type_ids = []
                 token_type_features = [
                     "segment_ids", "column_ids", "row_ids",
                     "prev_label_ids", "column_ranks", "inv_column_ranks",
                     "numeric_relations"
                 ]
                 for key in token_type_features:
                     if self._disabled_features is not None and key in self._disabled_features:
                         token_type_ids.append(tf.zeros_like(features[key]))
                     else:
                         token_type_ids.append(features[key])
                 input_embeddings = modeling.embedding_postprocessor(
                     input_tensor=input_embeddings,
                     use_token_type=True,
                     token_type_ids=token_type_ids,
                     token_type_vocab_size=self._type_vocab_size,
                     token_type_embedding_name="token_type_embeddings",
                     use_position_embeddings=self._use_position_embeddings,
                     position_embedding_name="position_embeddings",
                     initializer_range=self._initializer_range,
                     max_position_embeddings=self._max_position_embeddings,
                     extra_embeddings=None,
                     dropout_prob=0.0)
             # Maps every position where input_mask is zero to index (max_num_columns + 1).
             # Index 0 is for the question and indices 1 to max_num_columns inclusive
             # are for the columns.
             masked_col_ids = column_ids * input_mask + (1 - input_mask) * (
                 self._max_num_columns + 1)
             col_index = segmented_tensor.IndexMap(
                 indices=masked_col_ids,
                 num_segments=self._max_num_columns + 2,
                 batch_dims=1)
             average_embeddings, _ = segmented_tensor.reduce_mean(
                 input_embeddings, col_index)
             # Removes the last index as it contains the average of non-selected values
             average_embeddings = average_embeddings[:, :-1]
             normalize_average_embeddings = tf.math.l2_normalize(
                 average_embeddings, axis=2)
             questions_embeddings = normalize_average_embeddings[:, :1]
             columns_embeddings = normalize_average_embeddings[:, 1:]
             multiply = columns_embeddings * questions_embeddings
             multiply = tf.where(tf.is_nan(multiply),
                                 tf.zeros_like(multiply), multiply)
             column_scores = tf.math.reduce_sum(multiply,
                                                axis=-1,
                                                name="column_scores")
             return column_scores
Example #7
def gradient_summaries(gvs, suppress_inf_and_nans=False):
    """Creates summaries for norm, mean and var of gradients."""
    gs = [gv[0] for gv in gvs]
    grad_global_norm = tf.global_norm(gs, 'gradient_global_norm')

    if suppress_inf_and_nans:
        is_nan_or_inf = tf.logical_or(tf.is_nan(grad_global_norm),
                                      tf.is_inf(grad_global_norm))

        grad_global_norm = tf.where(is_nan_or_inf,
                                    tf.zeros_like(grad_global_norm) - 1.,
                                    grad_global_norm)

    grad_abs_max, grad_abs_mean, grad_mean, grad_var = [0.] * 4
    n_grads = 1e-8
    for g, _ in gvs:
        if isinstance(g, tf.IndexedSlices):
            g = g.values

        if g is not None:
            current_n_grads = np.prod(g.shape.as_list())
            abs_g = abs(g)
            mean, var = tf.nn.moments(g, list(range(len(g.shape))))
            grad_abs_max = tf.maximum(grad_abs_max, tf.reduce_max(abs_g))
            grad_abs_mean += tf.reduce_sum(abs_g)
            grad_mean += mean * current_n_grads
            grad_var += var
            n_grads += current_n_grads

    tf.summary.scalar('grad/abs_max', grad_abs_max)
    tf.summary.scalar('grad/abs_mean', grad_abs_mean / n_grads)
    tf.summary.scalar('grad/mean', grad_mean / n_grads)
    tf.summary.scalar('grad/var', grad_var / n_grads)

    return dict(grad_global_norm=grad_global_norm)
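
The suppress_inf_and_nans branch above maps a non-finite global norm to -1 so it still shows up as a sentinel in the summaries. In isolation, the pattern looks like this (a TF 2.x sketch with an illustrative scalar):

import tensorflow as tf

grad_global_norm = tf.constant(float("nan"))
is_nan_or_inf = tf.logical_or(tf.math.is_nan(grad_global_norm),
                              tf.math.is_inf(grad_global_norm))
# Non-finite norms are reported as -1 instead of poisoning the summary.
grad_global_norm = tf.where(is_nan_or_inf,
                            tf.zeros_like(grad_global_norm) - 1.0,
                            grad_global_norm)
# grad_global_norm -> -1.0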
Example #8
def filter_groundtruth_with_nan_box_coordinates(tensor_dict):
  """Filters out groundtruth with no bounding boxes.

  Args:
    tensor_dict: a dictionary of following groundtruth tensors -
      fields.InputDataFields.groundtruth_boxes
      fields.InputDataFields.groundtruth_classes
      fields.InputDataFields.groundtruth_confidences
      fields.InputDataFields.groundtruth_keypoints
      fields.InputDataFields.groundtruth_instance_masks
      fields.InputDataFields.groundtruth_is_crowd
      fields.InputDataFields.groundtruth_area
      fields.InputDataFields.groundtruth_label_types

  Returns:
    a dictionary of tensors containing only the groundtruth with valid
    (non-NaN) bounding boxes.
  """
  groundtruth_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
  nan_indicator_vector = tf.greater(tf.reduce_sum(tf.cast(
      tf.is_nan(groundtruth_boxes), dtype=tf.int32), reduction_indices=[1]), 0)
  valid_indicator_vector = tf.logical_not(nan_indicator_vector)
  valid_indices = tf.where(valid_indicator_vector)

  return retain_groundtruth(tensor_dict, valid_indices)
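
A self-contained sketch of the row filtering above: rows whose box coordinates contain any NaN are dropped (TF 2.x names; tf.reduce_any over is_nan is equivalent to the sum-of-casts test in the example):

import tensorflow as tf

boxes = tf.constant([[0.1, 0.2, 0.3, 0.4],
                     [float("nan"), 0.2, 0.3, 0.4]])
has_nan = tf.reduce_any(tf.math.is_nan(boxes), axis=1)
valid_indices = tf.where(tf.logical_not(has_nan))    # -> [[0]]
valid_boxes = tf.gather_nd(boxes, valid_indices)     # keeps only the first row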
Example #9
def _calculate_regression_loss(answer, aggregate_mask, dist_per_cell,
                               numeric_values, numeric_values_scale,
                               input_mask_float, logits_aggregation, config):
    """Calculates the regression loss per example.

  Args:
    answer: <float32>[batch_size]
    aggregate_mask: <float32>[batch_size]
    dist_per_cell: Cell selection distribution for each cell.
    numeric_values: <float32>[batch_size, seq_length]
    numeric_values_scale: <float32>[batch_size, seq_length]
    input_mask_float: <float32>[batch_size, seq_length]
    logits_aggregation: <float32>[batch_size, num_aggregation_labels]
      probabilities.
    config: Configuration for Tapas model.

  Returns:
    per_example_answer_loss_scaled: <float32>[batch_size]. Scales answer loss
    for each example in the batch.
    large_answer_loss_mask: <float32>[batch_size]. A mask which is 1 for
    examples for which their answer loss is larger than the answer_loss_cutoff.
  """
    # <float32>[batch_size]
    expected_result = _calculate_expected_result(dist_per_cell, numeric_values,
                                                 numeric_values_scale,
                                                 input_mask_float,
                                                 logits_aggregation, config)
    # <float32>[batch_size]
    answer_masked = tf.where(tf.is_nan(answer), tf.zeros_like(answer), answer)

    if config.use_normalized_answer_loss:
        normalizer = tf.stop_gradient(
            tf.math.maximum(tf.math.abs(expected_result),
                            tf.math.abs(answer_masked)) +
            _EPSILON_ZERO_DIVISION)
        normalized_answer_masked = answer_masked / normalizer
        normalized_expected_result = expected_result / normalizer
        per_example_answer_loss = tf.losses.huber_loss(
            normalized_answer_masked * aggregate_mask,
            normalized_expected_result * aggregate_mask,
            delta=tf.cast(config.huber_loss_delta, tf.float32),
            reduction=tf.losses.Reduction.NONE)
    else:
        per_example_answer_loss = tf.losses.huber_loss(
            answer_masked * aggregate_mask,
            expected_result * aggregate_mask,
            delta=tf.cast(config.huber_loss_delta, tf.float32),
            reduction=tf.losses.Reduction.NONE)
    if config.answer_loss_cutoff is None:
        large_answer_loss_mask = tf.ones_like(per_example_answer_loss,
                                              dtype=tf.float32)
    else:
        large_answer_loss_mask = tf.where(
            per_example_answer_loss > config.answer_loss_cutoff,
            tf.zeros_like(per_example_answer_loss, dtype=tf.float32),
            tf.ones_like(per_example_answer_loss, dtype=tf.float32))
    per_example_answer_loss_scaled = config.answer_loss_importance * (
        per_example_answer_loss * aggregate_mask)
    return per_example_answer_loss_scaled, large_answer_loss_mask
Example #10
    def _compile_POPLINP_cost(self, weight_input, cem_type, tf_data_dict):
        """ @brief:
            The input is the noise of the weight space

            @weight_input: size [pop_size, plan_hor, weight_size]
        """

        policy_network = tf_data_dict['policy_network']
        # nopt is the number of candidate solutions
        t, nopt = tf.constant(0), tf.shape(weight_input)[0]
        init_costs = tf.zeros([nopt, self.npart])
        init_obs = tf.tile(self.sy_cur_obs[None], [nopt * self.npart, 1])

        weight_input = tf.reshape(
            tf.tile(
                tf.transpose(weight_input, [1, 0, 2])[:, :, None],
                [1, 1, self.npart, 1]  # hor, popsize, npart, dU
            ),
            [self.plan_hor, -1, tf_data_dict['weight_size']])

        def limit_action(action):
            return tf.minimum(tf.maximum(action, self.ac_lb[0]), self.ac_ub[0])

        if cem_type in ['POPLINP-SEP', 'POPLINP-UNI']:

            # step 2: cem on top of the @proposed_act_seqs
            def iteration(t, total_cost, cur_obs):
                cur_acs = \
                    policy_network.forward_network(cur_obs, weight_input[t])
                cur_acs = limit_action(cur_acs)
                next_obs = self._predict_next_obs(cur_obs, cur_acs)
                if self.obs_ac_cost_fn is not None:
                    delta_cost = tf.reshape(
                        self.obs_ac_cost_fn(next_obs, cur_acs),
                        [-1, self.npart])
                else:
                    delta_cost = tf.reshape(
                        self.obs_cost_fn(next_obs) + self.ac_cost_fn(cur_acs),
                        [-1, self.npart])
                return t + 1, total_cost + delta_cost, \
                    self.obs_postproc2(next_obs), cur_acs

            pass
        else:
            raise NotImplementedError

        total_cost, cur_obs = init_costs, init_obs
        for t in range(self.plan_hor):
            _, total_cost, cur_obs, cur_acs = iteration(t, total_cost, cur_obs)

        costs = total_cost

        # replace nan costs with very high cost
        return tf.reduce_mean(tf.where(tf.is_nan(costs),
                                       1e6 * tf.ones_like(costs), costs),
                              axis=1)
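
The final line replaces NaN rollout costs with a large penalty before averaging over particles. As a stand-alone sketch (TF 2.x names, illustrative values):

import tensorflow as tf

costs = tf.constant([[1.0, float("nan")],
                     [2.0, 3.0]])
# Rollouts that produced NaN costs are heavily penalized rather than ignored.
costs = tf.where(tf.math.is_nan(costs), 1e6 * tf.ones_like(costs), costs)
mean_cost = tf.reduce_mean(costs, axis=1)   # -> [500000.5, 2.5]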
Example #11
def f1_loss(y_true, y_pred):
    tp = K.sum(K.cast(y_true * y_pred, 'float'), axis = 0)
    fp = K.sum(K.cast((1 - y_true) * y_pred, 'float'), axis = 0)
    fn = K.sum(K.cast(y_true * (1 - y_pred), 'float'), axis = 0)
    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    f1 = 2*p*r / (p + r + K.epsilon())
    f1 = tf.where(tf.is_nan(f1), tf.zeros_like(f1), f1)

    return 1 - K.mean(f1)
Example #12
def get_gen_loss(args, xfake, ll_fake, score_func, z_outer):
  opt_gen = tf.train.AdamOptimizer(
      learning_rate=args.learning_rate, beta1=args.beta1, beta2=args.beta2)

  f_sampled_x = score_func(xfake, z_outer)
  loss = -tf.reduce_mean(f_sampled_x) + args.ent_lam * tf.reduce_mean(ll_fake)

  gvs = opt_gen.compute_gradients(
      loss, var_list=tf.trainable_variables(scope='generator'))
  gvs = [(tf.where(tf.is_nan(grad), tf.zeros_like(grad), grad), val)
         for grad, val in gvs
         if grad is not None]
  train_gen = opt_gen.apply_gradients(gvs)
  return loss, train_gen
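
The same zero-out-NaN-gradients idea, written against the TF 2.x GradientTape/Keras optimizer API instead of compute_gradients (a sketch with an illustrative variable and loss, not the generator training step above):

import tensorflow as tf

opt = tf.keras.optimizers.Adam(learning_rate=1e-3)
var = tf.Variable([1.0, 2.0])

with tf.GradientTape() as tape:
    # The second component of the loss is NaN, so its gradient is NaN too.
    loss = tf.reduce_sum(var * tf.constant([1.0, float("nan")]))

grads = tape.gradient(loss, [var])
grads = [tf.where(tf.math.is_nan(g), tf.zeros_like(g), g)
         for g in grads if g is not None]
opt.apply_gradients(zip(grads, [var]))      # only the finite component is updated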
Example #13
 def _get_cubic_root(self):
     """Get the cubic root."""
     # We have the equation x^2 D^2 + (1-x)^4 * C / h_min^2
     # where x = sqrt(mu).
     # We substitute x, which is sqrt(mu), with x = y + 1.
     # It gives y^3 + py = q
     # where p = (D^2 h_min^2)/(2*C) and q = -p.
     # We use the Vieta's substitution to compute the root.
     # There is only one real solution y (which is in [0, 1] ).
     # http://mathworld.wolfram.com/VietasSubstitution.html
     assert_array = [
         tf.Assert(tf.logical_not(tf.is_nan(self._dist_to_opt_avg)), [
             self._dist_to_opt_avg,
         ]),
         tf.Assert(tf.logical_not(tf.is_nan(self._h_min)), [
             self._h_min,
         ]),
         tf.Assert(tf.logical_not(tf.is_nan(self._grad_var)), [
             self._grad_var,
         ]),
         tf.Assert(tf.logical_not(tf.is_inf(self._dist_to_opt_avg)), [
             self._dist_to_opt_avg,
         ]),
         tf.Assert(tf.logical_not(tf.is_inf(self._h_min)), [
             self._h_min,
         ]),
         tf.Assert(tf.logical_not(tf.is_inf(self._grad_var)), [
             self._grad_var,
         ])
     ]
     with tf.control_dependencies(assert_array):
         p = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var
         w3 = (-tf.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0
         w = tf.sign(w3) * tf.pow(tf.abs(w3), 1.0 / 3.0)
         y = w - p / 3.0 / w
         x = y + 1
     return x
Example #14
    def __call__(self,
                 prediction_tensor,
                 target_tensor,
                 ignore_nan_targets=False,
                 losses_mask=None,
                 scope=None,
                 **params):
        """Call the loss function.

        Args:
          prediction_tensor: an N-d tensor of shape [batch, anchors, ...]
            representing predicted quantities.
          target_tensor: an N-d tensor of shape [batch, anchors, ...] representing
            regression or classification targets.
          ignore_nan_targets: whether to ignore nan targets in the loss computation.
            E.g. can be used if the target tensor is missing groundtruth data that
            shouldn't be factored into the loss.
          losses_mask: A [batch] boolean tensor that indicates whether losses should
            be applied to individual images in the batch. For elements that
            are False, corresponding prediction, target, and weight tensors will not
            contribute to loss computation. If None, no filtering will take place
            prior to loss computation.
          scope: Op scope name. Defaults to 'Loss' if None.
          **params: Additional keyword arguments for specific implementations of
                  the Loss.

        Returns:
          loss: a tensor representing the value of the loss function.
        """
        with tf.name_scope(scope, 'Loss',
                           [prediction_tensor, target_tensor, params]) as scope:
            if ignore_nan_targets:
                target_tensor = tf.where(tf.is_nan(target_tensor),
                                         prediction_tensor,
                                         target_tensor)
            if losses_mask is not None:
                tensor_multiplier = self._get_loss_multiplier_for_tensor(
                    prediction_tensor,
                    losses_mask)
                prediction_tensor *= tensor_multiplier
                target_tensor *= tensor_multiplier

                if 'weights' in params:
                    params['weights'] = tf.convert_to_tensor(params['weights'])
                    weights_multiplier = self._get_loss_multiplier_for_tensor(
                        params['weights'],
                        losses_mask)
                    params['weights'] *= weights_multiplier
            return self._compute_loss(prediction_tensor, target_tensor, **params)
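
The ignore_nan_targets branch replaces NaN targets with the corresponding predictions, so those positions contribute zero error. A minimal stand-alone sketch (TF 2.x names, illustrative tensors):

import tensorflow as tf

prediction = tf.constant([0.2, 0.8])
target = tf.constant([1.0, float("nan")])

# A NaN target is swapped for the prediction, giving zero error at that position.
target = tf.where(tf.math.is_nan(target), prediction, target)
squared_error = tf.square(prediction - target)   # -> [0.64, 0.0]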
Example #15
def _calculate_aggregate_mask(answer, output_layer_aggregation,
                              output_bias_agg, output_weights_agg,
                              cell_select_pref, label_ids):
    """Finds examples where the model should select cells with no aggregation.

  Returns a mask that determines for which examples should the model select
  answers directly from the table, without any aggregation function. If the
  answer is a piece of text the case is unambiguous as aggregation functions
  only apply to numbers. If the answer is a number but does not appear in the
  table then we must use some aggregation case. The ambiguous case is when the
  answer is a number that also appears in the table. In this case we use the
  aggregation function probabilities predicted by the model to decide whether
  to select or aggregate. The threshold for this is a hyperparameter
  `cell_select_pref`.

  Args:
    answer: <float32>[batch_size]
    output_layer_aggregation: <float32>[batch_size, hidden_size]
    output_bias_agg: <float32>[num_aggregation_labels]
    output_weights_agg: <float32>[num_aggregation_labels, hidden_size_agg]
    cell_select_pref: Preference for cell selection in ambiguous cases.
    label_ids: <int32>[batch_size, seq_length]

  Returns:
    aggregate_mask: <float32>[batch_size] A mask set to 1 for examples that
      should use aggregation functions.
  """
    # <float32>[batch_size]
    aggregate_mask_init = tf.cast(tf.logical_not(tf.is_nan(answer)),
                                  tf.float32)
    logits_aggregation = _calculate_aggregation_logits(
        output_layer_aggregation, output_weights_agg, output_bias_agg)
    dist_aggregation = tfp.distributions.Categorical(logits=logits_aggregation)
    aggregation_ops_total_mass = tf.reduce_sum(
        _get_probs(dist_aggregation)[:, 1:], axis=1)
    # Cell selection examples according to current model.
    is_pred_cell_selection = aggregation_ops_total_mass <= cell_select_pref
    # Examples with non-empty cell selection supervision.
    is_cell_supervision_available = tf.reduce_sum(label_ids, axis=1) > 0
    aggregate_mask = tf.where(
        tf.logical_and(is_pred_cell_selection, is_cell_supervision_available),
        tf.zeros_like(aggregate_mask_init, dtype=tf.float32),
        aggregate_mask_init)
    aggregate_mask = tf.stop_gradient(aggregate_mask)
    return aggregate_mask
Example #16
def reduce_nanmean(tensor, axes=None, keepdims=False, name=None):
  """Take the mean of a tensor, skipping NaNs.

  Args:
    tensor: tensor to reduce.
    axes: optional list of axes to reduce.
    keepdims: optional boolean indicating whether to keep dimensions or not.
    name: optional op name.

  Returns:
    tf.Tensor with reduce values.
  """
  masked = tf.is_nan(tensor)
  valid_tensor = tf.where(masked, tf.zeros_like(tensor), tensor)
  total = tf.reduce_sum(valid_tensor, axes, keepdims=keepdims)
  counts = tf.reduce_sum(tf.cast(tf.logical_not(masked), tensor.dtype),
                         axes, keepdims=keepdims)
  return tf.div(total, counts, name=name)
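
A stand-alone TF 2.x sketch of the NaN-skipping mean above, analogous to numpy's nanmean (tf.div in the example is plain division or tf.math.divide in the 2.x API):

import tensorflow as tf

x = tf.constant([[1.0, float("nan")],
                 [3.0, 5.0]])
masked = tf.math.is_nan(x)
valid = tf.where(masked, tf.zeros_like(x), x)
total = tf.reduce_sum(valid, axis=0)
counts = tf.reduce_sum(tf.cast(tf.logical_not(masked), x.dtype), axis=0)
nanmean = total / counts    # -> [2.0, 5.0]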
Example #17
def _create_topk_unique(inputs, k):
    """Creates the top k values in sorted order with indices.

  Args:
    inputs: A tensor with rank of 2. [batch_size, original_size].
    k: An integer, number of top elements to select.

  Returns:
    topk_r2: A tensor, the k largest elements. [batch_size, k].
    topk_indices_r2: A tensor, indices of the top k values. [batch_size, k].
  """
    height = inputs.shape[0]
    width = inputs.shape[1]
    neg_inf_r0 = tf.constant(-np.inf, dtype=tf.float32)
    ones = tf.ones([height, width], dtype=tf.float32)
    neg_inf_r2 = ones * neg_inf_r0
    inputs = tf.where(tf.is_nan(inputs), neg_inf_r2, inputs)

    # Select the current largest value k times and keep them in topk_r2. The
    # selected largest values are marked as the smallest value to avoid being
    # selected again.
    tmp = inputs
    topk_r2 = tf.zeros([height, k], dtype=tf.float32)
    for i in range(k):
        kth_order_statistic = tf.reduce_max(tmp, axis=1, keepdims=True)
        k_mask = tf.tile(
            tf.expand_dims(tf.equal(tf.range(k), tf.fill([k], i)), 0),
            [height, 1])
        topk_r2 = tf.where(k_mask, tf.tile(kth_order_statistic, [1, k]),
                           topk_r2)
        ge_r2 = tf.greater_equal(inputs,
                                 tf.tile(kth_order_statistic, [1, width]))
        tmp = tf.where(ge_r2, neg_inf_r2, inputs)

    log2_ceiling = int(math.ceil(math.log(float(int(width)), 2)))
    next_power_of_two = 1 << log2_ceiling
    count_mask = next_power_of_two - 1
    mask_r0 = tf.constant(count_mask)
    mask_r2 = tf.fill([height, k], mask_r0)
    topk_r2_s32 = tf.bitcast(topk_r2, tf.int32)
    topk_indices_r2 = tf.bitwise.bitwise_and(topk_r2_s32, mask_r2)
    return topk_r2, topk_indices_r2
Example #18
    def __init__(self, posts, **kwargs):
        FactorisedPosterior.__init__(self, posts, **kwargs)

        # The full covariance matrix is formed from the Cholesky decomposition
        # to ensure that it remains positive definite.
        #
        # To achieve this, we have to create PxP tensor variables for
        # each parameter vertex, but we then extract only the lower triangular
        # elements and train only on these. The diagonal elements
        # are constructed by the FactorisedPosterior
        if kwargs.get("init", None):
            # We are initializing from an existing posterior.
            # The FactorizedPosterior will already have extracted the mean and
            # diagonal of the covariance matrix - we need the Cholesky decomposition
            # of the covariance to initialize the off-diagonal terms
            self.log.info(" - Initializing posterior covariance from input posterior")
            _mean, cov = kwargs["init"]
            covar_init = tf.cholesky(cov)
        else:
            covar_init = tf.zeros([self.nvertices, self.nparams, self.nparams], dtype=tf.float32)

        self.off_diag_vars_base = self.log_tf(tf.Variable(covar_init, validate_shape=False,
                                                     name='%s_off_diag_vars' % self.name))
        if kwargs.get("suppress_nan", True):
            self.off_diag_vars = tf.where(tf.is_nan(self.off_diag_vars_base), tf.zeros_like(self.off_diag_vars_base), self.off_diag_vars_base)
        else:
            self.off_diag_vars = self.off_diag_vars_base
        self.off_diag_cov_chol = tf.matrix_set_diag(tf.matrix_band_part(self.off_diag_vars, -1, 0),
                                                    tf.zeros([self.nvertices, self.nparams]),
                                                    name='%s_off_diag_cov_chol' % self.name)

        # Combine diagonal and off-diagonal elements into full matrix
        self.cov_chol = tf.add(tf.matrix_diag(self.std), self.off_diag_cov_chol,
                               name='%s_cov_chol' % self.name)

        # Form the covariance matrix from the chol decomposition
        self.cov = tf.matmul(tf.transpose(self.cov_chol, perm=(0, 2, 1)), self.cov_chol,
                             name='%s_cov' % self.name)

        self.cov_chol = self.log_tf(self.cov_chol)
        self.cov = self.log_tf(self.cov)
Example #19
 def fun_w(self, x, low, up):
     I1 = 0.110987
     x_list = tf.split(x, self.dim, 1)
     #**************************************************
     x_scale_list = []
     h_len = (up - low) / 2.0
     for i in range(self.dim):
         x_scale = (x_list[i] - low - h_len) / h_len
         x_scale_list.append(x_scale)
     #************************************************
     z_x_list = []
     for i in range(self.dim):
         supp_x = tf.greater(1 - tf.abs(x_scale_list[i]), 0)
         z_x = tf.where(supp_x,
                        tf.exp(1 / (tf.pow(x_scale_list[i], 2) - 1)) / I1,
                        tf.zeros_like(x_scale_list[i]))
         z_x_list.append(z_x)
     #***************************************************
     w_val = tf.constant(1.0)
     for i in range(self.dim):
         w_val = tf.multiply(w_val, z_x_list[i])
     dw = tf.gradients(w_val, x, unconnected_gradients='zero')[0]
     dw = tf.where(tf.is_nan(dw), tf.zeros_like(dw), dw)
     return (w_val, dw)
Example #20
def get_disc_loss(args, x, x_fake, score_func, z_outer, neg_kl_outer):
    opt_disc = tf.train.AdamOptimizer(learning_rate=args.learning_rate,
                                      beta1=args.beta1,
                                      beta2=args.beta2)

    fx = score_func(x, z_outer)
    f_fake_x = score_func(x_fake, z_outer)
    f_loss = tf.reduce_mean(-fx) + tf.reduce_mean(f_fake_x)

    loss = f_loss + tf.reduce_mean(-neg_kl_outer)
    if args.gp_lambda > 0:  # add gradient penalty
        alpha = tf.random.uniform(shape=(tf.shape(x)[0], 1, 1))
        x_hat = alpha * x + (1 - alpha) * x_fake
        d_hat = score_func(x_hat, tf.stop_gradient(z_outer))
        ddx = tf.gradients(d_hat, x_hat)[0]
        ddx = tf.sqrt(tf.reduce_sum(tf.square(ddx), axis=[1, 2]))
        ddx = tf.reduce_mean(tf.square(ddx - 1.0)) * args.gp_lambda
        loss = loss + ddx
    gvs = opt_disc.compute_gradients(
        loss, var_list=tf.trainable_variables(scope='score_func'))
    gvs = [(tf.where(tf.is_nan(grad), tf.zeros_like(grad), grad), val)
           for grad, val in gvs if grad is not None]
    train_disc = opt_disc.apply_gradients(gvs)
    return f_loss, train_disc
Example #21
def replace_nan_with_value(tensor, value):
    return tf.where(tf.is_nan(tensor), value * tf.ones_like(tensor), tensor)
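
Example usage of that one-liner pattern (TF 2.x names; the sentinel value -1.0 is just for illustration):

import tensorflow as tf

t = tf.constant([1.0, float("nan"), 3.0])
filled = tf.where(tf.math.is_nan(t), -1.0 * tf.ones_like(t), t)
# filled -> [1.0, -1.0, 3.0]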
Example #22
def maybe_gen_fake_data_based_on_real_data(image, label, reso,
                                           min_fake_lesion_ratio,
                                           gen_fake_probability):
    """Remove real lesion and synthesize lesion."""
    # TODO(lehou): Replace magic numbers with flag variables.
    gen_prob_indicator = tf.random_uniform(shape=[],
                                           minval=0.0,
                                           maxval=1.0,
                                           dtype=tf.float32)

    background_mask = tf.less(label, 0.5)
    lesion_mask = tf.greater(label, 1.5)
    liver_mask = tf.logical_not(tf.logical_or(background_mask, lesion_mask))

    liver_intensity = tf.boolean_mask(image, liver_mask)
    lesion_intensity = tf.boolean_mask(image, lesion_mask)

    intensity_diff = tf.reduce_mean(liver_intensity) - (
        tf.reduce_mean(lesion_intensity))
    intensity_diff *= 1.15
    intensity_diff = tf.cond(tf.is_nan(intensity_diff), lambda: 0.0,
                             lambda: intensity_diff)

    lesion_liver_ratio = 0.0
    lesion_liver_ratio += tf.random.normal(shape=[], mean=0.01, stddev=0.01)
    lesion_liver_ratio += tf.random.normal(shape=[], mean=0.0, stddev=0.05)
    lesion_liver_ratio = tf.clip_by_value(lesion_liver_ratio,
                                          min_fake_lesion_ratio,
                                          min_fake_lesion_ratio + 0.20)

    fake_lesion_mask = tf.logical_and(
        _gen_rand_mask(ratio_mean=lesion_liver_ratio,
                       ratio_stddev=0.0,
                       scale=reso // 32,
                       shape=label.shape,
                       smoothness=reso // 32), tf.logical_not(background_mask))
    liver_mask = tf.logical_not(
        tf.logical_or(background_mask, fake_lesion_mask))

    # Blur the masks
    lesion_mask_blur = tf.squeeze(
        tf.nn.conv3d(tf.expand_dims(
            tf.expand_dims(tf.cast(lesion_mask, tf.float32), -1), 0),
                     filter=tf.ones([reso // 32] * 3 + [1, 1], tf.float32) /
                     (reso // 32)**3,
                     strides=[1, 1, 1, 1, 1],
                     padding='SAME'))
    fake_lesion_mask_blur = tf.squeeze(
        tf.nn.conv3d(tf.expand_dims(
            tf.expand_dims(tf.cast(fake_lesion_mask, tf.float32), -1), 0),
                     filter=tf.ones([reso // 32] * 3 + [1, 1], tf.float32) /
                     (reso // 32)**3,
                     strides=[1, 1, 1, 1, 1],
                     padding='SAME'))

    # Remove real lesion and add fake lesion.
    # If the intensity difference is too small (maybe no liver or lesion region labeled),
    # do not generate fake data.
    gen_prob_indicator = tf.cond(tf.greater(intensity_diff, 0.0001),
                                 lambda: gen_prob_indicator, lambda: 0.0)
    # pylint: disable=g-long-lambda
    image = tf.cond(
        tf.greater(gen_prob_indicator, 1 - gen_fake_probability),
        lambda: image + intensity_diff * lesion_mask_blur \
                      - intensity_diff * fake_lesion_mask_blur,
        lambda: image)
    label = tf.cond(
        tf.greater(gen_prob_indicator, 1 - gen_fake_probability),
        lambda: tf.cast(background_mask, tf.float32) * 0 + \
            tf.cast(liver_mask, tf.float32) * 1 + \
            tf.cast(fake_lesion_mask, tf.float32) * 2,
        lambda: label)
    # pylint: enable=g-long-lambda

    return image, label
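
The intensity_diff guard above uses tf.cond rather than tf.where because the value is a scalar that drives later control flow. In isolation (TF 2.x names, illustrative scalar):

import tensorflow as tf

intensity_diff = tf.constant(float("nan"))
# A NaN difference (e.g. no liver or lesion voxels) falls back to 0.0.
intensity_diff = tf.cond(tf.math.is_nan(intensity_diff),
                         lambda: tf.constant(0.0),
                         lambda: intensity_diff)
# intensity_diff -> 0.0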
Example #23
 def _deal_with_nan(grad):
     # Reference: https://stackoverflow.com/questions/33712178/tensorflow-nan-bug
     assert isinstance(grad, tf.Tensor)
     return tf.where(tf.is_nan(grad), tf.zeros(grad.shape), grad)
Example #24
def normalize_each_feature(observation_values, obs_code, vocab_size, mode,
                           momentum):
  """Combines SparseTensors of observation codes and values into a Tensor.

  Args:
    observation_values: A SparseTensor of type float with the observation
      values of dense shape [batch_size, max_sequence_length, 1].
      There may be no time gaps in between codes.
    obs_code: A Tensor of shape [?, 3] of type int32 with the ids that go along
      with the observation_values. We will do the normalization separately for
      each lab test.
    vocab_size: The range of the values in obs_code is from 0 to vocab_size.
    mode: The execution mode, as defined in tf.estimator.ModeKeys.
    momentum: Mean and variance will be updated as
      momentum*old_value + (1-momentum) * new_value.
  Returns:
    observation_values as in the input only with normalized values.
  """
  with tf.variable_scope('batch_normalization'):
    new_indices = []
    new_values = []

    for i in range(vocab_size):
      with tf.variable_scope('bn' + str(i)):
        positions_of_feature_i = tf.where(tf.equal(obs_code, i))
        values_of_feature_i = tf.gather_nd(observation_values.values,
                                           positions_of_feature_i)
        if mode == tf.estimator.ModeKeys.TRAIN:
          tf.summary.scalar('avg_observation_values/' + str(i),
                            tf.reduce_mean(values_of_feature_i))
          tf.summary.histogram('observation_values/' + str(i),
                               values_of_feature_i)
        batchnorm_layer = tf.layers.BatchNormalization(
            axis=1,
            momentum=momentum,
            epsilon=0.01,
            trainable=True)
        normalized_values = tf.squeeze(
            batchnorm_layer.apply(
                tf.expand_dims(values_of_feature_i, axis=1),
                training=(mode == tf.estimator.ModeKeys.TRAIN)
            ),
            axis=1,
            name='squeeze_normalized_values')
        if mode == tf.estimator.ModeKeys.TRAIN:
          tf.summary.scalar('batchnorm_layer/moving_mean/' + str(i),
                            tf.squeeze(batchnorm_layer.moving_mean))
          tf.summary.scalar('batchnorm_layer/moving_variance/' + str(i),
                            tf.squeeze(batchnorm_layer.moving_variance))
          tf.summary.scalar('avg_normalized_values/' + str(i),
                            tf.reduce_mean(normalized_values))
          tf.summary.histogram('normalized_observation_values/' + str(i),
                               normalized_values)
        indices_i = tf.gather_nd(observation_values.indices,
                                 positions_of_feature_i)
        new_indices += [indices_i]
        normalized_values = tf.where(tf.is_nan(normalized_values),
                                     tf.zeros_like(normalized_values),
                                     normalized_values)
        new_values += [normalized_values]

    normalized_sp_tensor = tf.SparseTensor(
        indices=tf.concat(new_indices, axis=0),
        values=tf.concat(new_values, axis=0),
        dense_shape=observation_values.dense_shape)
    normalized_sp_tensor = tf.sparse_reorder(normalized_sp_tensor)
    return normalized_sp_tensor
Example #25
def _calculate_expected_result(dist_per_cell, numeric_values,
                               numeric_values_scale, input_mask_float,
                               logits_aggregation, config):
    """Calculate the expected result given cell and aggregation probabilities."""
    if config.use_gumbel_for_cells:
        gumbel_dist = tfp.distributions.RelaxedBernoulli(
            # The token logits were already divided by the temperature and used for
            # computing cell selection errors, so we need to multiply by it again here
            config.temperature,
            logits=dist_per_cell.logits_parameter() * config.temperature)
        scaled_probability_per_cell = gumbel_dist.sample()
    else:
        scaled_probability_per_cell = _get_probs(dist_per_cell)

    # <float32>[batch_size, seq_length]
    scaled_probability_per_cell = (scaled_probability_per_cell /
                                   numeric_values_scale) * input_mask_float
    count_result = tf.reduce_sum(scaled_probability_per_cell, axis=1)
    numeric_values_masked = tf.where(
        tf.is_nan(numeric_values), tf.zeros_like(numeric_values),
        numeric_values)  # Mask non-numeric table values to zero.
    sum_result = tf.reduce_sum(scaled_probability_per_cell *
                               numeric_values_masked,
                               axis=1)
    avg_approximation = config.average_approximation_function
    if avg_approximation == AverageApproximationFunction.RATIO:
        average_result = sum_result / (count_result + _EPSILON_ZERO_DIVISION)
    elif avg_approximation == AverageApproximationFunction.FIRST_ORDER:
        # The sum of all probabilities except those that correspond to other cells
        ex = (
            tf.reduce_sum(scaled_probability_per_cell, axis=1, keepdims=True) -
            scaled_probability_per_cell + 1)
        average_result = tf.reduce_sum(numeric_values_masked *
                                       scaled_probability_per_cell / ex,
                                       axis=1)
    elif avg_approximation == AverageApproximationFunction.SECOND_ORDER:
        # The sum of all probabilities except those that correspond to other cells
        ex = (
            tf.reduce_sum(scaled_probability_per_cell, axis=1, keepdims=True) -
            scaled_probability_per_cell + 1)
        pointwise_var = (scaled_probability_per_cell *
                         (1 - scaled_probability_per_cell))
        var = tf.reduce_sum(pointwise_var, axis=1,
                            keepdims=True) - pointwise_var
        multiplier = (var / tf.math.square(ex) + 1) / ex
        average_result = tf.reduce_sum(
            numeric_values_masked * scaled_probability_per_cell * multiplier,
            axis=1)
    else:
        tf.logging.error("Invalid average_approximation_function: %s",
                         config.average_approximation_function)

    if config.use_gumbel_for_agg:
        gumbel_dist = tfp.distributions.RelaxedOneHotCategorical(
            config.agg_temperature, logits=logits_aggregation[:, 1:])
        # <float32>[batch_size, num_aggregation_labels - 1]
        aggregation_op_only_probs = gumbel_dist.sample()
    else:
        # <float32>[batch_size, num_aggregation_labels - 1]
        aggregation_op_only_probs = tf.nn.softmax(logits_aggregation[:, 1:] /
                                                  config.agg_temperature,
                                                  axis=-1)
    all_results = tf.concat([
        tf.expand_dims(sum_result, axis=1),
        tf.expand_dims(average_result, axis=1),
        tf.expand_dims(count_result, axis=1)
    ],
                            axis=1)
    expected_result = tf.reduce_sum(all_results * aggregation_op_only_probs,
                                    axis=1)
    return expected_result
Example #26
def SanitizedAutoCorrelation(x, axis, *args, **kwargs):
    res = tfp.stats.auto_correlation(x, axis, *args, **kwargs)
    res = tf.where(tf.is_nan(res), tf.ones_like(res), res)
    res = tf.where(tf.is_inf(res), tf.ones_like(res), res)
    return res
Example #27
def replace_nan(tensor, default):
    # Assumes `where`, `is_nan` and `ones_like` are imported from the TF 1.x API,
    # e.g. `from tensorflow import where, is_nan, ones_like`.
    return where(is_nan(tensor), ones_like(tensor) * default, tensor)
Example #28
  def _apply_gradients(self, grads_and_vars, learning_rate):
    """See base class."""
    print('_apply_gradients is called!!!')

    # Create slot variables
    var_list = []
    for (grad, param) in grads_and_vars:
      if grad is None or param is None:
        continue
      var_list.append(param)
    with ops.init_scope():
      self._create_slots(var_list)

    # Build training operations
    assignments = []
    check_values = []
    for (grad, param) in grads_and_vars:
      if grad is None or param is None:
        continue

      param_name = self._get_variable_name(param.name)

      #m, v = self.mv_lookup[param_name]
      m = self.get_slot(param, param_name + "/adam_m")
      v = self.get_slot(param, param_name + "/adam_v")

      # Standard Adam update.
      next_m = (
          tf.multiply(self.beta_1, m) + tf.multiply(1.0 - self.beta_1, grad))
      next_v = (
          tf.multiply(self.beta_2, v) + tf.multiply(1.0 - self.beta_2,
                                                    tf.square(grad)))
      update = next_m / (tf.sqrt(next_v) + self.epsilon)

      check_update_nan = tf.Assert(tf.logical_not(tf.reduce_all(tf.is_nan(update))), [param_name, 'NAN update', update])
      check_update_inf = tf.Assert(tf.logical_not(tf.reduce_all(tf.is_inf(update))), [param_name, 'INF update', update])
      check_values.append(check_update_nan)
      check_values.append(check_update_inf)
      #update = 0

      # Just adding the square of the weights to the loss function is *not*
      # the correct way of using L2 regularization/weight decay with Adam,
      # since that will interact with the m and v parameters in strange ways.
      #
      # Instead we want to decay the weights in a manner that doesn't interact
      # with the m/v parameters. This is equivalent to adding the square
      # of the weights to the loss with plain (non-momentum) SGD.
      if self.weight_decay_rate > 0:
        if self._do_use_weight_decay(param_name):
          update += self.weight_decay_rate * param

      update_with_lr = learning_rate * update
      # update_with_lr = tf.Print(update_with_lr, ['\nupdate_with_lr', param_name, tf.shape(update_with_lr), update_with_lr], summarize=32)
      max_update_with_lr = tf.reduce_max(update_with_lr)
      min_update_with_lr = tf.reduce_min(update_with_lr)
      # update_with_lr = tf.Print(update_with_lr, ['\nupdate_with_lr', param_name, tf.shape(update_with_lr), min_update_with_lr, max_update_with_lr], summarize=32)

      check_update_with_lr_nan = tf.Assert(tf.logical_not(tf.reduce_all(tf.is_nan(update_with_lr))), [param_name, 'NAN update_with_lr', update_with_lr])
      check_update_with_lr_inf = tf.Assert(tf.logical_not(tf.reduce_all(tf.is_inf(update_with_lr))), [param_name, 'INF update_with_lr', update_with_lr])
      check_values.append(check_update_with_lr_nan)
      check_values.append(check_update_with_lr_inf)

      next_param = param - update_with_lr

      check_next_param_nan = tf.Assert(tf.logical_not(tf.reduce_all(tf.is_nan(next_param))), [param_name, 'NAN next_param', next_param])
      check_next_param_inf = tf.Assert(tf.logical_not(tf.reduce_all(tf.is_inf(next_param))), [param_name, 'INF next_param', next_param])
      check_values.append(check_next_param_nan)
      check_values.append(check_next_param_inf)

      # Ensure that the debug operations are executed.
      for op in check_values:
        op.mark_used()

      '''
      assignments.extend(
          [param.assign(next_param),]
      )
      '''
      assignments.extend(
          [
           param.assign(next_param),
           m.assign(next_m),
           v.assign(next_v)
          ]
          )
      assignments.extend(check_values)

    return assignments
Example #29
    def _build_graph(
        Npartitions,
        voc_size,
        batch_size,
        gamma_regularizer,
        reg2,
        optimizer_param,
        optimizer_type,
        init_std_dev=.05,
    ):

        graph = tf.Graph()
        with graph.as_default():

            chosen_index_1 = tf.placeholder(dtype=tf.int32, shape=(batch_size))
            chosen_index_2 = tf.placeholder(dtype=tf.int32, shape=(batch_size))
            is_corrections_pl = tf.placeholder_with_default(tf.ones(
                batch_size, dtype=tf.float32),
                                                            shape=(batch_size))

            learning_rate_pl = tf.placeholder(dtype=tf.float32)

            t_weights_free = tf.Variable(tf.truncated_normal(
                [Npartitions, Npartitions], mean=0., stddev=init_std_dev),
                                         dtype=tf.float32)
            t_weights_free_sym = t_weights_free + tf.transpose(t_weights_free)
            t_weights = tf.reshape(
                tf.nn.softmax(
                    tf.reshape(t_weights_free_sym,
                               [Npartitions * Npartitions])),
                [Npartitions, Npartitions])

            t_topics_free = tf.Variable(tf.truncated_normal(
                [Npartitions, voc_size], mean=0., stddev=init_std_dev),
                                        dtype=tf.float32)
            t_topics = tf.nn.softmax(t_topics_free)  #default axis is (-1)

            t_topics_free_pl = tf.placeholder(tf.float32,
                                              shape=[Npartitions, voc_size])
            t_weights_free_pl = tf.placeholder(
                tf.float32, shape=[Npartitions, Npartitions])

            t_weights_free_assign_op = tf.assign(t_weights_free,
                                                 t_weights_free_pl)
            t_topics_free_assign_op = tf.assign(t_topics_free,
                                                t_topics_free_pl)

            t_gamma = gamma_regularizer
            t_gamma2 = reg2

            pre_target = tf.log((tf.reduce_sum((tf.matmul(
                tf.expand_dims(
                    tf.transpose(tf.gather(t_topics, chosen_index_1, axis=1)),
                    -1),
                tf.expand_dims(
                    tf.transpose(tf.gather(t_topics, chosen_index_2, axis=1)),
                    1)) * t_weights),
                                               axis=[1, 2])))
            target = tf.reduce_mean(is_corrections_pl * tf.where(
                tf.is_nan(pre_target), tf.zeros_like(pre_target), pre_target
            )) + t_gamma * tf.reduce_sum(
                tf.diag_part(t_weights)) + t_gamma2 * tf.reduce_sum(
                    tf.diag_part(t_weights) / tf.reduce_sum(t_weights, axis=1))

            #now optimizer
            t_loss = -target

            #t_optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
            #t_optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum = .9)
            #t_optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate)

            if optimizer_type == 'adam':
                t_optimizer = tf.train.AdamOptimizer(
                    learning_rate=learning_rate_pl, **optimizer_param)
            elif optimizer_type == 'rmsprop':
                t_optimizer = tf.train.RMSPropOptimizer(
                    learning_rate=learning_rate_pl, **optimizer_param)
            else:
                raise ValueError('Unknown optimizer')

            opt_vars = t_optimizer.variables()
            opt_vars_pls = [
                tf.placeholder(dtype=v.dtype, shape=v.shape) for v in opt_vars
            ]
            opt_vars_assigns = [
                tf.assign(v, pl) for v, pl in zip(opt_vars, opt_vars_pls)
            ]

            t_train_op = t_optimizer.minimize(t_loss)

            t_tfinit = tf.global_variables_initializer()
            saver = tf.train.Saver(max_to_keep=2)

            t_loss_to_display = -(target - (t_gamma * tf.reduce_sum(
                tf.diag_part(t_weights)) + t_gamma2 * tf.reduce_sum(
                    tf.diag_part(t_weights) / tf.reduce_sum(t_weights, axis=1))
                                            ))

            return (graph, t_tfinit, t_loss_to_display, t_topics, t_train_op,
                    t_weights, chosen_index_1, chosen_index_2,
                    is_corrections_pl, saver, t_topics_free_pl,
                    t_weights_free_pl, t_weights_free_assign_op,
                    t_topics_free_assign_op, pre_target, learning_rate_pl,
                    t_weights_free, t_topics_free, opt_vars, opt_vars_pls,
                    opt_vars_assigns)
Example #30
 def denan(x):
     return tf.where(tf.is_nan(x), tf.zeros_like(x), x)