def make_losses(self, pred_relevant, targets_preprocessed, objective_indices, objective_coeffs):
     # make a loss function and compute some summary numbers
     
     per_target_loss = my_ops.mse_ignore_nans(pred_relevant, targets_preprocessed, reduction_indices=0)
     loss = tf.reduce_sum(per_target_loss)
     
     # compute objective value, just for logging purposes
     # TODO add multiplication by the objective_coeffs (somehow not trivial)
     obj = tf.reduce_sum(self.postprocess_predictions(targets_preprocessed), 1)
     #obj = tf.sum(self.postprocess_predictions(targets_preprocessed[:,objective_indices]) * objective_coeffs[None,:], axis=1)
     obj_nonan = tf.where(tf.is_nan(obj), tf.zeros_like(obj), obj)
     num_valid_targets = tf.reduce_sum(1-tf.cast(tf.is_nan(obj), tf.float32))
     mean_obj = tf.reduce_sum(obj_nonan) / num_valid_targets
     
     # summaries
     obj_sum = tf.summary.scalar("objective_todo", mean_obj)
     #TODO
     per_target_loss_sums = []
     #per_target_loss_sums = [tf.summary.scalar(name, loss) for name,loss in zip(self.target_names,per_target_loss)]
     loss_sum = tf.summary.scalar("full_loss", loss)  # spaces are illegal in summary tags
     
     #self.per_target_loss = tf.get_variable('avg_targets', [self.target_dim], initializer=tf.constant_initializer(value=0.))
     
     full_loss = loss
     errs_to_print = [loss]
     short_summary = [loss_sum]
     detailed_summary = per_target_loss_sums + [obj_sum]
     
     return full_loss, errs_to_print, short_summary, detailed_summary
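A hedged sketch of how the TODO above might be resolved (not the author's code; note that `tf.sum` in the commented-out line does not exist, and TF 1.x does not support fancy integer-array column indexing, which may be why this was "not trivial"):

import tensorflow as tf

def objective_with_coeffs(postprocessed, objective_indices, objective_coeffs):
    # Gather the objective columns (tf.gather with axis=1 needs TF >= 1.3),
    # then weight each column and sum per example.
    relevant = tf.gather(postprocessed, objective_indices, axis=1)
    return tf.reduce_sum(relevant * objective_coeffs[None, :], axis=1)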
Example #2
    def cut(self, hits, start, end):
        """
        Cuts the [start:end] range from the input data
        :param hits: hits timeseries
        :param start: start index
        :param end: end index
        :return: tuple (train_hits, test_hits, dow, lagged_hits)
        """
        # Pad hits to ensure we have enough array length for prediction
        hits = tf.concat([hits, tf.fill([self.predict_window], np.NaN)], axis=0)
        cropped_hit = hits[start:end]

        # cut day of week
        cropped_dow = self.inp.dow[start:end]

        # Cut lagged hits
        # gather() accepts only int32 indexes
        cropped_lags = tf.cast(self.inp.lagged_ix[start:end], tf.int32)
        # Mask for -1 (no data) lag indexes
        lag_mask = cropped_lags < 0
        # Convert -1 to 0 for gather(); it doesn't accept negative indexes
        cropped_lags = tf.maximum(cropped_lags, 0)
        # Translate lag indexes to hit values
        lagged_hit = tf.gather(hits, cropped_lags)
        # Convert masked (see above) or NaN lagged hits to zeros
        lag_zeros = tf.zeros_like(lagged_hit)
        lagged_hit = tf.where(lag_mask | tf.is_nan(lagged_hit), lag_zeros, lagged_hit)

        # Split for train and test
        x_hits, y_hits = tf.split(cropped_hit, [self.train_window, self.predict_window], axis=0)

        # Convert NaN to zero in the train data
        x_hits = tf.where(tf.is_nan(x_hits), tf.zeros_like(x_hits), x_hits)
        return x_hits, y_hits, cropped_dow, lagged_hit
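The clamp-then-mask gather trick above is useful on its own; a minimal standalone sketch (hypothetical data, TF 1.x API):

import tensorflow as tf

hits = tf.constant([10., 20., 30.])
lag_ix = tf.constant([2, -1, 0])                     # -1 marks "no lag available"
lag_mask = lag_ix < 0
gathered = tf.gather(hits, tf.maximum(lag_ix, 0))    # clamp so gather() stays legal
lagged = tf.where(lag_mask, tf.zeros_like(gathered), gathered)
# lagged evaluates to [30., 0., 10.]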
Example #3
  def testUniformNans(self):
    a = 10.0
    b = [11.0, 100.0]
    uniform = uniform_lib.Uniform(low=a, high=b)

    no_nans = tf.constant(1.0)
    nans = tf.constant(0.0) / tf.constant(0.0)
    self.assertTrue(self.evaluate(tf.is_nan(nans)))
    with_nans = tf.stack([no_nans, nans])

    pdf = uniform.prob(with_nans)

    is_nan = self.evaluate(tf.is_nan(pdf))
    self.assertFalse(is_nan[0])
    self.assertTrue(is_nan[1])
Example #4
  def __call__(self,
               prediction_tensor,
               target_tensor,
               ignore_nan_targets=False,
               scope=None,
               **params):
    """Call the loss function.

    Args:
      prediction_tensor: an N-d tensor of shape [batch, anchors, ...]
        representing predicted quantities.
      target_tensor: an N-d tensor of shape [batch, anchors, ...] representing
        regression or classification targets.
      ignore_nan_targets: whether to ignore nan targets in the loss computation.
        E.g. can be used if the target tensor is missing groundtruth data that
        shouldn't be factored into the loss.
      scope: Op scope name. Defaults to 'Loss' if None.
      **params: Additional keyword arguments for specific implementations of
              the Loss.

    Returns:
      loss: a tensor representing the value of the loss function.
    """
    with tf.name_scope(scope, 'Loss',
                       [prediction_tensor, target_tensor, params]) as scope:
      if ignore_nan_targets:
        target_tensor = tf.where(tf.is_nan(target_tensor),
                                 prediction_tensor,
                                 target_tensor)
      return self._compute_loss(prediction_tensor, target_tensor, **params)
Example #5
  def testUniformNans(self):
    with self.test_session():
      a = 10.0
      b = [11.0, 100.0]
      uniform = tf.contrib.distributions.Uniform(a=a, b=b)

      no_nans = tf.constant(1.0)
      nans = tf.constant(0.0) / tf.constant(0.0)
      self.assertTrue(tf.is_nan(nans).eval())
      with_nans = tf.pack([no_nans, nans])

      pdf = uniform.pdf(with_nans)

      is_nan = tf.is_nan(pdf).eval()
      self.assertFalse(is_nan[0])
      self.assertTrue(is_nan[1])
Example #6
def filter_groundtruth_with_nan_box_coordinates(tensor_dict):
  """Filters out groundtruth with no bounding boxes.

  Args:
    tensor_dict: a dictionary of following groundtruth tensors -
      fields.InputDataFields.groundtruth_boxes
      fields.InputDataFields.groundtruth_classes
      fields.InputDataFields.groundtruth_confidences
      fields.InputDataFields.groundtruth_keypoints
      fields.InputDataFields.groundtruth_instance_masks
      fields.InputDataFields.groundtruth_is_crowd
      fields.InputDataFields.groundtruth_area
      fields.InputDataFields.groundtruth_label_types

  Returns:
    a dictionary of tensors containing only the groundtruth that have bounding
    boxes.
  """
  groundtruth_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
  nan_indicator_vector = tf.greater(tf.reduce_sum(tf.to_int32(
      tf.is_nan(groundtruth_boxes)), reduction_indices=[1]), 0)
  valid_indicator_vector = tf.logical_not(nan_indicator_vector)
  valid_indices = tf.where(valid_indicator_vector)

  return retain_groundtruth(tensor_dict, valid_indices)
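The same row-filtering pattern in miniature (hypothetical boxes, TF 1.x API):

import tensorflow as tf

boxes = tf.constant([[0., 0., 1., 1.],
                     [float('nan'), 0., 1., 1.]])
nan_rows = tf.greater(
    tf.reduce_sum(tf.to_int32(tf.is_nan(boxes)), axis=1), 0)
valid_indices = tf.where(tf.logical_not(nan_rows))   # [[0]]
clean_boxes = tf.gather_nd(boxes, valid_indices)     # keeps only row 0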
Example #7
    def NLL(self, y, lengths, pis, mus, sigmas, rho, es, eps=1e-8):
        sigma_1, sigma_2 = tf.split(sigmas, 2, axis=2)
        y_1, y_2, y_3 = tf.split(y, 3, axis=2)
        mu_1, mu_2 = tf.split(mus, 2, axis=2)

        norm = 1.0 / (2*np.pi*sigma_1*sigma_2 * tf.sqrt(1 - tf.square(rho)))
        Z = tf.square((y_1 - mu_1) / (sigma_1)) + \
            tf.square((y_2 - mu_2) / (sigma_2)) - \
            2*rho*(y_1 - mu_1)*(y_2 - mu_2) / (sigma_1*sigma_2)

        exp = -1.0*Z / (2*(1 - tf.square(rho)))
        gaussian_likelihoods = tf.exp(exp) * norm
        gmm_likelihood = tf.reduce_sum(pis * gaussian_likelihoods, 2)
        gmm_likelihood = tf.clip_by_value(gmm_likelihood, eps, np.inf)

        bernoulli_likelihood = tf.squeeze(tf.where(tf.equal(tf.ones_like(y_3), y_3), es, 1 - es))

        nll = -(tf.log(gmm_likelihood) + tf.log(bernoulli_likelihood))
        sequence_mask = tf.logical_and(
            tf.sequence_mask(lengths, maxlen=tf.shape(y)[1]),
            tf.logical_not(tf.is_nan(nll)),
        )
        nll = tf.where(sequence_mask, nll, tf.zeros_like(nll))
        num_valid = tf.reduce_sum(tf.cast(sequence_mask, tf.float32), axis=1)

        sequence_loss = tf.reduce_sum(nll, axis=1) / tf.maximum(num_valid, 1.0)
        element_loss = tf.reduce_sum(nll) / tf.maximum(tf.reduce_sum(num_valid), 1.0)
        return sequence_loss, element_loss
Example #8
def kl_divergence(distribution_a, distribution_b,
                  allow_nan_stats=True, name=None):
  """Get the KL-divergence KL(distribution_a || distribution_b).

  If there is no KL method registered specifically for `type(distribution_a)`
  and `type(distribution_b)`, then the class hierarchies of these types are
  searched.

  If one KL method is registered between any pairs of classes in these two
  parent hierarchies, it is used.

  If more than one such registered method exists, the method whose registered
  classes have the shortest sum MRO paths to the input types is used.

  If more than one such shortest path exists, the first method
  identified in the search is used (favoring a shorter MRO distance to
  `type(distribution_a)`).

  Args:
    distribution_a: The first distribution.
    distribution_b: The second distribution.
    allow_nan_stats: Python `bool`, default `True`. When `True`,
      statistics (e.g., mean, mode, variance) use the value "`NaN`" to
      indicate the result is undefined. When `False`, an exception is raised
      if one or more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Returns:
    A Tensor with the batchwise KL-divergence between `distribution_a`
    and `distribution_b`.

  Raises:
    NotImplementedError: If no KL method is defined for distribution types
      of `distribution_a` and `distribution_b`.
  """
  kl_fn = _registered_kl(type(distribution_a), type(distribution_b))
  if kl_fn is None:
    # TODO(b/117098119): For backwards compatibility, we check TF's registry as
    # well. This typically happens when this function is called on a pair of
    # TF's distributions.
    with deprecation.silence():
      return tf.distributions.kl_divergence(distribution_a, distribution_b)

  with tf.name_scope("KullbackLeibler"):
    kl_t = kl_fn(distribution_a, distribution_b, name=name)
    if allow_nan_stats:
      return kl_t

    # Check KL for NaNs
    kl_t = tf.identity(kl_t, name="kl")

    with tf.control_dependencies([
        tf.Assert(
            tf.logical_not(
                tf.reduce_any(tf.is_nan(kl_t))),
            ["KL calculation between %s and %s returned NaN values "
             "(and was called with allow_nan_stats=False). Values:"
             % (distribution_a.name, distribution_b.name), kl_t])]):
      return tf.identity(kl_t, name="checked_kl")
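The Assert-under-control_dependencies pattern used for the NaN check above, reduced to a minimal standalone sketch (hypothetical tensor, TF 1.x API):

import tensorflow as tf

t = tf.constant([1.0, float('nan')])
check = tf.Assert(tf.logical_not(tf.reduce_any(tf.is_nan(t))), [t])
with tf.control_dependencies([check]):
    checked_t = tf.identity(t, name="checked")
# sess.run(checked_t) raises InvalidArgumentError because t contains a NaN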
Example #9
 def scale(self, x):
   """Scale x from -0.5 - 0.5 to 0 - 255."""
   x = tf.where(tf.is_nan(x), tf.ones_like(x), x)
   x = tf.where(tf.is_inf(x), tf.ones_like(x), x)
   x = tf.clip_by_value(x, -0.5, 0.5)
   x += 0.5
   x = x * 2**self.hparams.n_bits_x
   return tf.cast(tf.clip_by_value(x, 0, 255), dtype=tf.uint8)
Example #10
    def _build_mu_algorithm(self):
        """build dataflow graph for Multiplicative algorithm"""

        V, H, W = self.V, self.H, self.W
        rank = self.rank
        shape = V.get_shape()

        graph = tf.get_default_graph()

        #save W for calculating delta with the updated W
        W_old = tf.get_variable(name="W_old", shape=[shape[0], rank])
        save_W = W_old.assign(W)

        #Multiplicative updates
        with graph.control_dependencies([save_W]):
            #update operation for H
            Wt = tf.transpose(W)
            WV = tf.matmul(Wt, V)
            WWH = tf.matmul(tf.matmul(Wt, W), H)
            WV_WWH = WV / WWH
            # select op should be executed on the CPU, not the GPU
            with tf.device('/cpu:0'):
                #convert nan to zero
                WV_WWH = tf.select(tf.is_nan(WV_WWH),
                                    tf.zeros_like(WV_WWH),
                                    WV_WWH)
            H_new = H * WV_WWH
            update_H = H.assign(H_new)

        with graph.control_dependencies([save_W, update_H]):
            #update operation for W (after updating H)
            Ht = tf.transpose(H)
            VH = tf.matmul(V, Ht)
            WHH = tf.matmul(W, tf.matmul(H, Ht))
            VH_WHH = VH / WHH
            with tf.device('/cpu:0'):
                VH_WHH = tf.select(tf.is_nan(VH_WHH),
                                        tf.zeros_like(VH_WHH),
                                        VH_WHH)
            W_new = W * VH_WHH
            update_W = W.assign(W_new)

        self.delta = tf.reduce_sum(tf.abs(W_old - W))

        self.step = tf.group(save_W, update_H, update_W)
Example #11
def replace_nan_groundtruth_label_scores_with_ones(label_scores):
  """Replaces nan label scores with 1.0.

  Args:
    label_scores: a tensor containing object annotation label scores.

  Returns:
    a tensor where NaN label scores have been replaced by ones.
  """
  return tf.where(
      tf.is_nan(label_scores), tf.ones(tf.shape(label_scores)), label_scores)
Example #12
 def set_zero_on_high_global_norm(self, grad, grad_norm_threshold, global_norm_tag=None):
   """
   :param tf.Tensor grad:
   :param float grad_norm_threshold:
   :param str|None global_norm_tag:
   :rtype: tf.Tensor
   """
   norm = self.get_global_grad_norm(tag=global_norm_tag)
   # Also check nan/inf. Treat them as if we would have been over grad_norm_threshold.
   zero_cond = tf.logical_or(tf.is_nan(norm), tf.is_inf(norm))
   zero_cond = tf.logical_or(zero_cond, tf.greater(norm, grad_norm_threshold))
   return tf.where(zero_cond, tf.zeros_like(grad), grad)
Example #13
 def _prob(self, x):
   broadcasted_x = x * tf.ones(
       self.batch_shape_tensor(), dtype=x.dtype)
   return tf.where(
       tf.is_nan(broadcasted_x),
       broadcasted_x,
       tf.where(
           tf.logical_or(broadcasted_x < self.low,
                         # This > is only sound for continuous uniform
                         broadcasted_x > self.high),
           tf.zeros_like(broadcasted_x),
           tf.ones_like(broadcasted_x) / self.range()))
Example #14
 def _compare(self, x, use_gpu):
     np_finite, np_inf, np_nan = np.isfinite(x), np.isinf(x), np.isnan(x)
     with self.test_session(use_gpu=use_gpu) as sess:
         inx = tf.convert_to_tensor(x)
         ofinite, oinf, onan = tf.is_finite(inx), tf.is_inf(inx), tf.is_nan(inx)
         tf_finite, tf_inf, tf_nan = sess.run([ofinite, oinf, onan])
     self.assertAllEqual(np_inf, tf_inf)
     self.assertAllEqual(np_nan, tf_nan)
     self.assertAllEqual(np_finite, tf_finite)
     self.assertShapeEqual(np_inf, oinf)
     self.assertShapeEqual(np_nan, onan)
     self.assertShapeEqual(np_finite, ofinite)
Example #15
 def _get_cubic_root(self):
   """Get the cubic root."""
   # We have the equation x^2 D^2 + (1-x)^4 * C / h_min^2
   # where x = sqrt(mu).
   # We substitute x, which is sqrt(mu), with x = y + 1.
   # It gives y^3 + py = q
   # where p = (D^2 h_min^2)/(2*C) and q = -p.
   # We use the Vieta's substitution to compute the root.
    # There is only one real solution y (which is in [0, 1]).
   # http://mathworld.wolfram.com/VietasSubstitution.html
   assert_array = [
       tf.Assert(
           tf.logical_not(tf.is_nan(self._dist_to_opt_avg)),
           [self._dist_to_opt_avg,]),
       tf.Assert(
           tf.logical_not(tf.is_nan(self._h_min)),
           [self._h_min,]),
       tf.Assert(
           tf.logical_not(tf.is_nan(self._grad_var)),
           [self._grad_var,]),
       tf.Assert(
           tf.logical_not(tf.is_inf(self._dist_to_opt_avg)),
           [self._dist_to_opt_avg,]),
       tf.Assert(
           tf.logical_not(tf.is_inf(self._h_min)),
           [self._h_min,]),
       tf.Assert(
           tf.logical_not(tf.is_inf(self._grad_var)),
           [self._grad_var,])
   ]
   with tf.control_dependencies(assert_array):
     p = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var
     w3 = (-tf.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0
     w = tf.sign(w3) * tf.pow(tf.abs(w3), 1.0/3.0)
     y = w - p / 3.0 / w
     x = y + 1
   return x
Example #16
  def __call__(self,
               prediction_tensor,
               target_tensor,
               ignore_nan_targets=False,
               losses_mask=None,
               scope=None,
               **params):
    """Call the loss function.

    Args:
      prediction_tensor: an N-d tensor of shape [batch, anchors, ...]
        representing predicted quantities.
      target_tensor: an N-d tensor of shape [batch, anchors, ...] representing
        regression or classification targets.
      ignore_nan_targets: whether to ignore nan targets in the loss computation.
        E.g. can be used if the target tensor is missing groundtruth data that
        shouldn't be factored into the loss.
      losses_mask: A [batch] boolean tensor that indicates whether losses should
        be applied to individual images in the batch. For elements that
        are False, corresponding prediction, target, and weight tensors will not
        contribute to loss computation. If None, no filtering will take place
        prior to loss computation.
      scope: Op scope name. Defaults to 'Loss' if None.
      **params: Additional keyword arguments for specific implementations of
              the Loss.

    Returns:
      loss: a tensor representing the value of the loss function.
    """
    with tf.name_scope(scope, 'Loss',
                       [prediction_tensor, target_tensor, params]) as scope:
      if ignore_nan_targets:
        target_tensor = tf.where(tf.is_nan(target_tensor),
                                 prediction_tensor,
                                 target_tensor)
      if losses_mask is not None:
        tensor_multiplier = self._get_loss_multiplier_for_tensor(
            prediction_tensor,
            losses_mask)
        prediction_tensor *= tensor_multiplier
        target_tensor *= tensor_multiplier

        if 'weights' in params:
          params['weights'] = tf.convert_to_tensor(params['weights'])
          weights_multiplier = self._get_loss_multiplier_for_tensor(
              params['weights'],
              losses_mask)
          params['weights'] *= weights_multiplier
      return self._compute_loss(prediction_tensor, target_tensor, **params)
Example #17
    def __init__(self, batch_size, vocab_size, encoding_size, embedding_size,
                    num_glimpses = 8,
                    grad_norm_clip = 5.,
                    l2_reg_coef=1e-4,
                    session=tf.Session(),
                    name='AlternatingAttention'):
        """
        Creates an iterative alternating attention network as described in https://arxiv.org/abs/1606.02245
        """
        self._batch_size = batch_size
        self._vocab_size = vocab_size
        self._encode_size = encoding_size
        self._infer_size = 4 * encoding_size
        self._embedding_size = embedding_size
        self._num_glimpses = num_glimpses
        self._sess = session
        self._name = name

        self._build_placeholders()
        self._build_variables()

        # Regularization
        tf.contrib.layers.apply_regularization(tf.contrib.layers.l2_regularizer(l2_reg_coef), [self._embeddings])


        # Answer probability
        doc_attentions = self._inference(self._docs, self._queries)
        nans =  tf.reduce_sum(tf.to_float(tf.is_nan(doc_attentions)))

        self._doc_attentions = doc_attentions
        ans_mask = tf.to_float(tf.equal(tf.expand_dims(self._answers, -1), self._docs))
        P_a = tf.reduce_sum(ans_mask * doc_attentions, 1)
        loss_op = -tf.reduce_mean(tf.log(P_a + tf.constant(0.00001)))
        self._loss_op = loss_op

        # Optimizer and gradients
        with tf.name_scope("optimizer"):
            self._opt = tf.train.AdamOptimizer(learning_rate=self._learning_rate)
            grads_and_vars = self._opt.compute_gradients(loss_op)
            capped_grads_and_vars = [(tf.clip_by_norm(g, grad_norm_clip), v) for g,v in grads_and_vars]
            self._train_op = self._opt.apply_gradients(capped_grads_and_vars, global_step=self._global_step)

        tf.summary.scalar('loss', self._loss_op)
        tf.summary.scalar('learning_rate', self._learning_rate)
        tf.summary.histogram('answer_probability', P_a)
        self._summary_op = tf.summary.merge_all()

        self._sess.run(tf.global_variables_initializer())
Example #18
  def check_grads(grads_and_vars):
    has_nan_ops = []
    amax_ops = []

    for grad, _ in grads_and_vars:
      if grad is not None:
        if isinstance(grad, tf.IndexedSlices):
          x = grad.values
        else:
          x = grad

        has_nan_ops.append(tf.reduce_any(tf.is_nan(x)))
        amax_ops.append(tf.reduce_max(tf.abs(x)))

    has_nan = tf.reduce_any(has_nan_ops)
    amax = tf.reduce_max(amax_ops)
    return has_nan, amax
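A hedged usage sketch (hypothetical loss and optimizer; in practice a check like this gates the weight update, e.g. for automatic loss scaling):

import tensorflow as tf

w = tf.Variable([1.0, 2.0])
loss = tf.reduce_sum(tf.square(w))
optimizer = tf.train.AdamOptimizer(1e-3)
grads_and_vars = optimizer.compute_gradients(loss)
has_nan, amax = check_grads(grads_and_vars)
# A session can fetch has_nan and skip apply_gradients when it is True.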
Example #19
  def def_preprocessing_fn(inputs):
    """tf.transform's callback function for preprocessing inputs.

    Args:
      inputs: map from feature keys to raw not-yet-transformed features.

    Returns:
      Map from string feature key to transformed feature operations.
    """
    outputs = {}
    for key in taxi.DENSE_FLOAT_FEATURE_KEYS:
      # Preserve this feature as a dense float, setting nan's to the mean.
      outputs[taxi.transformed_name(key)] = transform.scale_to_z_score(
          _fill_in_missing(inputs[key]))

    for key in taxi.VOCAB_FEATURE_KEYS:
      # Build a vocabulary for this feature.
      outputs[
          taxi.transformed_name(key)] = transform.compute_and_apply_vocabulary(
              _fill_in_missing(inputs[key]),
              top_k=taxi.VOCAB_SIZE,
              num_oov_buckets=taxi.OOV_SIZE)

    for key in taxi.BUCKET_FEATURE_KEYS:
      outputs[taxi.transformed_name(key)] = transform.bucketize(
          _fill_in_missing(inputs[key]), taxi.FEATURE_BUCKET_COUNT)

    for key in taxi.CATEGORICAL_FEATURE_KEYS:
      outputs[taxi.transformed_name(key)] = _fill_in_missing(inputs[key])

    # Was this passenger a big tipper?
    taxi_fare = _fill_in_missing(inputs[taxi.FARE_KEY])
    tips = _fill_in_missing(inputs[taxi.LABEL_KEY])
    outputs[taxi.transformed_name(taxi.LABEL_KEY)] = tf.where(
        tf.is_nan(taxi_fare),
        tf.cast(tf.zeros_like(taxi_fare), tf.int64),
        # Test if the tip was > 20% of the fare.
        tf.cast(
            tf.greater(tips, tf.multiply(taxi_fare, tf.constant(0.2))),
            tf.int64))

    return outputs
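`_fill_in_missing` is not shown in this snippet; in the canonical TFX taxi example it densifies a [batch, 1] SparseTensor, filling gaps with '' or 0. A hedged sketch of that helper (an assumption, not part of the code above):

import tensorflow as tf

def _fill_in_missing(x):
    # Replace missing values in a [batch, 1] SparseTensor with '' / 0
    # and convert it to a dense [batch] tensor.
    default_value = '' if x.dtype == tf.string else 0
    dense = tf.sparse.to_dense(
        tf.SparseTensor(x.indices, x.values, [x.dense_shape[0], 1]),
        default_value)
    return tf.squeeze(dense, axis=1)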
Example #20
 def _arccosine(self, slist1, slist2, tf_embs):
     """
     Uses an arccosine kernel of degree 0 to calculate
     the similarity matrix between two vectors of embeddings. 
     This is just cosine similarity projected into the [0,1] interval.
     """
     dot = self._dot(slist1, slist2, tf_embs)
     # This calculation corresponds to an arc-cosine with 
     # degree 0. It can be interpreted as cosine
     # similarity but projected into a [0,1] interval.
     # TODO: arc-cosine with degree 1.
     tf_pi = tf.constant(np.pi, dtype=tf.float64)
     tf_norms = tf.constant(self.norms, dtype=tf.float64, name='norms')
     normlist1 = tf.gather(tf_norms, slist1, name='normlist1')
     normlist2 = tf.matrix_transpose(tf.gather(tf_norms, slist2, name='normlist2'))
     norms = tf.batch_matmul(normlist1, normlist2)
     cosine = tf.clip_by_value(tf.truediv(dot, norms), -1, 1)
     angle = tf.acos(cosine)
     angle = tf.select(tf.is_nan(angle), tf.ones_like(angle) * tf_pi, angle)
     return 1 - (angle / tf_pi)
Example #21
def mse(outputs, targets):
    """
    Compute Mean Squared Error between given outputs and targets.

    If any values in ``targets`` are ``nan``, that will be treated as
    zero error for those elements.

    Parameters
    ----------
    outputs : ``tf.Tensor``
        Output values from a Probe in a network.
    targets : ``tf.Tensor``
        Target values for a Probe in a network.

    Returns
    -------
    mse : ``tf.Tensor``
        Tensor representing the mean squared error.
    """

    targets = tf.where(tf.is_nan(targets), outputs, targets)
    return tf.reduce_mean(tf.square(targets - outputs))
Example #22
def save_model(sess, net, is_training, keep_prob):

    input_placeholder = tf.placeholder(tf.uint8, name='input_placeholder', shape=[None, SP1_BOX[0], SP1_BOX[1], SP1_BOX[2]])
    input_32 = tf.cast(input_placeholder, tf.float32)

    mean, var = tf.nn.moments(input_32, [1], keep_dims=True) #single image normalization
    test_batch = tf.div(tf.subtract(input_32, mean), tf.sqrt(var))
    test_batch = tf.where(tf.is_nan(test_batch), tf.zeros_like(test_batch), test_batch)
    test_batch = tf.nn.avg_pool(test_batch, 
                            ksize=[1, SP1_BOX[0]/SP2_BOX[0], SP1_BOX[1]/SP2_BOX[1], 1],
                            strides=[1, SP1_BOX[0]/SP2_BOX[0], SP1_BOX[1]/SP2_BOX[1], 1],
                            padding='SAME')
    if args.scheme == 'GBTC':
        test_batch  = test_batch * 2 #trained on 4chan

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    if FLAGS.relu_input == 'relu':
        test_batch = tf.nn.relu(test_batch) 
    elif FLAGS.relu_input == 'lrelu':
        test_batch = lrelu(test_batch, alpha=0.2)
    logits = net.inference(test_batch)
    predictions = tf.nn.softmax(logits, name='output')

    init = tf.global_variables_initializer()
    sess.run(init)
    #import IPython; IPython.embed()
    saver = tf.train.Saver(tf.global_variables())
    latest = tf.train.latest_checkpoint(FLAGS.train_dir)
    if not latest:
        print("No checkpoint to continue from in", FLAGS.train_dir)
        sys.exit(1)
    print("resume", latest)
    saver.restore(sess, latest)
    checkpoint_path = os.path.join(FLAGS.train_dir, 'model_with_preprocessing.ckpt')
    saver.save(sess, checkpoint_path, global_step=global_step)
    return
Example #23
 def _arccosine(self, s1, s2, tf_embs):
     """
     Uses an arccosine kernel of degree 0 to calculate
     the similarity matrix between two vectors of embeddings. 
     This is just cosine similarity projected into the [0,1] interval.
     """
     tf_pi = tf.constant(np.pi, dtype=tf.float64)
     mat1 = tf.gather(tf_embs, s1)
     mat2 = tf.gather(tf_embs, s2)
     tf_norms = tf.constant(self.norms, dtype=tf.float64, name='norms')
     norms1 = tf.gather(tf_norms, s1)
     norms2 = tf.gather(tf_norms, s2)
     dot = tf.matmul(mat1, tf.transpose(mat2))
     norms = tf.matmul(norms1, tf.transpose(norms2))
     # We clip values due to numerical errors
     # which put some values outside the arccosine range.
     cosine = tf.clip_by_value(dot / norms, -1, 1)
     angle = tf.acos(cosine)
     # The 0 vector has norm 0, which generates a NaN.
     # We catch these NaNs and replace them with pi,
     # which ends up returning 0 similarity.
     angle = tf.select(tf.is_nan(angle), tf.ones_like(angle) * tf_pi, angle)
     return 1 - (angle / tf_pi)
Example #24
def create_model(inputs, targets, target_masks, iter_num):
    """Create the full model for training/testing
    """
    out_channels = int(targets.get_shape()[-1])
    assert out_channels == int(inputs.get_shape()[-1])

    outputs, iters, img_shape, max_iter = IterGAN(inputs,
                                                  out_channels,
                                                  iter_num,
                                                  name='')

    rest = {}

    if a.sample_lambda > 0.0:
        with tf.name_scope('sample_steps'):
            i = tf.random_uniform((), maxval=max_iter, dtype=tf.int32)
            j = tf.random_uniform((), maxval=2, dtype=tf.int32)
            real_imgs = tf.stack([inputs, targets], name='real_imgs')
            d = IMG_SHAPE[1]
            with tf.name_scope('sample_fake'):
                sample_fake = iters[:, :, (i + 1) * d:(i + 2) * d, :]
                sample_fake.set_shape(img_shape)
            sample_real = real_imgs[j]
            with tf.variable_scope('disciminator_sample'):
                predict_sample_real = create_discriminator(sample_real)
            with tf.variable_scope('disciminator_sample', reuse=True):
                predict_sample_fake = create_discriminator(sample_fake)
            rest['sample'] = {
                'i': i,
                'j': j,
                'predict_real': predict_sample_real,
                'predict_fake': predict_sample_fake,
                'real_inp': sample_real,
                'fake_inp': sample_fake
            }

    # create two copies of discriminator, one for real and one for fake pairs
    # they share the same underlying variables
    with tf.name_scope('real_discriminator'):
        with tf.variable_scope('discriminator'):
            # 2x [batch, height, width, channels] => [batch, 30, 30, 1]
            predict_real = create_discriminator(inputs, targets)

    with tf.name_scope('fake_discriminator'):
        with tf.variable_scope('discriminator', reuse=True):
            # 2x [batch, height, width, channels] => [batch, 30, 30, 1]
            predict_fake = create_discriminator(inputs, outputs)

    with tf.name_scope('discriminator_loss'):
        # minimizing -tf.log will try to get inputs to 1
        # predict_real => 1
        # predict_fake => 0
        discrim_loss = tf.reduce_mean(-(tf.log(predict_real + EPS) +
                                        tf.log(1 - predict_fake + EPS)))
        if a.sample_lambda > 0.0:
            discrim_loss = discrim_loss + a.sample_lambda * \
                tf.reduce_mean(-(tf.log(predict_sample_real + EPS) +
                               tf.log(1 - predict_sample_fake + EPS)))

    with tf.name_scope('generator_loss'):
        # predict_fake => 1
        # abs(targets - outputs) => 0
        gen_loss_GAN = tf.reduce_mean(-tf.log(predict_fake + EPS))
        if a.mmad_loss:
            with tf.name_scope('MMAD'):
                dif = tf.abs(targets - outputs, name='absdist')
                temp = tf.reduce_mean(dif)
                foreground_L1 = tf.reduce_mean(tf.boolean_mask(
                    dif, target_masks),
                                               name='foreground')
                neg_target_masks = tf.logical_not(target_masks, name='neg')
                background_L1 = tf.reduce_mean(tf.boolean_mask(
                    dif, neg_target_masks),
                                               name='background')
                gen_loss_L1 = 2 * foreground_L1 / 3 + background_L1 / 3
                gen_loss_L1 = tf.where(tf.is_nan(gen_loss_L1), temp,
                                       gen_loss_L1)
        else:
            gen_loss_L1 = tf.reduce_mean(tf.abs(targets - outputs))
        gen_loss = gen_loss_GAN * a.gan_weight + gen_loss_L1 * a.l1_weight
        if a.sample_lambda > 0.0:
            gen_loss = gen_loss + a.sample_lambda * \
                tf.reduce_mean(-tf.log(predict_sample_fake + EPS))

    global_step = tf.contrib.framework.get_or_create_global_step()
    incr_global_step = tf.assign(global_step, global_step + 1)

    if a.mode in {'train'}:
        with tf.name_scope('discriminator_train'):
            discrim_tvars = [
                var for var in tf.trainable_variables()
                if var.name.startswith('discriminator')
            ]
            discrim_optim = tf.train.AdamOptimizer(a.lr, a.beta1)
            discrim_grads_and_vars = discrim_optim.compute_gradients(
                discrim_loss, var_list=discrim_tvars)
            discrim_train = discrim_optim.apply_gradients(
                discrim_grads_and_vars)

        with tf.name_scope('generator_train'):
            with tf.control_dependencies([discrim_train]):
                gen_tvars = [
                    var for var in tf.trainable_variables()
                    if var.name.startswith('generator')
                ]
                gen_optim = tf.train.AdamOptimizer(a.lr, a.beta1)
                gen_grads_and_vars = gen_optim.compute_gradients(
                    gen_loss, var_list=gen_tvars)
                gen_train = gen_optim.apply_gradients(gen_grads_and_vars)

        ema = tf.train.ExponentialMovingAverage(decay=0.99)
        update_losses = ema.apply([discrim_loss, gen_loss_GAN, gen_loss_L1])

        return Model(predict_real=predict_real,
                     predict_fake=predict_fake,
                     discrim_loss=ema.average(discrim_loss),
                     discrim_grads_and_vars=discrim_grads_and_vars,
                     gen_loss_GAN=ema.average(gen_loss_GAN),
                     gen_loss_L1=ema.average(gen_loss_L1),
                     gen_grads_and_vars=gen_grads_and_vars,
                     outputs=outputs,
                     iters=tf.concat(iters, axis=2, name='between_steps'),
                     train=tf.group(update_losses, incr_global_step,
                                    gen_train),
                     rest=rest)

    else:
        return Model(predict_real=predict_real,
                     predict_fake=predict_fake,
                     discrim_loss=discrim_loss,
                     discrim_grads_and_vars=tf.constant(0),
                     gen_loss_GAN=gen_loss_GAN,
                     gen_loss_L1=gen_loss_L1,
                     gen_grads_and_vars=tf.constant(0),
                     outputs=outputs,
                     iters=tf.concat(iters, axis=2, name='between_steps'),
                     train=tf.constant(0),
                     rest=rest)
Example #25
def main():
    args = get_arguments()
    
    if args.dataset == 'ade20k':
        param = ADE20k_param
    elif args.dataset == 'cityscapes':
        param = cityscapes_param
    else:
        param = surreal_param

    # Set placeholder
    image_filename = tf.placeholder(dtype=tf.string)
    anno_filename = tf.placeholder(dtype=tf.string)

    # Read & Decode image
    img = tf.image.decode_image(tf.read_file(image_filename), channels=3)
    anno = tf.image.decode_image(tf.read_file(anno_filename), channels=1)
    img.set_shape([None, None, 3])
    anno.set_shape([None, None, 1])

    ori_shape = tf.shape(img)
    img = preprocess(img, param)

    model = model_config[args.model]
    net = model({'data': img}, num_classes=param['num_classes'], 
                    filter_scale=args.filter_scale, evaluation=True)

    # Predictions.
    raw_output = net.layers['conv6_cls']

    raw_output_up = tf.image.resize_bilinear(raw_output, size=ori_shape[:2], align_corners=True)
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    raw_pred = tf.expand_dims(raw_output_up, dim=3)

    # mIoU
    pred_flatten = tf.reshape(raw_pred, [-1,])
    raw_gt = tf.reshape(anno, [-1,])

    mask = tf.not_equal(raw_gt, param['ignore_label'])
    indices = tf.squeeze(tf.where(mask), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    pred = tf.gather(pred_flatten, indices)

    
    # I do not know which one to choose
    if args.dataset == 'ade20k':
        pred = tf.add(pred, tf.constant(1, dtype=tf.int64))
        mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred, gt, num_classes=param['num_classes']+1)
    elif args.dataset == 'cityscapes':
        mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred, gt, num_classes=param['num_classes'])
    elif args.dataset == 'surreal':
        less_equal_class = tf.less_equal(raw_gt, param['num_classes']-1)
        not_equal_ignore = tf.not_equal(raw_gt, param['ignore_label'])
        mask = tf.logical_and(less_equal_class, not_equal_ignore)
        indices = tf.squeeze(tf.where(mask), 1)
        gt = tf.cast(tf.gather(raw_gt, indices), tf.int64)
        pred = tf.cast(tf.gather(pred_flatten, indices),tf.int64)
        gt_n = tf.reshape(gt, [-1])
        pred_n = tf.reshape(pred, [-1])
        with tf.name_scope('metrics'):
            mIoU, update_op = tf.metrics.mean_iou(gt_n,pred_n, num_classes=param['num_classes'])
            accu, update_acc = tf.metrics.accuracy(gt_n,pred_n)
            reca, update_rec = tf.metrics.recall(gt_n,pred_n)
            prec, update_pre = tf.metrics.precision(gt_n,pred_n)
            mean, update_mean = tf.metrics.mean_per_class_accuracy(gt_n,pred_n, num_classes =param['num_classes'])
            conf_matrix = tf.confusion_matrix(gt_n,pred_n, num_classes=param['num_classes'])
            acc_per_class = tf.diag_part(conf_matrix)/tf.reduce_sum(conf_matrix,1)
            acc_per_class_good = tf.where(tf.is_nan(acc_per_class), tf.zeros_like(acc_per_class), acc_per_class)
    running_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="metrics")
        
    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    running_vars_initializer = tf.variables_initializer(var_list=running_vars)
    
    sess.run(init)
    sess.run(running_vars_initializer)


    listy = []
    # It looks like what used to live under snapshots in train now goes under model
    model_path = model_paths[args.model]
    if args.model == 'others':
        ckpt = tf.train.get_checkpoint_state(model_path)
        if ckpt and ckpt.model_checkpoint_path:
            loader = tf.train.Saver(var_list=tf.global_variables())
            load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
            load(loader, sess, ckpt.model_checkpoint_path)
        else:
            print('No checkpoint file found.')
    else:
        #net.load(model_path, sess)
        print('Restore from {}'.format(model_path))

    img_files, anno_files = read_labeled_image_list(param['data_dir'], param['data_list'])
    for i in trange(param['num_steps'], desc='evaluation', leave=True):
        feed_dict = {image_filename: img_files[i], anno_filename: anno_files[i]}
        _ = sess.run([update_op,update_acc,update_rec,update_pre, update_mean], feed_dict=feed_dict)
        m,a,r,p,ma,apc = sess.run([mIoU,accu,reca,prec, mean,acc_per_class_good], feed_dict=feed_dict)
        f = 2*p*r/(p+r)
        metris = np.array([m,a,f,r,p])
        metris = np.append(metris, ma)
        metris = np.append(metris, apc)
        listy.append(metris) 
        
        
        if i > 0 and args.measure_time:
            calculate_time(sess, net, raw_pred, feed_dict)
    
    ll = np.mean(np.array(listy), axis = 0)
    np.save("./loss_data/loss_metrics.npy",ll)
    print('MIOU: {}'.format(m))
Example #26
    def lrp_one_timestep(self, r_incoming, t):
        """lrp applied to TeLL LSTMLayer for 1 timestep
        
        Parameters
        ----------
        r_incoming : tensor (batchsize, 1, units)
            relevance coming in (flowing in from upper layer/future timestep)
        t : tensor
            int tensor with current timestep (as to be used to index o, c, i, z)
        """
        zero = self.__zero__
        zero_init = self.__zero_init__
        
        alpha, beta = self._alpha_, self._beta_
        mul_rule = self._mul_rule_
        
        act_h = self._act_h_
        w_o, w_i = self._w_o_, self._w_i_
        o_min, i_min, c_min, z_min = self._o_min_, self._i_min_, self._c_min_, self._z_min_
        o_max, i_max, c_max, z_max = self._o_max_, self._i_max_, self._c_max_, self._z_max_
        o, c, i, z = self._o_, self._c_, self._i_, self._z_
        
        lrp_keys = self._lrp_keys_
        lrp_dict = self._lrp_dict_
        r_z, r_from_o, r_from_i, r_cc, r_y, r_cy, r_o, r_c, r_zi, r_i = [lrp_dict[k] for k in lrp_keys]
        
        #
        # for time t
        #
        if mul_rule is None:
            r_y = tf.concat([r_y, tf.expand_dims(r_incoming[:, -1, :], axis=1)], axis=1)
            r_cy = tf.concat([r_cy, tf.expand_dims(r_y[:, -1, :], axis=1)], axis=1)
            r_o = tf.concat([r_o, zero_init], axis=1)
    
            r_c = tf.concat([r_c, tf.expand_dims(r_cy[:, -1, :] + r_cc[:, -1, :], axis=1)], axis=1)
    
            r_zi_new = tf.expand_dims(r_c[:, -1, :] * (i[:, t, :] * z[:, t, :] / c[:, t, :]), axis=1)
            r_zi = tf.concat([r_zi, tf.where(tf.is_nan(r_zi_new), zero_init, r_zi_new)], axis=1)
    
            r_z = tf.concat([r_z, tf.expand_dims(r_zi[:, -1, :], axis=1)], axis=1)
            r_i = tf.concat([r_i, zero_init], axis=1)

        else:
            r_y = tf.concat([r_y, tf.expand_dims(r_incoming[:, -1, :] + r_from_o[:, -1, :] + r_from_i[:, -1, :],
                                                 axis=1)], axis=1)
            r_cy = tf.concat([r_cy, tf.expand_dims(mul_rule(act_h(c[:, t, :]), o[:, t, :], r_y[:, -1, :],
                                                            c_min, c_max, o_min, o_max), axis=1)], axis=1)
            r_o = tf.concat([r_o, tf.expand_dims(mul_rule(o[:, t, :], act_h(c[:, t, :]), r_y[:, -1, :],
                                                          o_min, o_max, c_min, c_max), axis=1)], axis=1)
    
            r_c = tf.concat([r_c, tf.expand_dims(r_cy[:, -1, :] + r_cc[:, -1, :], axis=1)], axis=1)
    
            r_zi_new = tf.expand_dims(r_c[:, -1, :] * (i[:, t, :] * z[:, t, :] / c[:, t, :]), axis=1)
            r_zi = tf.concat([r_zi, tf.where(tf.is_nan(r_zi_new), zero_init, r_zi_new)], axis=1)
    
            r_z = tf.concat([r_z, tf.expand_dims(mul_rule(z[:, t, :], i[:, t, :], r_zi[:, -1, :],
                                                          z_min, z_max, i_min, i_max), axis=1)], axis=1)
            r_i = tf.concat([r_i, tf.expand_dims(mul_rule(i[:, t, :], z[:, t, :], r_zi[:, -1, :],
                                                          i_min, i_max, z_min, z_max), axis=1)], axis=1)
        
        #
        # distribute R to units through recurrent connections
        #
        t_greater_0 = tf.greater(t, zero)
        r_from_o_t = lrp(r=r_o[:, -1, :], w=w_o, x=o[:, t - 1, :], x_min=o_min, alpha=alpha, beta=beta)
        r_from_o = tf.cond(t_greater_0,
                           lambda: tf.concat([r_from_o, tf.expand_dims(r_from_o_t, axis=1)], axis=1),
                           lambda: r_from_o)

        r_from_i_t = lrp(r=r_i[:, -1, :], w=w_i, x=i[:, t - 1, :], x_min=i_min, alpha=alpha, beta=beta)
        r_from_i = tf.cond(t_greater_0,
                           lambda: tf.concat([r_from_i, tf.expand_dims(r_from_i_t, axis=1)], axis=1),
                           lambda: r_from_i)
        
        #
        # for time t-1
        #
        r_cc_new = tf.expand_dims(c[:, t - 1, :] / c[:, t, :] * r_c[:, -1, :], axis=1)
        r_cc = tf.cond(t_greater_0,
                       lambda: tf.concat([r_cc, tf.where(tf.is_nan(r_cc_new), zero_init, r_cc_new)], axis=1),
                       lambda: r_cc)
        
        self._lrp_dict_ = OrderedDict(((k, v) for k, v in
                                       zip(lrp_keys, [r_z, r_from_o, r_from_i, r_cc, r_y, r_cy, r_o, r_c, r_zi, r_i])))
Example #27
def proportional_multiplication_rule(x, y, z, x_min, x_max, y_min, y_max):
    
    r = z * ((x-x_min) / (x_max - x_min)) / (((x-x_min) / (x_max - x_min)) + ((y-y_min) / (y_max - y_min)))
    return tf.where(tf.is_nan(r), tf.zeros_like(r), r)
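A quick numeric check (hypothetical values): when x and y both sit at their minimum, the denominator is 0/0 = NaN, which the tf.where maps back to 0.

import tensorflow as tf

r = proportional_multiplication_rule(
    x=tf.constant([2.0, 0.0]), y=tf.constant([2.0, 0.0]),
    z=tf.constant([1.0, 1.0]),
    x_min=0.0, x_max=4.0, y_min=0.0, y_max=4.0)
# r evaluates to [0.5, 0.0]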
Example #28
def mse_ignore_nans(preds, targets, **kwargs):
    # Computes MSE, ignoring targets that are NaN
    
    # replace nans in the target with corresponding preds, so that there is no gradient for those
    targets_nonan = tf.where(tf.is_nan(targets), preds, targets)
    return tf.reduce_mean(tf.square(targets_nonan - preds), **kwargs)
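This is evidently the my_ops.mse_ignore_nans referenced in Example #1. A minimal usage sketch (hypothetical values, TF 1.x session API): the NaN target contributes zero error but still counts toward the mean's denominator.

import tensorflow as tf

preds = tf.constant([1.0, 2.0, 3.0])
targets = tf.constant([1.5, float('nan'), 3.0])
loss = mse_ignore_nans(preds, targets)
with tf.Session() as sess:
    print(sess.run(loss))   # (0.25 + 0.0 + 0.0) / 3 = 0.0833...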
Example #29
    def __init__(
        self,
        features,
        params={
            'prior_mean_hidden_layer': -1e-5,
            'prior_stddev_hidden_layer': 1e-6,
            'prior_stddev_outer_layer': 1e-8
        }):
        self.features = features
        # Inputs to the tensorflow graph. X will be our phi(S, A), Y will be our reward
        self.X = tf.placeholder(tf.float32, [None, features])
        self.Y = tf.placeholder(tf.float32, [None, 1])
        self.hidden_layer_mean = params['prior_mean_hidden_layer']
        self.hidden_layer_stddev = params['prior_stddev_hidden_layer']
        self.outer_layer_stddev = params['prior_stddev_outer_layer']

        # Should be expandable to a deep network by adding more layers
        # Can add dense flipout layers for fully bayesian or could add simple dense or convolutional layers
        # to project into a smaller feature space before doing full distributions (would be more computationally efficient)
        self.layers = tf.keras.Sequential([
            tfp.layers.DenseFlipout(
                # one output for estimating the reward
                1,
                # the _prior_ distribution over our weights (even though it says posterior, it is the prior in the bayes rule sense)
                # this creates a vector of learnable independent normal distributions
                kernel_posterior_fn=tfp_layers_util.
                default_mean_field_normal_fn(
                    # initialize the mean of the normal distributions randomly so that the means are slightly negative (pessimistic init)
                    loc_initializer=tf.random_normal_initializer(
                        mean=self.hidden_layer_mean,
                        stddev=self.hidden_layer_stddev
                    )  # prior mean and stddev of nodes in hidden layer
                ),
                # regularize our weights by pulling them towards a N(0, 1e-8) distribution
                # cannot have a N(0, 0) distribution, so pull them towards something with no variance
                kernel_prior_fn=KernelPrior(self.outer_layer_stddev).
                output,  # prior stddev over y's (outputs, in our case the rewards)
                # Don't use a bias weight here
                bias_posterior_fn=
                None,  # set to None to keep everything local (local variance over all features)
            )
        ])

        # make predictions by sampling weights from the posterior and multiplying phi(S, A)
        self.predictions = self.layers(self.X)
        # model the variance of the noise on Y with a learnable normal distribution
        std = VariationalParameter('noise_std', [1])
        # build the distribution over Y ~ N(W*phi(S, A), std)
        pred_dist = tfd.Normal(loc=self.predictions, scale=std.sample())

        # Build the loss function
        # get the log probability of observing this value of Y given our parameters: P(Y | theta)
        log_prob = pred_dist.log_prob(self.Y)
        # make sure this log probability isn't nan (bug in tensorflow when variance approaches 0. if it is nan, just set it to zero)
        non_nan = tf.where(tf.is_nan(log_prob), tf.zeros_like(log_prob),
                           log_prob)
        # get the mean over the outputs (only 1 output for now so this isn't really necessary, but it is good to be generic)
        neg_log_prob = -tf.reduce_mean(non_nan)
        # The KL-divergence is what trains the variance over the weights, the neg_log_prob is the loss over the mean
        # The KL-divergence is added as a "regularizer" to the layers as a hack to make this work with the tensorflow infrastructure (that's how tfp works)
        kl_div = sum(self.layers.losses)
        # the ELBO loss is just the sum of the loss over the variance (kl-div) and the loss over the mean (neg_log_prob)
        elbo_loss = neg_log_prob + kl_div

        # minimize the loss using some optimizer (adam with small learning rate seems to work well)
        optimizer = tf.train.AdamOptimizer(0.01)
        self.train = optimizer.minimize(elbo_loss)

        # initialize the tensorflow graph and get initial values of the weights
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        init_op.run()
Example #30
    def model_fn(self, features, labels, mode):
        """Auto-Scaling 3D CNN model.

        For more information on how to write a model function, see:
          https://www.tensorflow.org/guide/custom_estimators#write_a_model_function
        """
        input_layer = features

        # Replace missing values by 0
        hidden_layer = tf.where(tf.is_nan(input_layer),
                                tf.zeros_like(input_layer), input_layer)

        # Repeatedly apply 3D CNN, followed by 3D max pooling
        # until the hidden layer has reasonable number of entries
        REASONABLE_NUM_ENTRIES = 1000
        num_filters = 16  # The number of filters is fixed
        while True:
            shape = hidden_layer.shape
            kernel_size = [
                min(3, shape[1]),
                min(3, shape[2]),
                min(3, shape[3])
            ]
            hidden_layer = tf.layers.conv3d(inputs=hidden_layer,
                                            filters=num_filters,
                                            kernel_size=kernel_size)
            pool_size = [min(2, shape[1]), min(2, shape[2]), min(2, shape[3])]
            hidden_layer = tf.layers.max_pooling3d(inputs=hidden_layer,
                                                   pool_size=pool_size,
                                                   strides=pool_size,
                                                   padding='valid',
                                                   data_format='channels_last')
            if get_num_entries(hidden_layer) < REASONABLE_NUM_ENTRIES:
                break

        hidden_layer = tf.layers.flatten(hidden_layer)
        hidden_layer = tf.layers.dense(inputs=hidden_layer,
                                       units=64,
                                       activation=tf.nn.relu)
        hidden_layer = tf.layers.dropout(
            inputs=hidden_layer,
            rate=0.15,
            training=mode == tf.estimator.ModeKeys.TRAIN)

        logits = tf.layers.dense(inputs=hidden_layer, units=self.output_dim)
        sigmoid_tensor = tf.nn.sigmoid(logits, name="sigmoid_tensor")

        predictions = {
            # Generate predictions (for PREDICT and EVAL mode)
            "classes": tf.argmax(input=logits, axis=1),
            # "classes": binary_predictions,
            # Add `sigmoid_tensor` to the graph. It is used for PREDICT and by the
            # `logging_hook`.
            "probabilities": sigmoid_tensor
        }

        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions)

        # Calculate Loss (for both TRAIN and EVAL modes)
        # For multi-label classification, a correct loss is sigmoid cross entropy
        loss = sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)

        # Configure the Training Op (for TRAIN mode)
        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = tf.train.AdamOptimizer()
            train_op = optimizer.minimize(
                loss=loss, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)

        # Add evaluation metrics (for EVAL mode)
        assert mode == tf.estimator.ModeKeys.EVAL
        eval_metric_ops = {
            "accuracy":
            tf.metrics.accuracy(labels=labels,
                                predictions=predictions["classes"])
        }
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)
Example #31
  def __init__(self,is_training,params):
    self.batch_size = batch_size = params["batch_size"]
    self.num_steps = num_steps = params["seq_length"]
    self._Y_vals=[]
    size = params['n_hidden']
    input_size = params['input_size']
    keep_prob=params['keep_prob']
    max_grad_norm=params['max_grad_norm']

    self._input_data = tf.placeholder(tf.float32, [batch_size, num_steps,input_size])
    self._targets = tf.placeholder(tf.float32, [batch_size*num_steps,params["n_output"]])
    self._zeros=tf.zeros([batch_size*num_steps,params["n_output"]],tf.float32)

    lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=size,input_size=input_size)
    if is_training and keep_prob < 1:
      lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
          lstm_cell, output_keep_prob=keep_prob)

    lstm_cell2 = tf.nn.rnn_cell.LSTMCell(num_units=size,input_size=size)
    if is_training and keep_prob < 1:
      lstm_cell2 = tf.nn.rnn_cell.DropoutWrapper(
          lstm_cell2, output_keep_prob=keep_prob)

    lstm_cell3 = tf.nn.rnn_cell.LSTMCell(num_units=size,input_size=size)
    if is_training and keep_prob < 1:
      lstm_cell3 = tf.nn.rnn_cell.DropoutWrapper(
          lstm_cell3, output_keep_prob=keep_prob)


    cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell,lstm_cell2,lstm_cell3])

    self._initial_state = cell.zero_state(batch_size, tf.float32)

    if is_training and keep_prob < 1:
       self._input_data = tf.nn.dropout(self._input_data, keep_prob)

    outputs = []
    state = self._initial_state
    with tf.variable_scope("RNN"):
      for time_step in range(num_steps):
        if time_step > 0: tf.get_variable_scope().reuse_variables()
        (cell_output, state) = cell(self._input_data[:,time_step,:], state)
        outputs.append(cell_output)



    output = tf.reshape(tf.concat(1, outputs), [-1, size])
    softmax_w = tf.get_variable("softmax_w", [size, params["n_output"]])
    softmax_b = tf.get_variable("softmax_b", [params["n_output"]])
    self._Y_vals = tf.tanh(tf.matmul(output, softmax_w) + softmax_b)
    tmp = self._Y_vals - self._targets
    tmpt = tf.select(tf.is_nan(tmp), self._zeros, tmp)
    loss = tf.nn.l2_loss(tmpt)
    self._cost = cost = tf.reduce_mean(loss)
    self._final_state = state

    self._tvars = tf.trainable_variables()

    if not is_training:
      return

    self._lr = tf.Variable(0.0, trainable=False)
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, self._tvars),max_grad_norm)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self._train_op = optimizer.apply_gradients(zip(grads, self._tvars))
Example #32
def SanitizedAutoCorrelation(x, axis, *args, **kwargs):
    res = tfd.auto_correlation(x, axis, *args, **kwargs)
    res = tf.where(tf.is_nan(res), tf.ones_like(res), res)
    res = tf.where(tf.is_inf(res), tf.ones_like(res), res)
    return res
Example #33
    def gradient_descent(self, sess, model):
        def compare(x, y):
            if self.TARGETED:
                return x == y
            else:
                return x != y

        shape = (BATCH_SIZE, model.image_size, model.image_size,
                 model.num_channels)

        # the variable to optimize over
        modifier = tf.Variable(np.zeros(shape, dtype=np.float32))
        canchange = tf.Variable(np.zeros(shape), dtype=np.float32)
        # the variables we're going to hold on to, for efficiency

        simg = tf.Variable(np.zeros(shape, dtype=np.float32))
        original = tf.Variable(np.zeros(shape, dtype=np.float32))
        timg = tf.Variable(np.zeros(shape, dtype=np.float32))
        tlab = tf.Variable(
            np.zeros((BATCH_SIZE, model.num_labels), dtype=np.float32))

        # and the assignment to set the variables
        assign_simg = tf.placeholder(np.float32, shape)
        assign_original = tf.placeholder(np.float32, shape)
        assign_timg = tf.placeholder(np.float32, shape)
        assign_tlab = tf.placeholder(np.float32,
                                     (BATCH_SIZE, self.model.num_labels))

        # these are the variables to initialize when we run
        setup = []
        setup.append(tf.assign(timg, assign_timg))
        setup.append(tf.assign(original, assign_original))
        setup.append(tf.assign(simg, assign_simg))
        setup.append(tf.assign(tlab, assign_tlab))

        newimg = (tf.tanh(modifier + simg) / 2) * self.norm_to_01(
            canchange) + (1 - self.norm_to_01(canchange)) * original
        Initnewimg = newimg
        Initnewimg = tf.clip_by_value((Initnewimg + 0.5) * 255., 0., 255.)
        Initnewimg = Initnewimg / 255. - 0.5

        Initoutput = model.predict(Initnewimg)

        Initreal = tf.reduce_sum((tlab) * Initoutput, 1)

        Initother = tf.reduce_max((1 - tlab) * Initoutput - (tlab * 10000), 1)

        if self.TARGETED:
            Initloss1 = tf.maximum(0.0, Initother - Initreal + .01)
        else:
            Initloss1 = tf.maximum(0.0, Initreal - Initother + .01)

        # sum up the losses
        Initloss_sbin = self.L_0loss(self.norm_to_01(canchange), 10.)

        Initloss_midbin = tf.where(tf.is_nan(Initloss_sbin),
                                   tf.zeros_like(Initloss_sbin), Initloss_sbin)
        Initloss_sbin = tf.where(
            tf.is_nan(Initloss_sbin),
            tf.zeros_like(Initloss_sbin) + tf.reduce_mean(Initloss_midbin),
            Initloss_sbin)

        Initloss_bin = Initloss_sbin  # tf.reduce_mean(Initloss_sbin)#

        Initloss_smod = self.L_0loss(
            (tf.tanh(modifier + simg) / 2 - tf.tanh(timg) / 2), 10.)
        Initloss_midbin = tf.where(tf.is_nan(Initloss_smod),
                                   tf.zeros_like(Initloss_smod), Initloss_smod)
        Initloss_smod = tf.where(
            tf.is_nan(Initloss_smod),
            tf.zeros_like(Initloss_smod) + tf.reduce_mean(Initloss_midbin),
            Initloss_smod)
        Initloss_mod = Initloss_smod

        Initloss = 10. * Initloss1 + 0.5 * Initloss_bin + 0.5 * Initloss_mod  # 0.2 for mnist, 0.5 for cifar10

        # setup the adam optimizer and keep track of variables we're creating
        start_vars = set(x.name for x in tf.global_variables())
        optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE)
        Inittrain = optimizer.minimize(Initloss,
                                       var_list=[modifier, canchange])

        end_vars = tf.global_variables()
        new_vars = [x for x in end_vars if x.name not in start_vars]
        init = tf.variables_initializer(
            var_list=[modifier, canchange, simg, original, timg, tlab] +
            new_vars)

        def doit(oimgs, labs, starts):
            # convert to tanh-space
            imgs = np.arctanh(np.array(oimgs) * 1.999999)
            starts = np.arctanh(np.array(starts) * 1.999999)
            # initialize the variables
            sess.run(init)
            sess.run(
                setup, {
                    assign_timg: imgs,
                    assign_tlab: labs,
                    assign_simg: starts,
                    assign_original: oimgs
                })

            old_nimg = None
            old_Equal_count = old_Initloss_b = 1000.
            for step in range(self.MAX_ITERATIONS):
                # remember the old value

                _, works, ploss, qloss, Initloss_b, Initloss_m = sess.run([
                    Inittrain, Initloss1, Initoutput, tlab, Initloss_sbin,
                    Initloss_mod
                ])
                #print("works",works)
                #print("Initloss_b",Initloss_b)
                #print("Initloss_m",Initloss_m)
                if self.TARGETED:
                    Flag = np.argmax(ploss, 1) == np.argmax(np.squeeze(qloss))

                    if np.sum(Flag) >= 1:
                        op_index = np.argmin(Initloss_b * Flag)
                        nimg = sess.run((Initnewimg))
                        if Initloss_b[op_index] < old_Initloss_b:
                            old_nimg = nimg[op_index]
                            old_Initloss_b = Initloss_b[op_index]
                else:
                    if np.argmax(ploss) != np.argmax(np.squeeze(qloss)):
                        nimg = sess.run((Initnewimg))
                        cal_img = np.around(
                            np.clip((np.array(oimgs) + 0.5) * 255., 0., 255.))
                        cal_nimg = np.around(
                            np.clip((np.array(nimg) + 0.5) * 255., 0., 255.))
                        Equal_count = np.sum(
                            np.all(np.abs(cal_img - cal_nimg) > 1, axis=3),
                            (1, 2))
                        if Equal_count < old_Equal_count:
                            old_Equal_count = Equal_count
                            old_nimg = nimg
            if old_nimg is not None:
                init_input = np.expand_dims(oimgs[0], 0)
                cal_img = np.around(
                    np.clip((init_input + 0.5) * 255., 0., 255.))
                cal_nimg = np.around(
                    np.clip((np.array(old_nimg) + 0.5) * 255., 0., 255.))
                Equal_count = np.sum(np.abs(cal_img - cal_nimg) > 1.)

                #print("Equal count:", np.sum(np.all(np.sum(np.abs(cal_img - cal_nimg), 0) > 1, axis=2)))
                return Equal_count, old_nimg
            else:
                return None, None

        return doit
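gradient_descent relies on self.norm_to_01 and self.L_0loss, which are defined elsewhere in the class. A purely hypothetical sketch of what such helpers could look like, for readability only:

    def norm_to_01(self, x):
        # Hypothetical: squash the unconstrained mask variable into (0, 1)
        # so it can gate pixels between the modified and original image.
        return tf.sigmoid(x)

    def L_0loss(self, x, k):
        # Hypothetical smooth L0 surrogate: a steep sigmoid of the magnitude,
        # summed over all but the batch dimension, approximates the count of
        # nonzero entries; k controls the sharpness.
        return tf.reduce_sum(2.0 * tf.sigmoid(k * tf.abs(x)) - 1.0,
                             axis=[1, 2, 3])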
Example #34
0
 def checkForNan(tensor):
     # despite the name, this counts entries that are NaN or Inf
     return tf.reduce_sum(tf.add(tf.to_float(tf.is_nan(tensor)), tf.to_float(tf.is_inf(tensor))))
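A minimal usage sketch: evaluated on a tensor with one NaN and one Inf, the op should return 2.0.

import numpy as np
import tensorflow as tf

t = tf.constant([1.0, np.nan, np.inf, 0.0])
bad_count = checkForNan(t)
with tf.Session() as sess:
    print(sess.run(bad_count))  # 2.0: one NaN plus one Inf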
Example #35
0
    def project_dual(self):
        """Function that projects the input dual variables onto the feasible set.
    
        Returns:
          projected_dual: Feasible dual solution corresponding to current dual
          projected_certificate: Objective value of feasible dual
        """
        # TODO: consider whether we can use shallow copy of the lists without
        # using tf.identity
        projected_lambda_pos = [tf.identity(x) for x in self.lambda_pos]
        projected_lambda_neg = [tf.identity(x) for x in self.lambda_neg]
        projected_lambda_quad = [tf.identity(x) for x in self.lambda_quad]
        projected_lambda_lu = [tf.identity(x) for x in self.lambda_lu]
        projected_nu = tf.identity(self.nu)

        # TODO: get rid of the special case for one hidden layer
        # Different projection for 1 hidden layer
        if self.nn_params.num_hidden_layers == 1:
            # Creating equivalent PSD matrix for H by Schur complements
            diag_entries = 0.5 * tf.divide(
                tf.square(self.lambda_quad[self.nn_params.num_hidden_layers]),
                (self.lambda_quad[self.nn_params.num_hidden_layers] +
                 self.lambda_lu[self.nn_params.num_hidden_layers]))
            # If lambda_quad[i], lambda_lu[i] are 0, entry is NaN currently,
            # but we want to set that to 0
            diag_entries = tf.where(tf.is_nan(diag_entries),
                                    tf.zeros_like(diag_entries), diag_entries)
            matrix = (
                tf.matmul(tf.matmul(tf.transpose(
                    self.nn_params.weights[self.nn_params.num_hidden_layers - 1]),
                    utils.diag(diag_entries)),
                    self.nn_params.weights[self.nn_params.num_hidden_layers - 1]))
            new_matrix = utils.diag(
                2 * self.lambda_lu[self.nn_params.num_hidden_layers - 1]) - matrix
            # Making symmetric
            new_matrix = 0.5 * (new_matrix + tf.transpose(new_matrix))
            eig_vals = tf.self_adjoint_eigvals(new_matrix)
            min_eig = tf.reduce_min(eig_vals)
            # If min_eig is positive, the solution is already feasible, so don't
            # add anything; otherwise add to make the matrix PSD (the 1E-6 term
            # ensures it is strictly PSD, which is useful when inverting)
            projected_lambda_lu[0] = (projected_lambda_lu[0] +
                                      0.5 * tf.maximum(-min_eig, 0) + 1E-6)

        else:
            # Minimum eigen value of H
            # TODO: Write this in terms of matrix multiply
            # matrix H is a submatrix of M, thus we just need to extend existing code
            # for computing matrix-vector product (see get_psd_product function).
            # Then use the same trick to compute smallest eigenvalue.
            eig_vals = tf.self_adjoint_eigvals(self.matrix_h)
            min_eig = tf.reduce_min(eig_vals)

            for i in range(self.nn_params.num_hidden_layers + 1):
                # Since lambda_lu appears only in diagonal terms, can subtract to
                # make PSD and feasible
                projected_lambda_lu[i] = (projected_lambda_lu[i] +
                                          0.5 * tf.maximum(-min_eig, 0) + 1E-6)
                # Adjusting lambda_neg wherever possible so that lambda_neg + lambda_lu
                # remains close to unchanged
                # projected_lambda_neg[i] = tf.maximum(0.0, projected_lambda_neg[i] +
                #                                     (0.5*min_eig - 1E-6)*
                #                                     (self.lower[i] + self.upper[i]))

        projected_dual_var = {'lambda_pos': projected_lambda_pos,
                              'lambda_neg': projected_lambda_neg,
                              'lambda_lu': projected_lambda_lu,
                              'lambda_quad': projected_lambda_quad,
                              'nu': projected_nu}
        projected_dual_object = DualFormulation(projected_dual_var,
                                                self.nn_params,
                                                self.test_input,
                                                self.true_class,
                                                self.adv_class,
                                                self.input_minval,
                                                self.input_maxval,
                                                self.epsilon)
        projected_certificate = projected_dual_object.compute_certificate()
        return projected_certificate
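The eigenvalue-shift trick above (adding max(-min_eig, 0) plus a small epsilon to diagonal terms) can be stated on its own. A minimal sketch that projects an arbitrary square matrix to a strictly positive definite one:

import tensorflow as tf

def shift_to_psd(matrix, eps=1e-6):
    # Symmetrize, find the smallest eigenvalue, and shift the diagonal
    # just enough that the result is strictly positive definite.
    sym = 0.5 * (matrix + tf.transpose(matrix))
    min_eig = tf.reduce_min(tf.self_adjoint_eigvals(sym))
    shift = tf.maximum(-min_eig, 0.0) + eps
    return sym + shift * tf.eye(tf.shape(sym)[0])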
Example #36
0
    def build_loss(self, objective):
        """
        Adds elements into the graph to compute the given objective.

        Parameters
        ----------
        objective : dict of {:class:`~nengo:nengo.Probe`: ``"mse"`` or \
                                                          callable or ``None``}
            The objective used to compute loss for each probe. Passing
            ``"mse"`` will use mean squared error. A custom function
            ``f(output, target) -> loss`` can be passed that consumes the
            actual output and target output for a probe in ``targets``
            and returns a ``tf.Tensor`` representing the scalar loss value for
            that Probe (loss will be summed across Probes).

        Returns
        -------
        ``tf.Tensor``
            Tensor representing the sum of the given objectives applied to
            target probes
        """

        key = frozenset(objective.items())

        try:
            # return the cached loss tensor if it exists
            return self.losses[key]
        except KeyError:
            pass

        loss = []
        for p, obj in objective.items():
            # create a placeholder for the target values
            if p not in self.target_phs:
                self.target_phs[p] = tf.placeholder(
                    self.dtype, (self.minibatch_size, None, p.size_in),
                    name="targets")

            # compute loss
            if obj == "mse":
                # note: nan targets converted to zero error
                target = tf.where(tf.is_nan(self.target_phs[p]),
                                  self.probe_arrays[p], self.target_phs[p])

                loss.append(
                    tf.reduce_mean(tf.square(target - self.probe_arrays[p])))
            elif callable(obj):
                # move minibatch dimension back to the front
                loss.append(obj(self.probe_arrays[p], self.target_phs[p]))
            elif obj is None:
                # user is directly specifying error, not using objective
                continue
            else:
                raise NotImplementedError

        if len(loss) > 0:
            # sum loss across probes (note: this will also sum across
            # the output of `objective` if it doesn't return a scalar)
            loss = tf.reduce_sum(loss)
        else:
            loss = None

        self.losses[key] = loss

        return loss
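The "mse" branch above zeroes the error wherever a target is NaN by substituting the prediction for the target. The same pattern in isolation:

import numpy as np
import tensorflow as tf

pred = tf.constant([[1.0, 2.0, 3.0]])
target = tf.constant([[1.5, np.nan, 2.0]])
# NaN targets contribute zero error: replace them with the prediction.
masked_target = tf.where(tf.is_nan(target), pred, target)
mse = tf.reduce_mean(tf.square(masked_target - pred))
with tf.Session() as sess:
    print(sess.run(mse))  # the NaN slot adds 0 but still counts in the mean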
Example #37
0
    tf.summary.histogram("loss", loss)
    merge = tf.summary.merge_all()



    hm_steps = 25000
    sess.run(tf.global_variables_initializer())

    input_size = height

    # build the NaN/Inf check once, outside the training loop
    debugger = tf.logical_or(tf.is_nan(loss), tf.is_inf(loss))

    for batch in shuffle(batch_size, input_size):
        step, Xp, Y1p, Y2p = batch
        if step == 0:
            time.sleep(1)
            continue

        while True:
            d, l = sess.run([debugger, loss], feed_dict = {X: Xp, Y1: Y1p, Y2: Y2p, "YOLO/dropout:0": 0.5})
            if (not d):
                break
            else:
                print("Re-random variables!")
                sess.run(tf.global_variables_initializer())
        summary, _ , lossp, lxy, lwh, lobj, lnoobj, lp = sess.run([merge, trainer, loss, loss_xy, loss_wh, loss_obj, loss_noobj, loss_p], feed_dict = {X: Xp, Y1: Y1p, Y2: Y2p, "YOLO/dropout:0": 0.5})

        print("""Step {} : loss {}
    loss_xy     = {}
    loss_wh     = {}
    loss_obj    = {}
    loss_noobj  = {}
Example #38
0
    def ScaleGradients(self, var_grads, gradient_adjuster=None):
        """Scales gradients according to training params.

    Args:
      var_grads: a `.NestedMap` whose values are (var, grad) pairs.
      gradient_adjuster: if not None, a function that mutates a given var_grads.

    Returns:
      A `.NestedMap` containing:
      - has_nan_or_inf: a scalar of 0 or 1, indicating whether there is any NaN
        or Inf in input gradients.
      - final_var_grads: a `.NestedMap` whose values are (var, grad) pairs,
        where gradients have already been scaled.
      - grad_scale: the gradient scale. 0 if gradient updates should be skipped
        for the step. (Optional, only returned in case global norm clipping is
        used.)
    """
        p = self.params

        # Computes gradients' norm and adds their summaries. Note that all_grad_norm
        # may be nan, which may cause grad_scale to be nan.
        for name, vg in var_grads.FlattenItems():
            summary_utils.AddNormSummary(name + '/' + p.name,
                                         py_utils.NestedMap(s=vg))
        all_grad_norm = tf.sqrt(
            py_utils.SumSquared([
                g for (_, g) in py_utils.NestedMap(child=var_grads).Flatten()
            ]))
        all_var_norm = tf.sqrt(
            py_utils.SumSquared([
                v for (v, _) in py_utils.NestedMap(child=var_grads).Flatten()
            ]))
        grad_norm_is_nan_or_inf = tf.logical_or(tf.is_nan(all_grad_norm),
                                                tf.is_inf(all_grad_norm))

        # Optional gradient adjustment. Note that this happens after computing
        # all_grad_norm.
        if gradient_adjuster is not None:
            tf.logging.info('gradient_adjuster=%s', gradient_adjuster)
            var_grads = gradient_adjuster(var_grads)

        # Handles NaN/Inf gradients.
        has_nan_or_inf = py_utils.HasNanOrInfGradient(var_grads)
        # Grad norm can still be inf even if none of the individual grads is inf.
        has_nan_or_inf = tf.logical_or(has_nan_or_inf, grad_norm_is_nan_or_inf)

        return_values = py_utils.NestedMap()
        if p.clip_gradient_single_norm_to_value:
            # Currently using both types of clipping simultaneously is unsupported.
            if p.clip_gradient_norm_to_value:
                raise ValueError(
                    'Cannot use clip_gradient_single_norm_to_value=%f and '
                    'clip_gradient_norm_to_value=%f.' %
                    (p.clip_gradient_single_norm_to_value,
                     p.clip_gradient_norm_to_value))
            final_var_grads = py_utils.ApplyGradNormCliping(
                var_grads, p.clip_gradient_single_norm_to_value)

        else:
            grad_scale = self._GetGlobalGradScale(all_grad_norm,
                                                  has_nan_or_inf)
            self._AddEvalMetric('grad_norm/all', all_grad_norm,
                                tf.constant(1.0))
            self._AddEvalMetric('var_norm/all', all_var_norm, tf.constant(1.0))
            self._AddEvalMetric('grad_scale_all', grad_scale, tf.constant(1.0))
            final_var_grads = py_utils.ApplyGradMultiplier(
                var_grads, grad_scale)
            return_values.grad_scale = grad_scale

        return_values.has_nan_or_inf = has_nan_or_inf
        return_values.final_var_grads = final_var_grads
        return return_values
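A condensed sketch of the same guard without the NestedMap machinery: derive a scale that is zero whenever the global norm is NaN or Inf, so a bad step is skipped entirely (the grads list and clip_value here are assumptions):

import tensorflow as tf

def guarded_scale(grads, clip_value):
    # grads: list of gradient tensors; clip_value: python float (assumed).
    global_norm = tf.global_norm(grads)
    bad = tf.logical_or(tf.is_nan(global_norm), tf.is_inf(global_norm))
    # Scale is clip_value/norm when the norm is too large, 1.0 otherwise,
    # and 0.0 when the norm is NaN/Inf, which skips the update entirely.
    scale = tf.where(bad,
                     tf.zeros_like(global_norm),
                     tf.minimum(1.0, clip_value / global_norm))
    return [g * scale for g in grads]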
Example #39
0
    def angular_symmetry(self, atom_matrix):
        """
        Generate radial basis functions given an atom_matrix consisting of the atom types
        and coordinates.

        Parameters
        ----------
        atom_matrix: tf.Tensor
            An atom matrix of shape (None, 4), where rank 0 determines the number of atoms
            and rank 1 consists of (t, x, y, z) such that t is a compacted atomic number.

        Returns
        -------
        tf.Tensor
            Featurized representation of shape (num_atoms, len(sym.A_Rs)*len(sym.A_thetas)*sym.max_atom_types*(sym.max_atom_types+1)/2)

        """
        num_atoms = tf.shape(atom_matrix)[0]
        atom_idxs = tf.range(tf.shape(atom_matrix)[0])
        atom_types = tf.cast(atom_matrix[:, 0], dtype=tf.int32)
        atom_coords = atom_matrix[:, 1:] # atom_coords shape: (num_atoms, 3)
        type_groups_idxs = tf.dynamic_partition(atom_idxs, atom_types, self.max_atom_types, name="dp_angular")
        lookup = np.array([[[0, 3]],[[2,3]],[[5,3]]])

        angular_features = []
        for type_a in range(self.max_atom_types):
            j_idxs = type_groups_idxs[type_a]

            for type_b in range(type_a, self.max_atom_types):
                k_idxs = type_groups_idxs[type_b]

                tile_a = tf.tile(tf.expand_dims(j_idxs, 1), [1, tf.shape(k_idxs)[0]], name="tile_outer1")
                tile_a = tf.expand_dims(tile_a, 2) 
                tile_b = tf.tile(tf.expand_dims(k_idxs, 0), [tf.shape(j_idxs)[0], 1], name="tile_outer2")
                tile_b = tf.expand_dims(tile_b, 2) 
                cartesian_product = tf.concat([tile_a, tile_b], axis=2) # int64s?
                
                group_coords = tf.nn.embedding_lookup(atom_coords, cartesian_product) # shape: (len(type_a), len(type_b), 2, 3)
                delta_jk = group_coords[:, :, 0, :] - group_coords[:, :, 1, :]
                R_jk = tf.norm(delta_jk, axis=-1)

                dist_vec = tf.reshape(atom_coords, (-1, 1, 1, 1, 3)) # shape: (num_atoms, 1, 1, 1, 3), for vector differences

                deltas = group_coords - dist_vec # shape: (num_atoms, len(type_a), len(type_b), 2, 3)
                delta_ij = deltas[:, :, :, 0, :]
                delta_ik = deltas[:, :, :, 1, :]

                # LHS computation
                denom = tf.multiply(tf.norm(delta_ij, axis=-1), (tf.norm(delta_ik, axis=-1))) #
                dot = tf.reduce_sum(tf.multiply(delta_ij, delta_ik), axis=-1)
                theta_ijk = tf.acos(dot / denom)   # if i=j || j=k then NaN

                lhs = tf.pow(1 + tf.cos(tf.expand_dims(theta_ijk, -1) - tf.reshape(self.A_thetas, (1, 1, 1, -1))), self.A_zeta)
                lhs = tf.where(tf.is_nan(lhs), tf.zeros_like(lhs), lhs) # clean up nans numerically, the real zeroing happens later
                lhs = tf.where(tf.is_inf(lhs), tf.zeros_like(lhs), lhs) # clean up infs numerically, the real zeroing happens later
                
                # RHS computation
                R_ij_ik = tf.norm(deltas, axis=-1) # shape: (num_atoms, len(type_a), len(type_b), 2), pair distances
                f_C_true = 0.5*tf.cos(tf.div(np.pi * R_ij_ik, self.A_Rc)) + 0.5 # TODO: refactor with radial code?
                f_C_flags = tf.nn.relu(tf.sign(self.A_Rc - R_ij_ik)) # 1 if within cutoff, 0 otherwise
                f_C_R_ij_ik  = f_C_true * f_C_flags

                # note: element wise multiply
                fCRi_fCRj = tf.multiply(f_C_R_ij_ik[:, :, :, 0], f_C_R_ij_ik[:, :, :, 1])
                R_ij = R_ij_ik[:, :, :, 0]
                R_ik = R_ij_ik[:, :, :, 1]

                inner = tf.expand_dims((R_ij + R_ik) / 2.0, -1) - tf.reshape(self.A_Rs, (1, 1, 1, -1))
                rhs = tf.exp(-self.A_eta*tf.pow(inner, 2)) * tf.expand_dims(fCRi_fCRj, -1)

                # lhs shape: [num_atoms, len(type_a), len(type_b), len(A_thetas)]
                # rhs shape: [num_atoms, len(type_a), len(type_b), len(A_Rs)]
                lhs = tf.expand_dims(lhs, axis=3)
                rhs = tf.expand_dims(rhs, axis=4)
                summand = tf.multiply(lhs, rhs) # (num_atoms, len(type_a), len(type_b), len(A_Rs), len(A_thetas))

                # zero-out/fix summand elements where i == j || j == k || i == k
                # we store a triplet of shape
                # (num_atoms, len(type_a), len(type_b), 3) where 3 is the distance of ij, ik, and jk respectively
                # R_ij shape: (num_atoms, len(type_a), len(type_b))
                # R_ik shape: (num_atoms, len(type_a), len(type_b))
                R_jk = tf.tile(tf.expand_dims(R_jk, axis=0), [num_atoms, 1, 1], name="tile_inner")
                R_ijk = tf.stack([R_ij, R_ik, R_jk], axis=-1)

                # R_jk shape: (len(type_a), len(type_b))
                # We want to form R_ijk of shape (num_atoms, len(type_a), len(type_b), 3)
                min_dists = tf.reduce_min(R_ijk, axis=-1)
                keep_flags = tf.nn.relu(tf.sign(tf.abs(min_dists) - 1e-7))
                keep_flags = tf.expand_dims(keep_flags, -1)
                keep_flags = tf.expand_dims(keep_flags, -1)

                summand = tf.multiply(summand, keep_flags)
                result = tf.multiply(tf.pow(np.float32(2.0), 1-self.A_zeta), tf.reduce_sum(summand, [1,2])) 
                result = tf.reshape(result, (num_atoms, len(self.A_thetas)*len(self.A_Rs)))

                angular_features.append(result)

        angular_features = tf.concat(angular_features, axis=1)
        angular_features = tf.reshape(angular_features, (num_atoms, self.angular_feature_size())) # ravel

        return angular_features
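theta_ijk above becomes NaN whenever a difference vector has zero length (e.g. i == j) and is cleaned up afterwards with tf.where. An alternative is to keep acos inside its domain from the start; a minimal sketch:

import tensorflow as tf

def safe_angle(delta_ij, delta_ik, eps=1e-7):
    # Clip the cosine into [-1+eps, 1-eps] and keep the denominator
    # strictly positive, so acos never sees NaN or out-of-range values.
    dot = tf.reduce_sum(delta_ij * delta_ik, axis=-1)
    denom = tf.norm(delta_ij, axis=-1) * tf.norm(delta_ik, axis=-1)
    cos_theta = dot / tf.maximum(denom, eps)
    return tf.acos(tf.clip_by_value(cos_theta, -1.0 + eps, 1.0 - eps))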
Example #40
0
 def is_nan(self, sess, feed_dict={}):
     return sess.run(tf.is_nan(self.tensor), feed_dict)
Example #41
0
    def model_fn(self, features, labels, mode):
        """Auto-Scaling 3D CNN model.

    For more information on how to write a model function, see:
      https://www.tensorflow.org/guide/custom_estimators#write_a_model_function
    """
        input_layer = features * 2.0 - 1.0

        # Replace missing values by 0
        hidden_layer = tf.where(tf.is_nan(input_layer),
                                tf.zeros_like(input_layer), input_layer)
        ###
        kernel_size = [1, 3, 3]
        hidden_layer_0_0 = self.conv3d_with_batchnorm(
            features=hidden_layer,
            depth=16,
            kernel_size=kernel_size,
            padding='same',
            mode=mode == tf.estimator.ModeKeys.TRAIN,
            layer_order=0)

        hidden_layer_0_1 = self.conv3d_with_batchnorm(
            features=hidden_layer,
            depth=16,
            kernel_size=[1, 1, 1],
            padding='same',
            mode=mode == tf.estimator.ModeKeys.TRAIN,
            layer_order=1)

        hidden_layer = tf.concat([hidden_layer_0_0, hidden_layer_0_1], axis=4)

        ###
        # Repeatedly apply 3D CNN, followed by 3D max pooling
        # until the hidden layer has reasonable number of entries
        REASONABLE_NUM_ENTRIES = 1000
        num_filters = 16  # The number of filters is fixed
        i = 2
        while True:
            shape = hidden_layer.shape
            kernel_size = [
                min(3, shape[1]),
                min(3, shape[2]),
                min(3, shape[3])
            ]
            hidden_layer_2 = tf.layers.conv3d(inputs=hidden_layer,
                                              filters=num_filters,
                                              kernel_size=kernel_size,
                                              kernel_initializer=initializer,
                                              use_bias=False,
                                              padding='same')
            hidden_layer_2 = tf.layers.batch_normalization(
                inputs=hidden_layer_2,
                momentum=0.99,
                training=mode == tf.estimator.ModeKeys.TRAIN)
            hidden_layer_2 = prelu(hidden_layer_2, i)
            i += 1
            hidden_layer = tf.concat([hidden_layer, hidden_layer_2], axis=4)

            pool_size = [min(2, shape[1]), min(2, shape[2]), min(2, shape[3])]
            hidden_layer = tf.layers.max_pooling3d(inputs=hidden_layer,
                                                   pool_size=pool_size,
                                                   strides=pool_size,
                                                   padding='valid',
                                                   data_format='channels_last')

            if get_num_entries(hidden_layer) < REASONABLE_NUM_ENTRIES:
                break

        hidden_layer = tf.layers.flatten(hidden_layer)
        hidden_layer = tf.layers.dense(
            inputs=hidden_layer,
            units=1024,
            kernel_initializer=initializer,
            use_bias=False,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001)
            #activation=tf.nn.relu
        )
        hidden_layer = tf.layers.batch_normalization(
            inputs=hidden_layer,
            momentum=0.99,
            training=mode == tf.estimator.ModeKeys.TRAIN)
        hidden_layer = tf.nn.relu(hidden_layer)
        hidden_layer = tf.layers.dropout(
            inputs=hidden_layer,
            rate=0.2,
            training=mode == tf.estimator.ModeKeys.TRAIN)

        logits = tf.layers.dense(
            inputs=hidden_layer,
            units=self.output_dim,
            kernel_initializer=initializer,
            bias_initializer=initializer,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001),
            bias_regularizer=tf.contrib.layers.l2_regularizer(0.001))
        sigmoid_tensor = tf.nn.sigmoid(logits, name="sigmoid_tensor")

        predictions = {
            # Generate predictions (for PREDICT and EVAL mode)
            "classes": tf.argmax(input=logits, axis=1),
            # "classes": binary_predictions,
            # Add `sigmoid_tensor` to the graph. It is used for PREDICT and by the
            # `logging_hook`.
            "probabilities": sigmoid_tensor
        }

        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions)

        # Calculate Loss (for both TRAIN and EVAL modes)
        # For multi-label classification, a correct loss is sigmoid cross entropy
        loss = sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)

        # Configure the Training Op (for TRAIN mode)
        if mode == tf.estimator.ModeKeys.TRAIN:
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
            #optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.0001)
            train_op = optimizer.minimize(
                loss=loss, global_step=tf.train.get_global_step())
            train_op = tf.group([train_op, update_ops])
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)

        # Add evaluation metrics (for EVAL mode)
        assert mode == tf.estimator.ModeKeys.EVAL
        eval_metric_ops = {
            "accuracy":
            tf.metrics.accuracy(labels=labels,
                                predictions=predictions["classes"])
        }
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)
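model_fn calls get_num_entries, which is not shown in this snippet. A minimal sketch consistent with how it is used here (hypothetical, not the original helper):

import numpy as np

def get_num_entries(tensor):
    # Product of all static dimensions except the batch dimension;
    # used above to decide when to stop the conv/pool pyramid.
    shape = tensor.shape.as_list()[1:]
    return int(np.prod(shape))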
Example #42
0
def yolo_loss(args,
              anchors,
              num_classes,
              ignore_thresh=.5,
              label_smoothing=0.1,
              print_loss=False,
              normalize=True):
    # there are three feature layers in total
    num_layers = len(anchors) // 3

    #---------------------------------------------------------------------------------------------------#
    #   Split the predictions from the actual ground truth; args is [*model_body.output, *y_true].
    #   y_true is a list of three feature layers with shapes (m,13,13,3,85), (m,26,26,3,85), (m,52,52,3,85).
    #   yolo_outputs is a list of three feature layers with the same shapes.
    #---------------------------------------------------------------------------------------------------#
    y_true = args[num_layers:]
    yolo_outputs = args[:num_layers]

    #-----------------------------------------------------------#
    #   The anchors for the 13x13 feature layer are [142, 110], [192, 243], [459, 401]
    #   The anchors for the 26x26 feature layer are [36, 75], [76, 55], [72, 146]
    #   The anchors for the 52x52 feature layer are [12, 16], [19, 36], [40, 28]
    #-----------------------------------------------------------#
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]

    # compute input_shape, e.g. (416, 416)
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))

    loss = 0
    num_pos = 0
    #-----------------------------------------------------------#
    #   Take each image in the batch;
    #   m is the batch_size
    #-----------------------------------------------------------#
    m = K.shape(yolo_outputs[0])[0]
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    #---------------------------------------------------------------------------------------------------#
    #   y_true is a list of three feature layers with shapes (m,13,13,3,85), (m,26,26,3,85), (m,52,52,3,85).
    #   yolo_outputs is a list of three feature layers with the same shapes.
    #---------------------------------------------------------------------------------------------------#
    for l in range(num_layers):
        #-----------------------------------------------------------#
        #   Taking the first feature layer (m,13,13,3,85) as an example,
        #   extract the positions that contain an object: (m,13,13,3,1)
        #-----------------------------------------------------------#
        object_mask = y_true[l][..., 4:5]
        #-----------------------------------------------------------#
        #   Extract the corresponding class probabilities (m,13,13,3,80)
        #-----------------------------------------------------------#
        true_class_probs = y_true[l][..., 5:]
        if label_smoothing:
            true_class_probs = _smooth_labels(true_class_probs,
                                              label_smoothing)

        #-----------------------------------------------------------#
        #   Process the yolo_outputs feature layer to obtain four values:
        #   grid        (13,13,1,2) grid coordinates
        #   raw_pred    (m,13,13,3,85) raw, unprocessed predictions
        #   pred_xy     (m,13,13,3,2) decoded center coordinates
        #   pred_wh     (m,13,13,3,2) decoded width/height
        #-----------------------------------------------------------#
        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)

        #-----------------------------------------------------------#
        #   pred_box is the decoded position of the predicted box
        #   (m,13,13,3,4)
        #-----------------------------------------------------------#
        pred_box = K.concatenate([pred_xy, pred_wh])

        #-----------------------------------------------------------#
        #   Find the negative samples; the first step is to create an empty array
        #-----------------------------------------------------------#
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        #-----------------------------------------------------------#
        #   Compute the ignore_mask for each image
        #-----------------------------------------------------------#
        def loop_body(b, ignore_mask):
            #-----------------------------------------------------------#
            #   Extract the n ground-truth boxes: (n, 4)
            #-----------------------------------------------------------#
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            #-----------------------------------------------------------#
            #   Compute the IoU between predicted and ground-truth boxes
            #   pred_box    13,13,3,4 predicted box coordinates
            #   true_box    n,4 ground-truth box coordinates
            #   iou         13,13,3,n IoU between predictions and ground truth
            #-----------------------------------------------------------#
            iou = box_iou(pred_box[b], true_box)

            #-----------------------------------------------------------#
            #   best_iou    13,13,3 maximum overlap of each feature point with any ground-truth box
            #-----------------------------------------------------------#
            best_iou = K.max(iou, axis=-1)

            #-----------------------------------------------------------#
            #   If the maximum IoU of a predicted box with all ground-truth
            #   boxes is below ignore_thresh, the box is considered to have
            #   no matching ground truth. The goal is to ignore feature
            #   points whose predictions already match a ground-truth box
            #   well: those boxes are already fairly accurate and should
            #   not be treated as negative samples.
            #-----------------------------------------------------------#
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        #-----------------------------------------------------------#
        #   Run a loop here; the loop iterates over each image
        #-----------------------------------------------------------#
        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body,
                                                       [0, ignore_mask])

        #-----------------------------------------------------------#
        #   ignore_mask selects the feature points used as negative samples
        #   (m,13,13,3)
        #-----------------------------------------------------------#
        #-----------------------------------------------------------#
        ignore_mask = ignore_mask.stack()
        #   (m,13,13,3,1)
        ignore_mask = K.expand_dims(ignore_mask, -1)

        #-----------------------------------------------------------#
        #   The larger the ground-truth box, the smaller its weight; small boxes weigh more.
        #-----------------------------------------------------------#
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        #-----------------------------------------------------------#
        #   Compute the CIoU loss
        #-----------------------------------------------------------#
        raw_true_box = y_true[l][..., 0:4]
        ciou = box_ciou(pred_box, raw_true_box)
        ciou_loss = object_mask * box_loss_scale * (1 - ciou)

        #------------------------------------------------------------------------------#
        #   If a position contains a box, compute the cross-entropy between 1
        #   and the confidence; if it does not, compute the cross-entropy
        #   between 0 and the confidence. Some samples are excluded via
        #   ignore_mask (those whose best_iou is at least ignore_thresh):
        #   their predictions already match a ground-truth box well, so they
        #   are unsuitable as negative samples and are ignored.
        #------------------------------------------------------------------------------#
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask

        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        location_loss = K.sum(
            tf.where(tf.is_nan(ciou_loss), tf.zeros_like(ciou_loss),
                     ciou_loss))
        confidence_loss = K.sum(
            tf.where(tf.is_nan(confidence_loss),
                     tf.zeros_like(confidence_loss), confidence_loss))
        class_loss = K.sum(
            tf.where(tf.is_nan(class_loss), tf.zeros_like(class_loss),
                     class_loss))
        #-----------------------------------------------------------#
        #   Count the number of positive samples
        #-----------------------------------------------------------#
        num_pos += tf.maximum(K.sum(K.cast(object_mask, tf.float32)), 1)
        loss += location_loss + confidence_loss + class_loss
        # if print_loss:
        #   loss = tf.Print(loss, [loss, location_loss, confidence_loss, class_loss, K.sum(ignore_mask)], message='loss: ')

    if normalize:
        loss = loss / num_pos
    else:
        loss = loss / mf
    return loss
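The per-image ignore mask above is built with a TensorArray inside a while loop because each image carries a different number of ground-truth boxes. The control-flow skeleton in isolation (plain tf.while_loop instead of K.control_flow_ops, with a dummy loop body):

import tensorflow as tf

batch = tf.placeholder(tf.int32, shape=[])
values = tf.placeholder(tf.float32, shape=[None, 5])

masks = tf.TensorArray(tf.float32, size=1, dynamic_size=True)

def loop_body(b, masks):
    # Placeholder body: any per-image computation goes here.
    masks = masks.write(b, tf.cast(values[b] > 0.5, tf.float32))
    return b + 1, masks

_, masks = tf.while_loop(lambda b, *args: b < batch, loop_body, [0, masks])
mask_tensor = masks.stack()  # shape: (batch, 5)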
Example #43
0
def dice_loss(logits,
              labels,
              num_classes,
              smooth=1e-5,
              include_background=True,
              only_present=False):
    """Calculates a smooth Dice coefficient loss from sparse labels.

    Args:
        logits (tf.Tensor): logits prediction for which to calculate
            crossentropy error
        labels (tf.Tensor): sparse labels used for crossentropy error
            calculation
        num_classes (int): number of class labels to evaluate on
        smooth (float): smoothing coefficient for the loss computation
        include_background (bool): flag to include a loss on the background
            label or not
        only_present (bool): flag to include only labels present in the
            inputs or not

    Returns:
        tf.Tensor: Tensor scalar representing the loss
    """

    # Get a softmax probability of the logits predictions and a one hot
    # encoding of the labels tensor
    probs = tf.nn.softmax(logits)
    onehot_labels = tf.one_hot(
        indices=labels,
        depth=num_classes,
        dtype=tf.float32,
        name='onehot_labels')

    # Compute the Dice similarity coefficient
    label_sum = tf.reduce_sum(onehot_labels, axis=[1, 2, 3], name='label_sum')
    pred_sum = tf.reduce_sum(probs, axis=[1, 2, 3], name='pred_sum')
    intersection = tf.reduce_sum(onehot_labels * probs, axis=[1, 2, 3],
                                 name='intersection')

    per_sample_per_class_dice = (2. * intersection + smooth)
    per_sample_per_class_dice /= (label_sum + pred_sum + smooth)

    # Include or exclude the background label for the computation
    if include_background:
        flat_per_sample_per_class_dice = tf.reshape(
            per_sample_per_class_dice, (-1, ))
        flat_label = tf.reshape(label_sum, (-1, ))
    else:
        flat_per_sample_per_class_dice = tf.reshape(
            per_sample_per_class_dice[:, 1:], (-1, ))
        flat_label = tf.reshape(label_sum[:, 1:], (-1, ))

    # Include or exclude non-present labels for the computation
    if only_present:
        masked_dice = tf.boolean_mask(flat_per_sample_per_class_dice,
                                      tf.logical_not(tf.equal(flat_label, 0)))
    else:
        masked_dice = tf.boolean_mask(
            flat_per_sample_per_class_dice,
            tf.logical_not(tf.is_nan(flat_per_sample_per_class_dice)))

    dice = tf.reduce_mean(masked_dice)
    loss = 1. - dice

    return loss
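A minimal usage sketch on random data; the 5-D logits shape (batch, x, y, z, classes) is an assumption implied by the [1, 2, 3] reductions above:

import numpy as np
import tensorflow as tf

logits = tf.constant(np.random.randn(2, 8, 8, 8, 4), dtype=tf.float32)
labels = tf.constant(np.random.randint(0, 4, (2, 8, 8, 8)), dtype=tf.int32)
loss = dice_loss(logits, labels, num_classes=4, include_background=False)
with tf.Session() as sess:
    print(sess.run(loss))  # scalar loss, roughly in [0, 1]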
Example #44
0
    def build_model(self):
        Z = tf.placeholder(tf.float32, [self.batch_size, self.dim_z])
        Y = tf.placeholder(tf.float32, [self.batch_size, self.dim_y])

        image_real = tf.placeholder(tf.float32,
                                    [self.batch_size] + self.image_shape)
        pred_high = tf.placeholder(tf.float32,
                                   [self.batch_size] + self.image_shape)
        pred_low = tf.placeholder(tf.float32,
                                  [self.batch_size] + self.image_shape)
        h4 = self.generate(Z, Y)
        #image_gen comes from sigmoid output of generator
        image_gen = tf.nn.sigmoid(h4)

        raw_real2 = self.discriminate(image_real, Y)
        #p_real = tf.nn.sigmoid(raw_real)
        p_real = tf.reduce_mean(raw_real2)

        raw_gen2 = self.discriminate(image_gen, Y)
        #p_gen = tf.nn.sigmoid(raw_gen)
        p_gen = tf.reduce_mean(raw_gen2)

        discrim_cost = tf.reduce_mean(raw_real2) - tf.reduce_mean(raw_gen2)
        gen_cost = -tf.reduce_mean(raw_gen2)

        mask = tf.placeholder(tf.float32, [self.batch_size] + self.image_shape,
                              name='mask')
        '''contextual_loss_latter = tf.reduce_sum(tf.contrib.layers.flatten(
            -tf.log(tf.abs(image_real-image_gen))), 1)'''
        #contextual_loss_latter = tf.reduce_sum(tf.log(tf.contrib.layers.flatten(tf.abs(image_gen - pred_high))), 1)

        #log loss
        '''contextual_loss_latter = tf.reduce_sum(tf.contrib.layers.flatten(
        -tf.log(tf.maximum(
            (mask + tf.multiply(tf.ones_like(mask) - mask, pred_high)) - tf.multiply(
                tf.ones_like(mask) - mask, image_gen), 0.0001*tf.ones_like(mask)))
        -tf.log(tf.maximum(
            (mask + tf.multiply(tf.ones_like(mask) - mask, image_gen)) - tf.multiply(
                tf.ones_like(mask) - mask, pred_low), 0.0001*tf.ones_like(mask)))), 1)'''
        contextual_loss_latter = tf.contrib.layers.flatten(-tf.log(
            (mask + tf.multiply(tf.ones_like(mask) - mask, pred_high)) -
            tf.multiply(tf.ones_like(mask) - mask, image_gen)) - tf.log(
                (mask + tf.multiply(tf.ones_like(mask) - mask, image_gen)) -
                tf.multiply(tf.ones_like(mask) - mask, pred_low)))
        contextual_loss_latter = tf.where(
            tf.is_nan(contextual_loss_latter),
            tf.ones_like(contextual_loss_latter) * 1000000.0,
            contextual_loss_latter)
        contextual_loss_latter2 = tf.reduce_sum(contextual_loss_latter, 1)
        #square loss
        '''contextual_loss_latter = tf.reduce_sum(tf.contrib.layers.flatten(
            tf.square(tf.multiply(tf.ones_like(mask) - mask, image_gen) - tf.multiply(tf.ones_like(mask) - mask, pred_high)))
        +tf.contrib.layers.flatten(
            tf.square(
                tf.multiply(tf.ones_like(mask) - mask, image_gen) - tf.multiply(tf.ones_like(mask) - mask, pred_high)))
        , 1)'''
        contextual_loss_former = tf.reduce_sum(
            tf.contrib.layers.flatten(
                tf.square(
                    tf.multiply(mask, image_gen) -
                    tf.multiply(mask, image_real))), 1)
        contextual_loss_prepare = tf.reduce_sum(
            tf.contrib.layers.flatten(
                tf.square(
                    tf.multiply(tf.ones_like(mask) - mask, image_gen) -
                    tf.multiply(tf.ones_like(mask) - mask, image_real))), 1)
        perceptual_loss = gen_cost
        complete_loss = contextual_loss_former + self.lam * perceptual_loss + 0.05 * contextual_loss_latter2
        grad_complete_loss = tf.gradients(complete_loss, Z)
        grad_uniform_loss = tf.gradients(contextual_loss_prepare, Z)

        return Z, Y, image_real, discrim_cost, gen_cost, p_real, p_gen, grad_complete_loss, \
               pred_high, pred_low, mask, contextual_loss_latter, contextual_loss_former, grad_uniform_loss
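The tf.where fallback above patches NaNs from out-of-domain log arguments with a large penalty; the commented-out variant avoids the NaNs by clamping the argument instead. That clamping pattern in isolation:

import tensorflow as tf

def safe_log(x, eps=1e-4):
    # Clamp the argument away from zero and negative values so tf.log
    # never produces NaN or -inf; an alternative to patching NaNs
    # after the fact with tf.where.
    return tf.log(tf.maximum(x, eps * tf.ones_like(x)))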
Example #45
0
def build_heatmap(in_tensor, config, names = None):
 
    num_detections  = config.DETECTION_MAX_INSTANCES
    img_h, img_w    = config.IMAGE_SHAPE[:2]
    batch_size      = config.BATCH_SIZE
    num_classes     = config.NUM_CLASSES  
    print('\n ')
    print('  > NEW build_heatmap() for ', names )
    print('    original in_tensor shape : ', in_tensor.shape)
    # rois per image is determined by size of input tensor 
    #   detection mode:   config.TRAIN_ROIS_PER_IMAGE 
    #   ground_truth  :   config.DETECTION_MAX_INSTANCES
    rois_per_image  = (in_tensor.shape)[2] 
    # strt_cls        = 0 if rois_per_image == 32 else 1
    print('    num of bboxes per class is : ', rois_per_image )

    #-----------------------------------------------------------------------------    
    ## Stack non_zero bboxes from in_tensor into pt2_dense 
    #-----------------------------------------------------------------------------
    # pt2_ind shape is [?, 3]. 
    #   pt2_ind[0] corresponds to image_index 
    #   pt2_ind[1] corresponds to class_index 
    #   pt2_ind[2] corresponds to roi row_index 
    # pt2_dense shape is [?, 6]
    #    pt2_dense[0]    is image index
    #    pt2_dense[1:4]  roi coordinates
    #    pt2_dense[5]    is class id
    #-----------------------------------------------------------------------------
    pt2_sum = tf.reduce_sum(tf.abs(in_tensor[:,:,:,:-2]), axis=-1)
    print('    pt2_sum shape ',pt2_sum.shape)
    # print(pt2_sum[0].eval())
    pt2_ind = tf.where(pt2_sum > 0)

    ## replaced the two operations below with the one above - 15-05-2018
    # pt2_mask = tf.greater(pt2_sum , 0)
    # pt2_ind  = tf.where(pt2_mask)
    # print(' pt2_mask shape ', pt2_mask.get_shape())
    # print(pt2_mask.eval())
    # print('    pt2_ind shape ', pt2_ind.get_shape())
    # print(pt2_ind.eval())

    pt2_dense = tf.gather_nd( in_tensor, pt2_ind)
    print('    dense shape ',pt2_dense.get_shape())

    #-----------------------------------------------------------------------------
    ## Build mesh-grid to hold pixel coordinates  
    #-----------------------------------------------------------------------------
    X = tf.range(img_w, dtype=tf.int32)
    Y = tf.range(img_h, dtype=tf.int32)
    X, Y = tf.meshgrid(X, Y)

    # duplicate (repeat) X and Y into a  batch_size x rois_per_image tensor
    print('    X/Y shapes :',  X.get_shape(), Y.get_shape())
    ones = tf.ones([tf.shape(pt2_dense)[0] , 1, 1], dtype = tf.int32)
    rep_X = ones * X
    rep_Y = ones * Y 
    print('    Ones:    ', ones.shape)                
    print('    ones_exp * X', ones.shape, '*', X.shape, '= ',rep_X.shape)
    print('    ones_exp * Y', ones.shape, '*', Y.shape, '= ',rep_Y.shape)

    # # stack the X and Y grids 
    bef_pos = tf.to_float(tf.stack([rep_X,rep_Y], axis = -1))
    print('    before transpose ', bef_pos.get_shape())
    pos_grid = tf.transpose(bef_pos,[1,2,0,3])
    print('    after transpose ', pos_grid.get_shape())    

    #-----------------------------------------------------------------------------
    ##  Build mean and covariance tensors for the Multivariate Normal Distribution
    #-----------------------------------------------------------------------------
    width  = pt2_dense[:,3] - pt2_dense[:,1]      # x2 - x1
    height = pt2_dense[:,2] - pt2_dense[:,0]      # y2 - y1
    cx     = pt2_dense[:,1] + ( width  / 2.0)
    cy     = pt2_dense[:,0] + ( height / 2.0)
    means  = tf.stack((cx,cy),axis = -1)
    covar  = tf.stack((width * 0.5 , height * 0.5), axis = -1)
    covar  = tf.sqrt(covar)

    tfd = tf.contrib.distributions
    mvn = tfd.MultivariateNormalDiag( loc  = means,  scale_diag = covar)
    prob_grid = mvn.prob(pos_grid)
    print('     Prob_grid shape before transpose: ', prob_grid.get_shape())
    prob_grid = tf.transpose(prob_grid,[2,0,1])
    print('     Prob_grid shape after transpose: ', prob_grid.get_shape())
    print('    >> input to MVN.PROB: pos_grid (meshgrid) shape: ', pos_grid.get_shape())
    print('    << output probabilities shape:' , prob_grid.get_shape())

    #--------------------------------------------------------------------------------
    ## IMPORTANT: kill distributions of NaN boxes (resulting from bboxes with a
    ## height/width of zero, which cause singular sigma covariance matrices)
    #--------------------------------------------------------------------------------
    prob_grid = tf.where(tf.is_nan(prob_grid),  tf.zeros_like(prob_grid), prob_grid)


    # scatter out the probability distributions based on class --------------------------
    print('\n    Scatter out the probability distributions based on class --------------') 
    gauss_scatt   = tf.scatter_nd(pt2_ind, prob_grid, [batch_size, num_classes, rois_per_image, img_w, img_h])
    print('    pt2_ind shape   : ', pt2_ind.shape)  
    print('    prob_grid shape : ', prob_grid.shape)  
    print('    gauss_scatt     : ', gauss_scatt.shape)   # batch_sz , num_classes, num_rois, image_h, image_w
    
    # heatmap: sum gauss_scattered based on class ---------------------------------------
    print('\n    Reduce sum based on class ---------------------------------------------')         
    gauss_sum = tf.reduce_sum(gauss_scatt, axis=2, name='pred_heatmap2')
    gauss_sum = tf.where(gauss_sum > 1e-12, gauss_sum, tf.zeros_like(gauss_sum))
    
    print('    gaussian_sum shape     : ', gauss_sum.get_shape(), 'Keras tensor ', KB.is_keras_tensor(gauss_sum) )      
    
    ##---------------------------------------------------------------------------------------------
    ## heatmap L2 normalization
    ## Normalization using the  `gauss_sum` (batchsize , num_classes, height, width) 
    ## 17-05-2018 (new method; replaced the previous method that used the transposed gauss sum)
    ## 17-05-2018 Replaced with normalization across the CLASS axis
    ##---------------------------------------------------------------------------------------------

    # print('\n    L2 normalization ------------------------------------------------------')   
    gauss_L2norm   = KB.l2_normalize(gauss_sum, axis = +1)   # normalize along the CLASS axis 
    print('    gauss L2 norm   : ', gauss_L2norm.shape   ,' Keras tensor ', KB.is_keras_tensor(gauss_L2norm) )

    print('\n    normalization ------------------------------------------------------')   
    gauss_norm    = gauss_sum / tf.reduce_max(gauss_sum, axis=[-2,-1], keepdims = True)
    gauss_norm    = tf.where(tf.is_nan(gauss_norm),  tf.zeros_like(gauss_norm), gauss_norm)
    print('    gauss norm   : ', gauss_norm.shape   ,' Keras tensor ', KB.is_keras_tensor(gauss_norm) )
    
    ##--------------------------------------------------------------------------------------------
    ## generate score based on gaussian using bounding box masks 
    ## NOTE: Score is generated on NORMALIZED gaussian distributions (GAUSS_NORM)
    ##       If want to do this on NON-NORMALIZED, we need to apply it on GAUSS_SUM
    ##--------------------------------------------------------------------------------------------
    # flatten gaussian scattered and input_tensor, and pass on to build_bbox_score routine
    in_shape = tf.shape(in_tensor)
    in_tensor_flattened  = tf.reshape(in_tensor, [-1, in_shape[-1]])
    bboxes = tf.to_int32(tf.round(in_tensor_flattened[...,0:4]))
    print('    in_tensor               ', in_tensor.shape)
    print('    in_tensor_flattened is  ', in_tensor_flattened.shape)
    print('    boxes shape             ', bboxes.shape)
    print('    Rois per image        : ', rois_per_image)


    #--------------------------------------------------------------------------------------------------------------------------
    # duplicate GAUSS_NORM <num_roi> times to pass along with bboxes to map_fn function
    #   Here we have a choice to calculate scores using the GAUSS_SUM (unnormalized) or GAUSS_NORM (normalized)
    #   after looking at the scores and ratios for each option, I decided to go with the normalized 
    #   as the numbers are larger
    #
    # Examples>
    #   Using GAUSS_SUM
    # [   3.660313    3.513489   54.475536   52.747402    1.          0.999997    4.998889 2450.          0.00204     0.444867]
    # [   7.135149    1.310972   50.020126   44.779854    1.          0.999991    4.981591 1892.          0.002633    0.574077]
    # [  13.401865    0.         62.258957   46.636948    1.          0.999971    4.957398 2303.          0.002153    0.469335]
    # [   0.          0.         66.42349    56.123024    1.          0.999908    4.999996 3696.          0.001353    0.294958]
    # [   0.          0.         40.78952    60.404335    1.          0.999833    4.586552 2460.          0.001864    0.406513]    
    #
    #   Using GAUSS_NORM:
    # [   3.660313    3.513489   54.475536   52.747402    1.          0.999997 1832.9218   2450.          0.748131    0.479411]
    # [   7.135149    1.310972   50.020126   44.779854    1.          0.999991 1659.3965   1892.          0.877059    0.56203 ]
    # [  13.401865    0.         62.258957   46.636948    1.          0.999971 1540.4974   2303.          0.668909    0.428645]
    # [   0.          0.         66.42349    56.123024    1.          0.999908 1925.3267   3696.          0.520922    0.333813]
    # [   0.          0.         40.78952    60.404335    1.          0.999833 1531.321    2460.          0.622488    0.398898]
    # 
    #  to change the source, change the following line gauss_norm <--> gauss_sum
    #---------------------------------------------------------------------------------------------------------------------------
    temp = tf.expand_dims(gauss_norm, axis =2)
    temp = tf.tile(temp, [1,1, rois_per_image ,1,1])
    temp_shape   = KB.int_shape(temp)
    temp_reshape = KB.reshape(temp, (-1, temp_shape[-2], temp_shape[-1]))
    print('    heatmap original shape  : ', gauss_norm.shape)
    print('    heatmap replicated      : ', temp_shape)
    print('    heatmap flattened       : ', temp_reshape.shape)

    scores = tf.map_fn(build_mask_routine, [temp_reshape, bboxes], dtype=tf.float32)


    # consider the two new columns for reshaping the gaussian_bbox_scores
    new_shape   = tf.shape(in_tensor)+ [0,0,0, tf.shape(scores)[-1]]        
    bbox_scores = tf.concat([in_tensor_flattened, scores], axis = -1)
    bbox_scores = tf.reshape(bbox_scores, new_shape)
    # print('    new shape is            : ', new_shape.eval())
    print('    in_tensor_flattened     : ', in_tensor_flattened.shape)
    print('    Scores shape            : ', scores.shape)   # [(num_batches x num_class x num_rois ), 3]
    print('    bbox_scores (reshaped)  : ', bbox_scores.shape)

    ##--------------------------------------------------------------------------------------------
    ## Normalize computed score above, and add it to the heatmap_score tensor as last column
    ##--------------------------------------------------------------------------------------------
    scr_L2norm   = tf.nn.l2_normalize(bbox_scores[...,-1], axis = -1)   # shape (num_imgs, num_class, num_rois)
    scr_L2norm   = tf.expand_dims(scr_L2norm, axis = -1)

    ##--------------------------------------------------------------------------------------------
    # shape of tf.reduce_max(bbox_scores[...,-1], axis = -1, keepdims=True) is (num_imgs, num_class, 1)
    #  This is a regular normalization that moves everything between [0, 1]. 
    #  This causes negative values to move to -inf, which is a problem in FCN scoring. 
    # To address this a normalization between [-1 and +1] was introduced in FCN.
    # Not sure how this will work with training tho.
    ##--------------------------------------------------------------------------------------------
    scr_norm     = bbox_scores[...,-1]/ tf.reduce_max(bbox_scores[...,-1], axis = -1, keepdims=True)
    scr_norm     = tf.where(tf.is_nan(scr_norm),  tf.zeros_like(scr_norm), scr_norm)     
    
    #--------------------------------------------------------------------------------------------
    # this normalization moves values to [-1, +1] which we use in FCN, but not here. 
    #--------------------------------------------------------------------------------------------    
    # reduce_max = tf.reduce_max(bbox_scores[...,-1], axis = -1, keepdims=True)
    # reduce_min = tf.reduce_min(bbox_scores[...,-1], axis = -1, keepdims=True)  ## epsilon    = tf.ones_like(reduce_max) * 1e-7
    # scr_norm  = (2* (bbox_scores[...,-1] - reduce_min) / (reduce_max - reduce_min)) - 1     

    scr_norm     = tf.where(tf.is_nan(scr_norm),  tf.zeros_like(scr_norm), scr_norm)  
    scr_norm     = tf.expand_dims(scr_norm, axis = -1)                             # shape (num_imgs, num_class, 32, 1)
    bbox_scores  = tf.concat([bbox_scores, scr_norm, scr_L2norm], axis = -1)
    
    gauss_heatmap        = KB.identity(tf.transpose(gauss_sum,[0,2,3,1]), name = names[0])
    gauss_heatmap_norm   = KB.identity(tf.transpose(gauss_norm,[0,2,3,1]), name = names[0]+'_norm')
    gauss_heatmap_L2norm = KB.identity(tf.transpose(gauss_L2norm,[0,2,3,1]), name = names[0]+'_L2norm')
    gauss_scores         = KB.identity(bbox_scores, name = names[0]+'_scores') 
    
    print('    gauss_heatmap final shape : ', gauss_heatmap.shape   ,' Keras tensor ', KB.is_keras_tensor(gauss_heatmap) )  
    print('    gauss_scores  final shape : ', gauss_scores.shape ,' Keras tensor ', KB.is_keras_tensor(gauss_scores) )  
    print('    complete')

    return   gauss_heatmap_norm, gauss_scores, gauss_heatmap,gauss_heatmap_L2norm    # [gauss_sum, gauss_scatt, means, covar]    
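The core of build_heatmap reduced to a single box: evaluate a diagonal-covariance Gaussian centred on the box over a pixel grid, then zero the NaNs that degenerate (zero-width or zero-height) boxes produce. A minimal sketch:

import tensorflow as tf

def box_heatmap(box, img_h, img_w):
    # box: [y1, x1, y2, x2] float32 tensor.
    tfd = tf.contrib.distributions
    X, Y = tf.meshgrid(tf.range(img_w, dtype=tf.float32),
                       tf.range(img_h, dtype=tf.float32))
    pos_grid = tf.stack([X, Y], axis=-1)                    # (h, w, 2)
    width = box[3] - box[1]
    height = box[2] - box[0]
    means = tf.stack([box[1] + width / 2.0, box[0] + height / 2.0])
    covar = tf.sqrt(tf.stack([width * 0.5, height * 0.5]))
    mvn = tfd.MultivariateNormalDiag(loc=means, scale_diag=covar)
    prob = mvn.prob(pos_grid)                               # (h, w)
    # Degenerate boxes give singular covariances and NaN probabilities.
    return tf.where(tf.is_nan(prob), tf.zeros_like(prob), prob)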
Example #46
0
def pirank_deep_loss(labels,
                     logits,
                     features,
                     tau,
                     taustar,
                     ndcg_k,
                     top_k,
                     list_size,
                     merge_block_size,
                     ste,
                     straight_backprop,
                     full_loss,
                     tau_scheme,
                     seed,
                     depth=1):
    if not top_k:
        top_k = ndcg_k

    # For now we require these two to be equal
    assert top_k == ndcg_k

    assert merge_block_size >= top_k
    assert list_size % merge_block_size == 0
    assert list_size >= merge_block_size
    assert merge_block_size % top_k == 0

    div = merge_block_size // top_k
    with tf.name_scope("merge_nr_scope_depth_{}".format(depth)):
        is_label_valid = tfr.utils.is_label_valid(labels)
        labels = tf.where(is_label_valid,
                          labels,
                          tf.zeros_like(labels),
                          name='labels')
        logits = tf.where(
            is_label_valid,
            logits,
            -1e-6 * tf.ones_like(logits) +
            tf.reduce_min(input_tensor=logits, axis=1, keepdims=True),
            name='logits')

        if list_size > merge_block_size:
            # Merge the top_k logits from the previous layer of merges
            logits, phat_tops = map(
                list(
                    zip(*[
                        pirank_deep_loss(labels[:, i:(i + list_size // div)],
                                         logits[:, i:(i + list_size // div)],
                                         None,
                                         tau,
                                         taustar,
                                         ndcg_k,
                                         top_k,
                                         list_size // div,
                                         merge_block_size,
                                         ste,
                                         full_loss,
                                         depth=depth + 1)
                        for i in range(0, list_size, list_size // div)
                    ])).__getitem__, [0, 1])
            logits = tf.concat(list(logits), 1, name='merged_logits')
            phat_tops = list(phat_tops)
            if not full_loss:
                labels = tf.concat(phat_tops, 1, name='merged_labels')

        # Get P_hat
        logits = tf.expand_dims(logits, 2, name="logits_exp")
        labels = tf.expand_dims(labels, 2, name="labels_exp")
        tau = tau**depth if tau_scheme == 'square' else tau
        if ste or depth == 1:
            P_hat_backward = util.neuralsort(logits, tau)
            P_hat_backward = tf.identity(P_hat_backward, name="P_hat_backward")
            P_hat_forward = util.neuralsort(logits, taustar)
            P_hat = P_hat_backward + tf.stop_gradient(P_hat_forward -
                                                      P_hat_backward)
        else:
            P_hat = util.neuralsort(logits, tau)
        P_hat = tf.identity(P_hat, name="P_hat")

        phat_top = P_hat[:, :top_k, :]
        if full_loss and list_size > merge_block_size:
            # Do the recursive product
            if phat_tops is not None:
                res = []
                for i, pt in enumerate(phat_tops):
                    l, h = i * top_k, (i + 1) * top_k
                    res.append(tf.matmul(phat_top[:, :, l:h], pt))
                phat_top = tf.concat(res, 2)
                assert phat_top.shape[2] == list_size
        phat_top = tf.identity(phat_top, name='phat_top')

        if depth == 1:
            # Top level: compute the final differentiable NDCG loss
            label_powers = tf.pow(2.0,
                                  tf.cast(labels, dtype=tf.float32),
                                  name="label_powers") - 1.0
            sorted_powers = tf.matmul(phat_top,
                                      label_powers,
                                      name='sorted_powers')
            numerator = tf.reduce_sum(sorted_powers,
                                      axis=-1,
                                      name="dcg_numerator")
            position = tf.cast(tf.range(1, ndcg_k + 1),
                               dtype=tf.float32,
                               name="dcg_position")
            denominator = tf.math.log(position + 1, name="dcg_denominator")
            dcg = numerator / (1e-10 + denominator)
            dcg = tf.reduce_sum(input_tensor=dcg,
                                axis=1,
                                keepdims=True,
                                name="dcg")
            labels = tf.squeeze(labels, 2)
            ideal_sorted_labels, _ = tf.nn.top_k(labels, k=ndcg_k, sorted=True)
            numerator = tf.pow(2.0,
                               tf.cast(ideal_sorted_labels, dtype=tf.float32),
                               name="ideal_dcg_numerator") - 1.0
            ideal_dcg = numerator / (1e-10 + denominator)
            ideal_dcg = tf.reduce_sum(ideal_dcg,
                                      axis=1,
                                      keepdims=True,
                                      name="ideal_dcg")
            dcg = tf.where(tf.is_nan(dcg), tf.zeros_like(dcg), dcg)
            ideal_dcg = tf.where(tf.is_nan(ideal_dcg), tf.ones_like(ideal_dcg),
                                 ideal_dcg)
            ndcg = tf.reduce_sum(dcg) / (1e-10 + tf.reduce_sum(ideal_dcg))
            ndcg = tf.identity(ndcg, name='ndcg')
            return 1. - ndcg
        else:
            topk_logits = tf.matmul(phat_top, logits)
            if straight_backprop:
                topk_logits = tf.stop_gradient(topk_logits)
            topk_logits = tf.squeeze(topk_logits, 2, name="topk_logits")
            if not full_loss:
                topk_labels = tf.matmul(phat_top, labels)
                topk_labels = tf.squeeze(topk_labels, 2, name="topk_labels")
            return topk_logits, (phat_top if full_loss else topk_labels)
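
As a quick sanity check on the depth-1 branch above, here is the same NDCG@k quantity in plain NumPy (a sketch, not part of the PiRank code; ndcg_at_k is a hypothetical helper). The natural-log discount matches the code above, and the log base cancels between DCG and ideal DCG anyway:

import numpy as np

def ndcg_at_k(labels, ranked_labels, k, eps=1e-10):
    # labels: all graded relevances; ranked_labels: relevances in predicted order.
    labels = np.asarray(labels, dtype=float)
    ranked = np.asarray(ranked_labels, dtype=float)[:k]
    discounts = np.log(np.arange(1, k + 1) + 1.0)      # log(position + 1), as above
    dcg = np.sum((2.0 ** ranked - 1.0) / (eps + discounts))
    ideal = np.sort(labels)[::-1][:k]                  # best possible ordering
    idcg = np.sum((2.0 ** ideal - 1.0) / (eps + discounts))
    return dcg / (eps + idcg)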
Example #47
def lrp_lstm(r_out, o, c, i, z, w_o, w_i, act_h, multiplication_rule, alpha=1, beta=0, o_min=None, i_min=None,
             c_min=None, z_min=None, o_max=None, i_max=None, c_max=None, z_max=None):
    """lrp applied to TeLL LSTMLayer
    
    Parameters
    -------
    r_out : tensor (batchsize, timesteps, units)
    o, c, i, z  : tensor (batchsize, timesteps, units)
    w_o, w_i : tensor (incoming, outgoing)
    act_h activation function after cell
    multiplication_rule : int
    0...50/50 rule; 1...proportional rule; 3...no multiplication rule, no relevance through recurrent gate connections
    """
    n_timesteps = tf.shape(r_out)[1]
    
    if multiplication_rule == 0:
        mul_rule = additive_multiplication_rule
    elif multiplication_rule == 1:
        mul_rule = proportional_multiplication_rule
    elif multiplication_rule == 3:
        mul_rule = None
    else:
        raise AttributeError("Only multiplication_rule 0 and 1 are implemented")
    
    if beta == 0:
        o_min = tf.reduce_min(o, axis=1)
        i_min = tf.reduce_min(i, axis=1)
        c_min = tf.reduce_min(c, axis=1)
        z_min = tf.reduce_min(z, axis=1)
        
        o_max = tf.reduce_max(o, axis=1)
        i_max = tf.reduce_max(i, axis=1)
        c_max = tf.reduce_max(c, axis=1)
        z_max = tf.reduce_max(z, axis=1)
    else:
        o_min = tf.reduce_mean(o, axis=1)
        i_min = tf.reduce_mean(i, axis=1)
        c_min = tf.reduce_mean(c, axis=1)
        z_min = tf.reduce_mean(z, axis=1)
        
        o_max = 1
        i_max = 1
        c_max = 1
        z_max = 1

    # Create and initialize the dict of LRP variables
    lrp_keys = ['r_from_o', 'r_from_i', 'r_cc', 'r_y', 'r_cy', 'r_o', 'r_c', 'r_zi', 'r_z', 'r_i']
    zero = tf.constant(0, dtype=tf.int32)
    zero_init = tf.zeros_like(r_out[:, 0:1, :], tf.float32)
    lrp_dict = OrderedDict([(k, zero_init) for k in lrp_keys])
    
    with tf.name_scope("LRPRNNLoop"):
        # Create initial tensors
        init_tensors = OrderedDict([('t', n_timesteps-1)])
        init_tensors.update(lrp_dict)
        
        # Get initial tensor shapes in tf format
        init_shapes = OrderedDict([('t', init_tensors['t'].get_shape())])
        lrp_shapes = OrderedDict((k, tf.TensorShape(lrp_dict[k].get_shape().as_list()[:1] + [None] +
                                                    lrp_dict[k].get_shape().as_list()[2:])) for k in lrp_dict.keys())
        init_shapes.update(lrp_shapes)
    
    def cond(t, *args):
        return tf.greater(t, zero)

    def body(t, r_from_o, r_from_i, r_cc, r_y, r_cy, r_o, r_c, r_zi, r_z, r_i):
        #
        # for time t
        #
        if mul_rule is None:
            r_y = tf.concat([r_y, tf.expand_dims(r_out[:, t, :], axis=1)], axis=1)
            r_cy = tf.concat([r_cy, tf.expand_dims(r_y[:, -1, :], axis=1)], axis=1)
            r_o = tf.concat([r_o, zero_init], axis=1)

            r_c = tf.concat([r_c, tf.expand_dims(r_cy[:, -1, :] + r_cc[:, -1, :], axis=1)], axis=1)
            
            r_zi_new = tf.expand_dims(r_c[:, -1, :] * (i[:, t, :] * z[:, t, :] / c[:, t, :]), axis=1)
            r_zi = tf.concat([r_zi, tf.where(tf.is_nan(r_zi_new), zero_init, r_zi_new)], axis=1)

            r_z = tf.concat([r_z, tf.expand_dims(r_zi[:, -1, :], axis=1)], axis=1)
            r_i = tf.concat([r_i, zero_init], axis=1)
            
        else:
            r_y = tf.concat([r_y, tf.expand_dims(r_out[:, t, :] + r_from_o[:, -1, :] + r_from_i[:, -1, :], axis=1)],
                            axis=1)
            r_cy = tf.concat([r_cy, tf.expand_dims(mul_rule(act_h(c[:, t, :]), o[:, t, :], r_y[:, -1, :],
                                                            c_min, c_max, o_min, o_max), axis=1)], axis=1)
            r_o = tf.concat([r_o, tf.expand_dims(mul_rule(o[:, t, :], act_h(c[:, t, :]), r_y[:, -1, :],
                                                          o_min, o_max, c_min, c_max), axis=1)], axis=1)

            r_c = tf.concat([r_c, tf.expand_dims(r_cy[:, -1, :] + r_cc[:, -1, :], axis=1)], axis=1)
            
            r_zi_new = tf.expand_dims(r_c[:, -1, :] * (i[:, t, :] * z[:, t, :] / c[:, t, :]), axis=1)
            r_zi = tf.concat([r_zi, tf.where(tf.is_nan(r_zi_new), zero_init, r_zi_new)], axis=1)

            r_z = tf.concat([r_z, tf.expand_dims(mul_rule(z[:, t, :], i[:, t, :], r_zi[:, -1, :],
                                                          z_min, z_max, i_min, i_max), axis=1)], axis=1)
            r_i = tf.concat([r_i, tf.expand_dims(mul_rule(i[:, t, :], z[:, t, :], r_zi[:, -1, :],
                                                          i_min, i_max, z_min, z_max), axis=1)], axis=1)
        
        #
        # distribute R to units through recurrent connections
        #
        r_from_o_t = lrp(r=r_o[:, -1, :], w=w_o, x=o[:, t-1, :], x_min=o_min, alpha=alpha, beta=beta)
        r_from_o = tf.concat([r_from_o, tf.expand_dims(r_from_o_t, axis=1)], axis=1)
        
        r_from_i_t = lrp(r=r_i[:, -1, :], w=w_i, x=i[:, t-1, :], x_min=i_min, alpha=alpha, beta=beta)
        r_from_i = tf.concat([r_from_i, tf.expand_dims(r_from_i_t, axis=1)], axis=1)
        
        #
        # for time t-1
        #
        r_cc_new = tf.expand_dims(c[:, t-1, :] / c[:, t, :] * r_c[:, -1, :], axis=1)
        r_cc = tf.concat([r_cc, tf.where(tf.is_nan(r_cc_new), zero_init, r_cc_new)], axis=1)
        
        t -= 1
        
        return [t, r_from_o, r_from_i, r_cc, r_y, r_cy, r_o, r_c, r_zi, r_z, r_i]
    
    wl_ret = tf.while_loop(cond=cond, body=body, loop_vars=tuple(init_tensors.values()),
                           shape_invariants=tuple(init_shapes.values()), parallel_iterations=10,
                           back_prop=True, swap_memory=True)

    # Re-Associate returned tensors with keys
    r_collection = OrderedDict(zip(init_tensors.keys(), wl_ret))
    _ = r_collection.pop('t')
    
    # Remove artificial timestep at end of sequences (sequences are in reversed temporal order)
    for k in r_collection.keys():
        if k not in ['r_from_o', 'r_from_i', 'r_cc']:
            r_collection[k] = r_collection[k][:, 1:, :]
    
    #
    # for time t=0
    #
    t = 0
    if mul_rule is None:
        r_collection['r_y'] = tf.concat([r_collection['r_y'], tf.expand_dims(r_out[:, t, :], axis=1)], axis=1)
        r_collection['r_cy'] = tf.concat([r_collection['r_cy'], tf.expand_dims(r_collection['r_y'][:, -1, :], axis=1)], axis=1)
        r_collection['r_o'] = tf.concat([r_collection['r_o'], zero_init], axis=1)

        r_collection['r_c'] = tf.concat([r_collection['r_c'],
                                         tf.expand_dims(r_collection['r_cy'][:, -1, :] +
                                                        r_collection['r_cc'][:, -1, :], axis=1)], axis=1)

        # Use a local tensor here (as in the else branch); storing it in
        # r_collection would leave a stray 'r_zi_new' key in the returned dict.
        r_zi_new = tf.expand_dims(r_collection['r_c'][:, -1, :] *
                                  (i[:, t, :] * z[:, t, :] / c[:, t, :]), axis=1)
        r_collection['r_zi'] = tf.concat([r_collection['r_zi'],
                                          tf.where(tf.is_nan(r_zi_new), zero_init, r_zi_new)], axis=1)

        r_collection['r_z'] = tf.concat([r_collection['r_z'], tf.expand_dims(r_collection['r_zi'][:, -1, :], axis=1)],
                                        axis=1)
        r_collection['r_i'] = tf.concat([r_collection['r_i'], zero_init], axis=1)
        
    else:
        r_collection['r_y'] = tf.concat([r_collection['r_y'],
                                         tf.expand_dims(r_out[:, t, :] + r_collection['r_from_o'][:, -1, :] +
                                                        r_collection['r_from_i'][:, -1, :], axis=1)], axis=1)
        r_collection['r_cy'] = tf.concat([r_collection['r_cy'],
                                          tf.expand_dims(mul_rule(act_h(c[:, t, :]), o[:, t, :],
                                                                  r_collection['r_y'][:, -1, :],
                                                                  c_min, c_max, o_min, o_max), axis=1)], axis=1)
        r_collection['r_o'] = tf.concat([r_collection['r_o'],
                                         tf.expand_dims(mul_rule(o[:, t, :], act_h(c[:, t, :]),
                                                                 r_collection['r_y'][:, -1, :],
                                                                 o_min, o_max, c_min, c_max), axis=1)], axis=1)

        r_collection['r_c'] = tf.concat([r_collection['r_c'],
                                         tf.expand_dims(r_collection['r_cy'][:, -1, :] +
                                                        r_collection['r_cc'][:, -1, :], axis=1)], axis=1)

        r_zi_new = tf.expand_dims(r_collection['r_c'][:, -1, :] * (i[:, t, :] * z[:, t, :] / c[:, t, :]), axis=1)
        r_collection['r_zi'] = tf.concat([r_collection['r_zi'], tf.where(tf.is_nan(r_zi_new), zero_init, r_zi_new)],
                                         axis=1)

        r_collection['r_z'] = tf.concat([r_collection['r_z'],
                                         tf.expand_dims(mul_rule(z[:, t, :], i[:, t, :], r_collection['r_zi'][:, -1, :],
                                                                 z_min, z_max, i_min, i_max), axis=1)], axis=1)
        r_collection['r_i'] = tf.concat([r_collection['r_i'],
                                         tf.expand_dims(mul_rule(i[:, t, :], z[:, t, :], r_collection['r_zi'][:, -1, :],
                                                                 i_min, i_max, z_min, z_max), axis=1)], axis=1)
    
    
    
    # Relevance is stored with reversed time dimension - correct it
    r_collection = OrderedDict((k, v[:, ::-1, :]) for k, v in r_collection.items())
    
    return r_collection['r_z'], r_collection
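
The NaN guard repeated throughout the loop above protects the proportional redistribution r_c * (i * z / c) against 0/0 when a cell state is exactly zero. As a standalone helper (a sketch; safe_share is my name, not part of the TeLL code), noting that zeroing the NaN entries is only strictly correct in the all-positive case:

import tensorflow as tf

def safe_share(relevance, numerator, denominator):
    # relevance * numerator / denominator, with 0 wherever the ratio is NaN (0/0).
    share = relevance * numerator / denominator
    return tf.where(tf.is_nan(share), tf.zeros_like(share), share)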
Example #48
def interp_regular_1d_grid(x,
                           x_ref_min,
                           x_ref_max,
                           y_ref,
                           axis=-1,
                           fill_value='constant_extension',
                           fill_value_below=None,
                           fill_value_above=None,
                           grid_regularizing_transform=None,
                           name=None):
  """Linear `1-D` interpolation on a regular (constant spacing) grid.

  Given reference values, this function computes a piecewise linear interpolant
  and evaluates it on a new set of `x` values.

  The interpolant is built from `M` reference values indexed by one dimension
  of `y_ref` (specified by the `axis` kwarg).

  If `y_ref` is a vector, then each value `y_ref[i]` is considered to be equal
  to `f(x_ref[i])`, for `M` (implicitly defined) reference values between
  `x_ref_min` and `x_ref_max`:

  ```none
  x_ref[i] = x_ref_min + i * (x_ref_max - x_ref_min) / (M - 1),
  i = 0, ..., M - 1.
  ```

  If `rank(y_ref) > 1`, then `y_ref` contains `M` reference values of a
  `rank(y_ref) - 1` rank tensor valued function of one variable.
  `x_ref` is a `Tensor` of values of that variable (any shape allowed).

  Args:
    x: Numeric `Tensor` The x-coordinates of the interpolated output values.
    x_ref_min:  `Tensor` of same `dtype` as `x`.  The minimum value of the
      (implicitly defined) reference `x_ref`.
    x_ref_max:  `Tensor` of same `dtype` as `x`.  The maximum value of the
      (implicitly defined) reference `x_ref`.
    y_ref:  `N-D` `Tensor` (`N > 0`) of same `dtype` as `x`.
      The reference output values.
    axis:  Scalar `Tensor` designating the dimension of `y_ref` that indexes
      values of the interpolation variable.
      Default value: `-1`, the rightmost axis.
    fill_value:  Determines what values output should take for `x` values that
      are below `x_ref_min` or above `x_ref_max`.
      `Tensor` or one of the strings
        "constant_extension" ==> Extend as constant function.
        "extrapolate" ==> Extrapolate in a linear fashion.
      Default value: `"constant_extension"`
    fill_value_below:  Optional override of `fill_value` for `x < x_ref_min`.
    fill_value_above:  Optional override of `fill_value` for `x > x_ref_max`.
    grid_regularizing_transform:  Optional transformation `g` which regularizes
      the implied spacing of the x reference points.  In other words, if
      provided, we assume `g(x_ref_i)` is a regular grid between `g(x_ref_min)`
      and `g(x_ref_max)`.
    name:  A name to prepend to created ops.
      Default value: `"interp_regular_1d_grid"`.

  Returns:
    y_interp:  Interpolation between members of `y_ref`, at points `x`.
      `Tensor` of same `dtype` as `x`, and shape
      `y.shape[:axis] + x.shape + y.shape[axis + 1:]`

  Raises:
    ValueError:  If `fill_value` is not an allowed string.
    ValueError:  If `axis` is not a scalar.

  #### Examples

  Interpolate a function of one variable:

  ```python
  y_ref = tf.exp(tf.linspace(start=0., stop=10., num=20))

  interp_regular_1d_grid(
      x=[6.0, 0.5, 3.3], x_ref_min=0., x_ref_max=10., y_ref=y_ref)
  ==> approx [exp(6.0), exp(0.5), exp(3.3)]
  ```

  Interpolate a matrix-valued function of one variable:

  ```python
  mat_0 = [[1., 0.], [0., 1.]]
  mat_1 = [[0., -1], [1, 0]]
  y_ref = [mat_0, mat_1]

  # Get three output matrices at once.
  tfp.math.interp_regular_1d_grid(
      x=[0., 0.5, 1.], x_ref_min=0., x_ref_max=1., y_ref=y_ref, axis=0)
  ==> [mat_0, 0.5 * mat_0 + 0.5 * mat_1, mat_1]
  ```

  Interpolate a function of one variable on a log-spaced grid:

  ```python
  x_ref = tf.exp(tf.linspace(tf.log(1.), tf.log(100000.), num_pts))
  y_ref = tf.log(x_ref + x_ref**2)

  interp_regular_1d_grid(x=[1.1, 2.2], x_ref_min=1., x_ref_max=100000.,
      y_ref=y_ref, grid_regularizing_transform=tf.log)
  ==> [tf.log(1.1 + 1.1**2), tf.log(2.2 + 2.2**2)]
  ```

  """

  with tf.name_scope(
      name,
      'interp_regular_1d_grid',
      values=[
          x, x_ref_min, x_ref_max, y_ref, axis, fill_value, fill_value_below,
          fill_value_above
      ]):

    # Arg checking.
    allowed_fv_st = ('constant_extension', 'extrapolate')
    for fv in (fill_value, fill_value_below, fill_value_above):
      if isinstance(fv, str) and fv not in allowed_fv_st:
        raise ValueError(
            'A fill value ({}) was not an allowed string ({})'.format(
                fv, allowed_fv_st))

    # Separate value fills for below/above incurs extra cost, so keep track of
    # whether this is needed.
    need_separate_fills = (
        fill_value_above is not None or fill_value_below is not None or
        fill_value == 'extrapolate'  # always requires separate below/above
    )
    if need_separate_fills and fill_value_above is None:
      fill_value_above = fill_value
    if need_separate_fills and fill_value_below is None:
      fill_value_below = fill_value

    axis = tf.convert_to_tensor(axis, name='axis', dtype=tf.int32)
    _assert_ndims_statically(axis, expect_ndims=0)
    axis = distribution_util.make_non_negative_axis(axis, tf.rank(y_ref))

    dtype = dtype_util.common_dtype([x, x_ref_min, x_ref_max, y_ref],
                                    preferred_dtype=tf.float32)
    x = tf.convert_to_tensor(x, name='x', dtype=dtype)
    x_ref_min = tf.convert_to_tensor(x_ref_min, name='x_ref_min', dtype=dtype)
    x_ref_max = tf.convert_to_tensor(x_ref_max, name='x_ref_max', dtype=dtype)
    y_ref = tf.convert_to_tensor(y_ref, name='y_ref', dtype=dtype)

    ny = tf.cast(tf.shape(y_ref)[axis], dtype)

    # Map [x_ref_min, x_ref_max] to [0, ny - 1].
    # This is the (fractional) index of x.
    if grid_regularizing_transform is None:
      g = lambda x: x
    else:
      g = grid_regularizing_transform
    fractional_idx = ((g(x) - g(x_ref_min)) / (g(x_ref_max) - g(x_ref_min)))
    x_idx_unclipped = fractional_idx * (ny - 1)

    # Wherever x is NaN, x_idx_unclipped will be NaN as well.
    # Keep track of the nan indices here (so we can impute NaN later).
    # Also eliminate NaN indices, since they have no int32 counterpart for gather().
    nan_idx = tf.is_nan(x_idx_unclipped)
    x_idx_unclipped = tf.where(nan_idx, tf.zeros_like(x_idx_unclipped),
                               x_idx_unclipped)

    x_idx = tf.clip_by_value(x_idx_unclipped, tf.zeros((), dtype=dtype), ny - 1)

    # Get the index above and below x_idx.
    # Naively we could set idx_below = floor(x_idx), idx_above = ceil(x_idx),
    # however, this results in idx_below == idx_above whenever x is on a grid.
    # This in turn results in y_ref_below == y_ref_above, and then the gradient
    # at this point is zero.  So here we "jitter" one of idx_below, idx_above,
    # so that they are at different values.  This jittering does not affect the
    # interpolated value, but does make the gradient nonzero (unless of course
    # the y_ref values are the same).
    idx_below = tf.floor(x_idx)
    idx_above = tf.minimum(idx_below + 1, ny - 1)
    idx_below = tf.maximum(idx_above - 1, 0)

    # These are the values of y_ref corresponding to above/below indices.
    idx_below_int32 = tf.to_int32(idx_below)
    idx_above_int32 = tf.to_int32(idx_above)
    y_ref_below = tf.gather(y_ref, idx_below_int32, axis=axis)
    y_ref_above = tf.gather(y_ref, idx_above_int32, axis=axis)

    # out_shape = y_ref.shape[:axis] + x.shape + y_ref.shape[axis + 1:]
    out_shape = tf.shape(y_ref_below)

    # Return a convex combination.
    t = x_idx - idx_below

    t = _expand_ends(t, out_shape, axis)

    y = t * y_ref_above + (1 - t) * y_ref_below

    # Now begins a long excursion to fill values outside [x_min, x_max].

    # Re-insert NaN wherever x was NaN.
    y = tf.where(
        _expand_ends(nan_idx, out_shape, axis, broadcast=True),
        tf.fill(tf.shape(y), tf.constant(np.nan, y.dtype)), y)

    x_idx_unclipped = _expand_ends(
        x_idx_unclipped, out_shape, axis, broadcast=True)

    if not need_separate_fills:
      if fill_value == 'constant_extension':
        pass  # Already handled by clipping x_idx_unclipped.
      else:
        y = tf.where((x_idx_unclipped < 0) | (x_idx_unclipped > ny - 1),
                     fill_value + tf.zeros_like(y), y)
    else:
      # Fill values below x_ref_min <==> x_idx_unclipped < 0.
      if fill_value_below == 'constant_extension':
        pass  # Already handled by the clipping that created x_idx_unclipped.
      elif fill_value_below == 'extrapolate':
        y_0 = tf.gather(y_ref, tf.zeros(tf.shape(x), dtype=tf.int32), axis=axis)
        y_1 = tf.gather(y_ref, tf.ones(tf.shape(x), dtype=tf.int32), axis=axis)
        x_delta = (x_ref_max - x_ref_min) / (ny - 1)
        x_factor = (x - x_ref_min) / x_delta
        x_factor = _expand_ends(x_factor, out_shape, axis, broadcast=True)
        y = tf.where(x_idx_unclipped < 0, y_0 + x_factor * (y_1 - y_0), y)
      else:
        y = tf.where(x_idx_unclipped < 0, fill_value_below + tf.zeros_like(y),
                     y)
      # Fill values above x_ref_max <==> x_idx_unclipped > ny - 1.
      if fill_value_above == 'constant_extension':
        pass  # Already handled by the clipping that created x_idx_unclipped.
      elif fill_value_above == 'extrapolate':
        ny_int32 = tf.shape(y_ref)[axis]
        y_n1 = tf.gather(y_ref, tf.fill(tf.shape(x), ny_int32 - 1), axis=axis)
        y_n2 = tf.gather(y_ref, tf.fill(tf.shape(x), ny_int32 - 2), axis=axis)
        x_delta = (x_ref_max - x_ref_min) / (ny - 1)
        x_factor = (x - x_ref_max) / x_delta
        x_factor = _expand_ends(x_factor, out_shape, axis, broadcast=True)
        y = tf.where(x_idx_unclipped > ny - 1,
                     y_n1 + x_factor * (y_n1 - y_n2), y)
      else:
        y = tf.where(x_idx_unclipped > ny - 1,
                     fill_value_above + tf.zeros_like(y), y)

    return y
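
A small usage sketch of the function above (hypothetical values; assumes numpy as np and TensorFlow are imported). NaN inputs are mapped to index 0 so gather() succeeds, then NaN is re-imputed in the output; the reference function here is linear, so 'extrapolate' recovers it exactly:

y_ref = tf.linspace(0., 1., 10)   # f(x) = x on [0, 1]
y = interp_regular_1d_grid(
    x=[0.5, np.nan, 2.0], x_ref_min=0., x_ref_max=1., y_ref=y_ref,
    fill_value='extrapolate')
# ==> approx [0.5, nan, 2.0]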
Example #49
 def lrp_lstm_c(r_out, o, c, i, z, w_o, w_i, act_h, n_timesteps, multiplication_rule):
     """lrp applied to TeLL LSTMLayer
 
     Parameters
     -------
     r_out : tensor (batchsize, timesteps, units)
     o, c, i, z  : tensor (batchsize, timesteps, units)
     w_o, w_i : tensor (incoming, outgoing)
     act_h activation function after cell
     multiplication_rule : int
     0...50/50 rule; 1...proportional rule; 3...no multiplication rule, no relevance through recurrent gate connections
     """
     if multiplication_rule == 0:
         mul_rule = additive_multiplication_rule
     elif multiplication_rule == 1:
         mul_rule = proportional_multiplication_rule
     elif multiplication_rule == 3:
         mul_rule = None
     else:
         raise AttributeError("Only multiplication_rule 0 and 1 are implemented")
     
     # Initialize input gate and output gate relevance with 0
     r_from_o = [tf.zeros_like(r_out[:, 0, :], tf.float32)]  # r_o redistributed to the individual units in t-1
     r_from_i = [tf.zeros_like(r_out[:, 0, :], tf.float32)]  # r_i redistributed to the individual units in t-1
     r_cc = [tf.zeros_like(r_out[:, 0, :], tf.float32)]  # r_ct<-ct+1
     
     r_y = []
     r_cy = []  # r_ct<-yt
     r_o = []
     r_c = []
     r_zi = []
     r_z = []
     r_i = []
     ttt = []
     
     rev_timesteps = np.arange(n_timesteps)[::-1]
     for t in rev_timesteps:
         #
         # for time t
         #
         ttt.append(r_out[:, t, :])
         if mul_rule is None:
             r_y.append(r_out[:, t, :])
             r_cy.append(r_y[-1])
             r_o.append(tf.zeros_like(r_y[-1]))
             
             r_c.append(r_cy[-1] + r_cc[-1])
             
             r_zi.append(r_c[-1] * (i[:, t, :] * z[:, t, :] / c[:, t, :]))
             r_zi[-1] = tf.where(tf.is_nan(r_zi[-1]), tf.zeros_like(r_zi[-1]), r_zi[
                 -1])  # TODO: This only holds for all-positive case! Otherwise we will need to consider r_zi[-2] to assign either full R or 0
             
             r_z.append(r_zi[-1])
             r_i.append(tf.zeros_like(r_zi[-1]))
         
         else:
             r_y.append(r_out[:, t, :] + r_from_o[-1] + r_from_i[-1])
             r_cy.append(mul_rule(act_h(c[:, t, :]), o[:, t, :], r_y[-1], c_min, c_max, o_min, o_max))
             r_o.append(mul_rule(o[:, t, :], act_h(c[:, t, :]), r_y[-1], o_min, o_max, c_min, c_max))
             
             r_c.append(r_cy[-1] + r_cc[-1])
             
             r_zi.append(r_c[-1] * (i[:, t, :] * z[:, t, :] / c[:, t, :]))
             r_zi[-1] = tf.where(tf.is_nan(r_zi[-1]), tf.zeros_like(r_zi[-1]), r_zi[
                 -1])  # TODO: This only holds for all-positive case! Otherwise we will need to consider r_zi[-2] to assign either full R or 0
             
             r_z.append(mul_rule(z[:, t, :], i[:, t, :], r_zi[-1], z_min, z_max, i_min, i_max))
             r_i.append(mul_rule(i[:, t, :], z[:, t, :], r_zi[-1], i_min, i_max, z_min, z_max))
         
         if t > 0:
             #
             # distribute R to units through recurrent connections
             #
             r_from_o_t = lrp(r=r_o[-1], w=w_o, x=o[:, t - 1, :], x_min=o_min, alpha=alpha, beta=beta)
             r_from_o.append(r_from_o_t)
             
             r_from_i_t = lrp(r=r_i[-1], w=w_i, x=i[:, t - 1, :], x_min=i_min, alpha=alpha, beta=beta)
             r_from_i.append(r_from_i_t)
             
             #
             # for time t-1
             #
             r_cc.append(c[:, t - 1, :] / c[:, t, :] * r_c[-1])
             r_cc[-1] = tf.where(tf.is_nan(r_cc[-1]), tf.zeros_like(r_cc[-1]),
                                 r_cc[-1])  # TODO: This only holds for all-positive case!
     
     r_collection = dict(r_from_o=tf.stack(r_from_o, axis=1), r_from_i=tf.stack(r_from_i, axis=1),
                         r_cc=tf.stack(r_cc, axis=1), r_y=tf.stack(r_y, axis=1), r_cy=tf.stack(r_cy, axis=1),
                         r_o=tf.stack(r_o, axis=1), r_c=tf.stack(r_c, axis=1), r_zi=tf.stack(r_zi, axis=1),
                         r_z=tf.stack(r_z, axis=1), r_i=tf.stack(r_i, axis=1), ttt=tf.stack(ttt, axis=1))
     
     # Relevance is stored with reversed time dimension - correct it
     r_collection = dict((k, v[:, ::-1, :]) for k, v in r_collection.items())
     
     return r_collection['r_z'], r_collection
Example #50
    def get_features_labels(self, sample_list, offset):
        '''get features and labels from the samples'''

        all_images = []
        all_labels = []

        for sample in sample_list:
            file_name = sample[0]
            for example in tf.python_io.tf_record_iterator(file_name):
                img_geom = self.iad_dimensions[str(self.layer)]
                features = dict()
                features['label'] = tf.FixedLenFeature((), tf.int64)

                features['img/{:02d}'.format(self.layer)] = tf.FixedLenFeature(
                    (), tf.string)
                features['num_rows/{:02d}'.format(
                    self.layer)] = tf.FixedLenFeature((), tf.int64)
                features['num_columns/{:02d}'.format(
                    self.layer)] = tf.FixedLenFeature((), tf.int64)

                parsed_features = tf.parse_single_example(example, features)
                num_rows = parsed_features['num_rows/{:02d}'.format(
                    self.layer)]
                num_columns = parsed_features['num_columns/{:02d}'.format(
                    self.layer)]

                # decode the image, get label
                img = tf.decode_raw(
                    parsed_features['img/{:02d}'.format(self.layer)],
                    tf.float32)
                img = tf.where(tf.is_nan(img), tf.zeros_like(img), img)
                img = tf.clip_by_value(img, 0.0, 1.0)
                #img = tf.subtract(img, 0.5)

                img = tf.reshape(img, (num_rows, num_columns, 1),
                                 "parse_reshape_test")
                print("img shape = %s" % img.get_shape())

                # random slice of the image
                #img = tf.random_crop(img, [img_geom[0], img_geom[1], 1])
                #column_offsets = list(range(num_columns))[::img_geom[1]]
                column_offsets = tf.range(0,
                                          num_columns - img_geom[1],
                                          delta=img_geom[1])

                # determine the offset for the IAD slice
                if offset == -1:
                    # select a random IAD slice
                    start_column = tf.cast(
                        tf.random_shuffle(column_offsets)[0], dtype=tf.int32)
                    new_offset = -1
                elif offset == -2:
                    start_column = 0
                    new_offset = -2
                else:
                    start_column = offset
                    new_offset = offset + img_geom[1]
                    if new_offset > img_geom[1]:
                        new_offset = 0

                # slice the image
                img = tf.slice(img, [0, start_column, 0],
                               [img_geom[0], img_geom[1], img_geom[2]])
                print("slice shape = %s" % img.get_shape())

                # get a random slice of the image, use column offsets
                #column_offsets = list(range(num_columns))[::img_geom[1]]
                #start_column = random.choice(column_offsets)
                #img = tf.slice(img, [0, start_column, 0], [img_geom[0], img_geom[1], img_geom[2]])

                #if NORMALIZE_IMAGE:
                #    img = tf.image.per_image_standardization(img)

                label = tf.cast(parsed_features['label'], tf.int64)
                label = tf.one_hot(label,
                                   depth=self.num_classes,
                                   dtype=tf.int32)

                all_images.append(img)
                all_labels.append(label)

        # convert list to ndarray
        all_images = np.array(all_images)
        all_labels = np.array(all_labels)

        return all_images, all_labels, new_offset
Example #51
 def depthLoss(self, y_true, y_pred):
     diff = tf.where(tf.is_nan(y_true), tf.zeros_like(y_true),
                     y_true - y_pred)
     mean = tf.sqrt(tf.reduce_mean(tf.square(diff)))
     return mean
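
A note on the loss above: tf.reduce_mean still averages over the NaN positions (they contribute zero error), which biases the RMSE low when many targets are invalid. A sketch that instead averages only over valid pixels (assuming each batch has at least one valid target; depth_rmse_valid_only is a hypothetical name):

def depth_rmse_valid_only(y_true, y_pred):
    valid = tf.logical_not(tf.is_nan(y_true))
    diff = tf.where(valid, y_true - y_pred, tf.zeros_like(y_true))
    n_valid = tf.reduce_sum(tf.cast(valid, tf.float32))
    return tf.sqrt(tf.reduce_sum(tf.square(diff)) / n_valid)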
Example #52
def replace_nan_values(gt):
    with tf.variable_scope('replace_nan'):
        gt = tf.where(tf.is_nan(gt), tf.zeros_like(gt), gt)
        return gt
Example #53
    def tf_get_layer_distance(self, r_0, r_1, v, d):
        """
        Calculates the travel distance in each layer for each photon.

        Parameters
        ----------
        r_0 : TF Tensor, shape(?, 3)
            Photon starting positions (scattering point).
        r_1 : TF Tensor, shape(?, 3)
            Photon end positions (next scattering point or hit).
        v : TF Tensor, shape(?, 3) or None
            Normalized direction vectors r_1 - r_0. Redundant, but since they
            are already calculated beforehand they should be passed in rather
            than recomputed.
        d : TF Tensor, shape(?)
            The distance between r_1 and r_0. Also redundant but already known
            beforehand.

        Returns
        -------
        TF Tensor of shape(?, N_layers) where each entry is the traveled
        distance of the corresponding photon in the corresponding layer.
        """
        # grab z coordinates from start and end vectors, make sure z_0 < z_1
        z_0 = tf.where(r_0[:, 2] < r_1[:, 2], r_0[:, 2], r_1[:, 2])
        z_1 = tf.where(r_0[:, 2] > r_1[:, 2], r_0[:, 2], r_1[:, 2])

        # initialize the distance vector (traveled distance in each layer)
        d_z = tf.zeros([settings.BATCH_SIZE, self.N_layer],
                       dtype=settings.FLOAT_PRECISION)

        # expand and tile for where
        z_0 = tf.tile(tf.expand_dims(z_0, 1), [1, self.N_layer])
        z_1 = tf.tile(tf.expand_dims(z_1, 1), [1, self.N_layer])

        z_l = tf.tile(tf.expand_dims(self._z_l, 0), [settings.BATCH_SIZE, 1])
        z_h = tf.tile(tf.expand_dims(self._z_h, 0), [settings.BATCH_SIZE, 1])

        # completely traversed layers
        d_z += tf.where(tf.logical_and(z_l > z_0, z_h < z_1),
                        self.dz * tf.ones_like(d_z), tf.zeros_like(d_z))

        # starting layer
        d_z += tf.where(tf.logical_and(z_l < z_0, z_h > z_0), z_h - z_0,
                        tf.zeros_like(d_z))

        # last layer
        d_z += tf.where(tf.logical_and(z_l < z_1, z_h > z_1), z_1 - z_l,
                        tf.zeros_like(d_z))

        # rescale to real direction, since v is normalized the dot product and
        # therefore cos of the angle is simply the z component of v
        d_layer = d_z / tf.expand_dims(tf.abs(v[:, 2]), 1)

        # OR only in one layer
        d_layer = tf.where(
            tf.logical_and(tf.logical_and(z_l < z_0, z_h > z_0),
                           tf.logical_and(z_l < z_1, z_h > z_1)),
            tf.tile(tf.expand_dims(d, 1), [1, self.N_layer]), d_layer)

        # quick & dirty nan protection...
        d_layer = tf.where(tf.is_nan(d_layer), tf.zeros_like(d_layer), d_layer)

        return d_layer
Example #54
def train(sess, net, is_training, keep_prob, train_layers=None, fine_tune=None):

    if not os.path.exists(FLAGS.train_dir):
        os.makedirs(FLAGS.train_dir)
    
    coord = tf.train.Coordinator()
    reader = load_images(coord, FLAGS.data_dir)
    corpus_size = reader.corpus_size
    #import IPython; IPython.embed()
    if FLAGS.in_memory:
        X, Y_true = load_data_to_memory(FLAGS.data_dir,pattern='*.npy', limit=1000000, dshape=SP2_BOX)
        corpus_size = Y_true.size
    if fine_tune is not None:
        train_batch_pipe, label_pipe = reader.dequeue(FLAGS.batch_size // 2)  # integer division, works under Python 2 and 3
        tX, tY_true = load_data_to_memory(FLAGS.tune_dir,pattern='*.npy', limit=100000, dshape=SP2_BOX)
        tune_size = tX.shape[0]
    else:
        train_batch_pipe, label_pipe = reader.dequeue(FLAGS.batch_size)
    train_batch = tf.placeholder(reader.tfdtype, name='train_placeholder', shape=[None, SP2_BOX[0], SP2_BOX[1], SP2_BOX[2]])
    labels = tf.placeholder(dtype=reader.label_type, shape=[None], name='label_placeholder')
    if False:
        train_batch = tf.clip_by_value(train_batch, -1, 1)
    if False: #single image normalization
        mean, var = tf.nn.moments(train_batch**2, [1], keep_dims=True)
        train_batch /= tf.sqrt(mean)
    if False:
        mean, var = tf.nn.moments(input_placeholder, [1], keep_dims=True) #single image normalization
        train_batch = tf.div(tf.subtract(input_placeholder, mean), tf.sqrt(var))
        train_batch = tf.where(tf.is_nan(train_batch), tf.zeros_like(train_batch), train_batch)
        train_batch = tf.nn.avg_pool(train_batch, 
                                ksize=[1, FLAGS.tpool_chan, FLAGS.pool_chan, 1],
                                strides=[1, FLAGS.tpool_chan, FLAGS.pool_chan, 1],
                                padding='SAME')
        if FLAGS.crop:
            train_batch = tf.image.crop_and_resize(train_batch,
                                                 boxes=[SP2_BOX])

    if train_batch.dtype != tf.float32:
        train_batch = tf.cast(train_batch, tf.float32)
    if FLAGS.relu_input == 'relu':
        train_batch = tf.nn.relu(train_batch) 
    elif FLAGS.relu_input == 'lrelu':
        train_batch = lrelu(train_batch, alpha=0.2)

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    val_step = tf.get_variable('val_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)

    
    logits = net.inference(train_batch, name='logits')
    #import IPython; IPython.embed() 
    loss_ = net.loss(logits, labels, name='weather_loss')
    predictions = tf.nn.softmax(logits, name='output')
    #import IPython; IPython.embed()
    top1_error = top_k_error(predictions, labels, 1)

    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    # loss_avg
    tf.add_to_collection(UPDATE_OPS_COLLECTION, ema.apply([loss_]))
    tf.summary.scalar('loss_avg', ema.average(loss_))

    # validation stats
    ema = tf.train.ExponentialMovingAverage(0.99, val_step)
    val_op = tf.group(val_step.assign_add(1), ema.apply([top1_error]))
    top1_error_avg = ema.average(top1_error)
    tf.summary.scalar('val_top1_error_avg', top1_error_avg)
    learning_rate = tf.placeholder(tf.float32, [], name='learning_rate')
    tf.summary.scalar('learning_rate', learning_rate)
    ###
    opt = tf.train.MomentumOptimizer(learning_rate, MOMENTUM, use_nesterov=True)
    all_grads = opt.compute_gradients(loss_)

    if not FLAGS.resume or train_layers is None:
        grads = all_grads
    else:
        grads = []
        layer_names = ['fc']
        if len(train_layers) > 0:
            layer_names += ["scale{}".format(i) for i in train_layers]
        for grad, var in all_grads:
            if any([n in var.name for n in layer_names]):
                grads.append([grad, var])

    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
    for grad, var in grads:
        if "weight" in var.name and grad is not None and not FLAGS.minimal_summaries:
            dims = len(grad.get_shape())
            grad_per_feat = tf.reduce_mean(grad, reduction_indices=range(dims), name="avg_pool")
            tf.summary.histogram(var.op.name + '/gradients/', grad)
            tf.summary.histogram(var.op.name + '/gradients_per_feat/', grad_per_feat)

    if not FLAGS.minimal_summaries and False:
        # Display the training images in the visualizer.
        #tf.image_summary('images', images)

        for var in tf.trainable_variables():
            tf.summary.histogram(var.op.name, var)

    batchnorm_updates = tf.get_collection(UPDATE_OPS_COLLECTION)
    batchnorm_updates_op = tf.group(*batchnorm_updates)
    train_op = tf.group(apply_gradient_op, batchnorm_updates_op)

    saver = tf.train.Saver(tf.global_variables())

    summary_op = tf.summary.merge_all()

    init = tf.global_variables_initializer()
    nparams = 0
    for v in tf.global_variables():
        #sh = np.asarray(v.get_shape()).astype(np.float)
        if len(v.get_shape())>0:
            #print(v.name, int(np.prod(v.get_shape())))
            nparams += int(np.prod(v.get_shape()))
    print("Number of parameters in network", nparams)
    #import IPython; IPython.embed()
    sess.run(init)

    summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

    if FLAGS.resume:
        latest = tf.train.latest_checkpoint(FLAGS.train_dir)
        if not latest:
            print("No checkpoint to continue from in", FLAGS.train_dir)
            sys.exit(1)
        print("resume", latest)
        saver.restore(sess, latest)

    if not FLAGS.in_memory: 
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        reader.start_threads(sess)
    try:
        for epoch in xrange(FLAGS.epoch):
            if FLAGS.in_memory:
                inds = np.arange(corpus_size)
                np.random.shuffle(inds)
                X, Y_true = X[inds], Y_true[inds]
            if epoch == 60:
                FLAGS.learning_rate /= 10.
            if FLAGS.num_per_epoch:
                batch_idx = min(FLAGS.num_per_epoch, corpus_size) // FLAGS.batch_size
            else:
                batch_idx = corpus_size // FLAGS.batch_size
            for idx in xrange(batch_idx):
                start_time = time.time()

                step = sess.run(global_step)
                i = [train_op, loss_]

                write_summary = step % 100 == 0 and step > 1  # was `step % 100`, which fired on every step except multiples of 100
                if write_summary:
                    i.append(summary_op)

                if FLAGS.in_memory:
                    inds = np.random.choice(np.arange(corpus_size), size=FLAGS.batch_size)
                    batch, batch_labels = X[inds], Y_true[inds]
                else:
                    batch, batch_labels = sess.run([train_batch_pipe, label_pipe])
                if fine_tune is not None:
                    inds = np.random.choice(np.arange(tune_size), size=FLAGS.batch_size // 2)
                    tbatch, tlabels = tX[inds], tY_true[inds]
                    batch = np.vstack([batch, tbatch])
                    batch_labels = np.concatenate([batch_labels, tlabels])
                #import IPython; IPython.embed()
                o = sess.run(i, { train_batch:batch, labels:batch_labels, is_training: True, keep_prob: 0.5, learning_rate: FLAGS.learning_rate })
                #import IPython; IPython.embed()

                loss_value = o[1]
                duration = time.time() - start_time
                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

                if step % 10 == 0:
                    examples_per_sec = FLAGS.batch_size / float(duration)
                    format_str = ('Epoch %d, [%d / %d], loss = %.2f (%.1f examples/sec; %.3f '
                                  'sec/batch)')
                    print(format_str % (epoch, idx, batch_idx, loss_value, examples_per_sec, duration))

                if write_summary:
                    summary_str = o[2]
                    summary_writer.add_summary(summary_str, step)

                # Save the model checkpoint periodically.
                if step > 1 and step % 500 == 0:
                    checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=global_step)

                # Run validation periodically
                if step % 100 == 0:
                    _, top1_error_value, y_true, y_pred = sess.run([val_op, top1_error, labels, predictions], {train_batch:batch, labels:batch_labels, is_training: False, keep_prob: 1})
                    #pp, ll = sess.run([predictions, labels], {is_training:False})
                    #print('Predictions: ', pp)
                    #print('labels: ', ll)
                    y_pred = np.argmax(y_pred, axis=1)
                    print(sklearn.metrics.classification_report(y_true,y_pred))
                    print(sklearn.metrics.confusion_matrix(y_true,y_pred))
                    print("Classification accuracy: %0.6f" % sklearn.metrics.accuracy_score(y_true,y_pred) )
                    print('weather top1 error {}'.format(top1_error_value))

    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        print('Finished, see output in {}'.format(FLAGS.train_dir))
        if not FLAGS.in_memory:
            coord.request_stop()
            coord.join(threads)
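
For reference, top_k_error is used above but not defined in this excerpt. A minimal sketch of an equivalent helper (assuming integer class labels; not necessarily the author's exact implementation):

def top_k_error(predictions, labels, k):
    # Fraction of examples whose true label is not among the top-k predictions.
    in_top_k = tf.nn.in_top_k(predictions, tf.cast(labels, tf.int32), k)
    return 1.0 - tf.reduce_mean(tf.cast(in_top_k, tf.float32))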
Example #55
    def __init__(self,
                 batch_size=32,
                 length_data=3000,
                 n_channel=3,
                 is_training=True,
                 model_name="wavenet"):
        n_dim = 128
        self.graph = tf.Graph()
        self.model_name = model_name
        with self.graph.as_default():
            self.is_training = is_training
            self.input_data = tf.placeholder(
                dtype=tf.float32, shape=[batch_size, length_data, n_channel])
            self.label_p = tf.placeholder(dtype=tf.int32,
                                          shape=[batch_size, length_data])
            self.label_s = tf.placeholder(dtype=tf.int32,
                                          shape=[batch_size, length_data])
            self.label_pt = tf.placeholder(dtype=tf.float32,
                                           shape=[batch_size, length_data, 1])
            self.label_st = tf.placeholder(dtype=tf.float32,
                                           shape=[batch_size, length_data, 1])
            self.weight_p = tf.placeholder(dtype=tf.float32,
                                           shape=[batch_size, length_data])
            self.weight_s = tf.placeholder(dtype=tf.float32,
                                           shape=[batch_size, length_data])
            self.weight_pt = tf.placeholder(dtype=tf.float32,
                                            shape=[batch_size, length_data])
            self.weight_st = tf.placeholder(dtype=tf.float32,
                                            shape=[batch_size, length_data])

            if model_name == "wavenet":
                net = model.wavenet(self.input_data, is_training=is_training)
            elif model_name == "unet":
                net = model.unet(self.input_data, is_training=is_training)
            elif model_name == "brnn":
                net = model.brnn(self.input_data, is_training=is_training)
            elif model_name == "inception":
                net = model.inception(self.input_data, is_training=is_training)
            else:
                raise "Model name error"
            with tf.variable_scope('logit_p'):
                self.logit_p = tf.layers.conv1d(net,
                                                2,
                                                3,
                                                activation=None,
                                                padding="same")
            with tf.variable_scope('logit_s'):
                self.logit_s = tf.layers.conv1d(net,
                                                2,
                                                3,
                                                activation=None,
                                                padding="same")
            with tf.variable_scope('time_p'):
                self.times_p = tf.layers.conv1d(net,
                                                1,
                                                3,
                                                activation=None,
                                                padding="same")
            with tf.variable_scope('time_s'):
                self.times_s = tf.layers.conv1d(net,
                                                1,
                                                3,
                                                activation=None,
                                                padding="same")
            loss_p = tf.contrib.seq2seq.sequence_loss(self.logit_p,
                                                      self.label_p,
                                                      self.weight_p)
            loss_s = tf.contrib.seq2seq.sequence_loss(self.logit_s,
                                                      self.label_s,
                                                      self.weight_s)
            loss_tp = tf.reduce_mean(
                tf.reduce_sum(tf.squeeze(
                    (self.label_pt - self.times_p)**2) * self.weight_pt,
                              axis=1))
            loss_ts = tf.reduce_mean(
                tf.reduce_sum(tf.squeeze(
                    (self.label_st - self.times_s)**2) * self.weight_st,
                              axis=1))
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            reg_loss = tf.losses.get_regularization_loss()
            with tf.control_dependencies(update_ops):
                self.loss = loss_p * 1 + loss_s * 1 + loss_tp * 1 + loss_ts * 1 + 1e-6 * reg_loss

            # optimizer
            optimizer = tf.train.AdamOptimizer()
            self.optimize = optimizer.minimize(self.loss)

            self.logit_loss = loss_p + loss_s
            self.times_loss = loss_tp + loss_ts
            self.nan = tf.is_nan(self.loss)
            self.inf = tf.is_inf(self.loss)
            self.all_var = tf.trainable_variables()
            self.init = tf.global_variables_initializer()
            self.saver = tf.train.Saver()

            for itr in self.all_var:
                print(itr.name, itr.get_shape())
        self.summary = tf.summary.FileWriter("logdir", graph=self.graph)
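
A hypothetical usage sketch of the self.nan / self.inf probes defined above, checking the loss after every step so training can halt on a non-finite value (model and feed are placeholder names, not part of the class):

_, loss_value, loss_nan, loss_inf = sess.run(
    [model.optimize, model.loss, model.nan, model.inf], feed_dict=feed)
if loss_nan or loss_inf:
    raise RuntimeError("non-finite loss encountered")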
Example #56
0
def nan_mask(gt):
    with tf.variable_scope('remove_nan'):
        nan_mask = tf.where(tf.is_nan(gt), tf.zeros_like(gt), tf.ones_like(gt))
    return nan_mask
Example #57
 def replace_none(self, t):
     """
     This method replaces NaN values with 0.
     This can be used for sampling.  If a NaN is sampled, the viewer turns black and does not recover.
     """
     return tf.where(tf.is_nan(t), tf.zeros_like(t), t)
Example #58
def predict(sess, net, is_training, keep_prob, prefix='test_', append=False, from_fil=True):

    if not os.path.exists(FLAGS.train_dir):
        os.makedirs(FLAGS.train_dir)


    coord = tf.train.Coordinator()
    if from_fil:
        reader = load_filterbank(coord, FLAGS.fbfilename)
        test_batch, img_id = reader.dequeue(FLAGS.batch_size)


        
        if True:
            mean, var = tf.nn.moments(test_batch, [1], keep_dims=True) #single image normalization
            test_batch = tf.div(tf.subtract(test_batch, mean), tf.sqrt(var))
            test_batch = tf.where(tf.is_nan(test_batch), tf.zeros_like(test_batch), test_batch)
            test_batch = tf.nn.avg_pool(test_batch, 
                                    ksize=[1, FLAGS.tpool_chan, FLAGS.pool_chan, 1],
                                    strides=[1, FLAGS.tpool_chan, FLAGS.pool_chan, 1],
                                    padding='SAME')
            if FLAGS.crop:
                test_batch = tf.image.crop_and_resize(test_batch,
                                                     boxes=[SP2_BOX])


    else:
        reader = load_images(coord, FLAGS.data_dir, train=False)
        test_batch, img_id, _ = reader.dequeue(FLAGS.batch_size)
    if False:
        mean, var = tf.nn.moments(test_batch**2, [1], keep_dims=True)
        test_batch /= tf.sqrt(mean)

    corpus_size = reader.corpus_size

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)

    logits = net.inference(test_batch)
    wpred = tf.nn.softmax(logits)

    
    init = tf.global_variables_initializer()
    sess.run(init)
    saver = tf.train.Saver(tf.global_variables())
    latest = tf.train.latest_checkpoint(FLAGS.train_dir)
    if not latest:
        print("No checkpoint to continue from in", FLAGS.train_dir)
        sys.exit(1)
    print("resume", latest)
    saver.restore(sess, latest)
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)
    #import IPython; IPython.embed()
    sample_cnt = 0; add_cnt = FLAGS.batch_size
    OUTDIR = FLAGS.train_dir+'pos_5/'
    OUTFILE = FLAGS.train_dir+'pos_5.csv'
    if not append:
        outfile = open(OUTFILE, 'w')
        #outfile.write("image_name,tags\n")
    else:
        outfile = open(OUTFILE, 'a')
    detections = []
    persistent = False
    #OUTDIR = FLAGS.train_dir+'/frb20180301_3/'
    if not os.path.exists(OUTDIR):
        os.makedirs(OUTDIR)
    try:
        while True:
            print('from train', sample_cnt, corpus_size)
            if sample_cnt + FLAGS.batch_size*4 > corpus_size-1:
                break
            start_time = time.time()
            weather_scores, image_id, inputs = sess.run([wpred, img_id, test_batch], { is_training: False, keep_prob: 1 })
            string_list = get_predictions(weather_scores, batched=True)
            duration = time.time() - start_time
            #import IPython; IPython.embed()
            for n, label_str in enumerate(string_list):
                #print(prefix+str(image_id[n])+','+label_str)
                if n + sample_cnt >= corpus_size:
                    add_cnt = n 
                    break
                t_ind = int(image_id[n].split('_')[-1])
                #
                if weather_scores[n,1]> 0.5:
                    fname = prefix+str(image_id[n])#+','+label_str+'\n'
                    
                    if not persistent:
                        detections.append([t_ind, weather_scores[n][1]])
                        _save_pos(inputs[n], OUTDIR+fname, tstart=t_ind*0.0003495253)
                        #np.save(OUTDIR+fname, inputs[n])
                        print(t_ind*0.0003495253333333333, weather_scores[n][1])
                        outfile.write(','.join([str(t_ind*0.0003495253), str(weather_scores[n][1]), fname, '\n']))
                        #import IPython; IPython.embed()
                    persistent = True
                else:
                    persistent = False
                #outfile.write(prefix+str(image_id[n])+','+label_str+'\n')
            sample_cnt += add_cnt


            if sample_cnt % 20 == 0:
                perc = (FLAGS.batch_size/float(corpus_size))/(duration/(30.*60))
                qsize = sess.run(reader.queue.size())
                print("{}/{}, {} sec/batch, {} real time, queue size: {}".format(sample_cnt, corpus_size, duration, perc, qsize))
    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so the save message
        # is on its own line.
        print()
    finally:
        print('Finished, see output in {}'.format(OUTDIR))  # fname may be undefined if nothing was detected
        coord.request_stop()
        coord.join(threads)
        print('saving', OUTDIR+prefix)
        np.save(OUTDIR+prefix, np.asarray(detections))
        outfile.close()
Example #59
x_tf = tf.constant(x)
y_tf = tf.constant(y)
t_tf = x_tf / y_tf
m_tf = tf.reduce_mean(t_tf)
v_tf = tf.reduce_mean((t_tf - m_tf)**2)
final = (t_tf - m_tf) / tf.sqrt(v_tf)

# In[36]:

print(sess.run(final))

# So now we want to go nan/inf hunting again in TensorFlow.

# In[41]:

print(sess.run(tf.reduce_any(tf.logical_or(tf.is_inf(t_tf), tf.is_nan(t_tf)))))
print(sess.run(tf.logical_or(tf.is_inf(t_tf), tf.is_nan(t_tf))))
# Or using the shorthand for tf.logical_or
# print(sess.run(tf.is_inf(t_tf) | tf.is_nan(t_tf)))

# I can still print known elements of Tensors, but conditionals will be challenging mid-way through the computation graph.

# In[42]:

print(sess.run(t_tf[1]))

# What we did in NumPy is not strictly possible in TensorFlow (this will throw a lot of errors). However, we can still use things like `tf.cond` and `tf.where` along with any of the `tf.reduce_*` operations.

# In[57]:

# sess.run(t_tf[tf.where(tf.is_inf(t_tf) | tf.is_nan(t_tf))])
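
# What the commented-out line above was attempting (inspecting the offending
# entries) can be done with tf.boolean_mask instead of fancy indexing.
# A sketch in the same session; `bad` is my name for the mask:

bad = tf.is_inf(t_tf) | tf.is_nan(t_tf)
print(sess.run(tf.boolean_mask(t_tf, bad)))                 # just the inf/nan entries
print(sess.run(tf.where(bad, tf.zeros_like(t_tf), t_tf)))   # or zero them out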
Example #60
 def call(self, inputs):
     outputs = tf.where(tf.is_nan(inputs), K.zeros_like(inputs), inputs)
     return outputs