Example #1
 def _forward(state_log_prob, obs_log_prob):
   state_log_prob = array_ops.expand_dims(state_log_prob, axis=1)  # Broadcast.
   state_log_prob += state_trans_log_probs
   state_log_prob = math_ops.reduce_logsumexp(state_log_prob, axis=-1)
   state_log_prob += obs_log_prob
   log_prob_sum = math_ops.reduce_logsumexp(
       state_log_prob, axis=-1, keepdims=True)
   state_log_prob -= log_prob_sum
   return state_log_prob
Example #2
def _compute_energy_change(current_target_log_prob,
                           current_momentums,
                           proposed_target_log_prob,
                           proposed_momentums,
                           independent_chain_ndims,
                           name=None):
  """Helper to `kernel` which computes the energy change."""
  with ops.name_scope(
      name, "compute_energy_change",
      ([current_target_log_prob, proposed_target_log_prob,
        independent_chain_ndims] +
       current_momentums + proposed_momentums)):
    # Abbreviate lk0=log_kinetic_energy and lk1=proposed_log_kinetic_energy
    # since they're a mouthful; the short names also let us inline more.
    lk0, lk1 = [], []
    for current_momentum, proposed_momentum in zip(current_momentums,
                                                   proposed_momentums):
      axis = math_ops.range(independent_chain_ndims,
                            array_ops.rank(current_momentum))
      lk0.append(_log_sum_sq(current_momentum, axis))
      lk1.append(_log_sum_sq(proposed_momentum, axis))

    lk0 = -np.log(2.) + math_ops.reduce_logsumexp(array_ops.stack(lk0, axis=-1),
                                                  axis=-1)
    lk1 = -np.log(2.) + math_ops.reduce_logsumexp(array_ops.stack(lk1, axis=-1),
                                                  axis=-1)
    lp0 = -current_target_log_prob   # log_potential
    lp1 = -proposed_target_log_prob  # proposed_log_potential
    x = array_ops.stack([lp1, math_ops.exp(lk1), -lp0, -math_ops.exp(lk0)],
                        axis=-1)

    # The sum is NaN if any element is NaN or we see both +Inf and -Inf.
    # Thus we will replace such rows with infinite energy change which implies
    # rejection. Recall that float-comparisons with NaN are always False.
    is_sum_determinate = (
        math_ops.reduce_all(math_ops.is_finite(x) | (x >= 0.), axis=-1) &
        math_ops.reduce_all(math_ops.is_finite(x) | (x <= 0.), axis=-1))
    is_sum_determinate = array_ops.tile(
        is_sum_determinate[..., array_ops.newaxis],
        multiples=array_ops.concat([
            array_ops.ones(array_ops.rank(is_sum_determinate),
                           dtype=dtypes.int32),
            [4],
        ], axis=0))
    x = array_ops.where(is_sum_determinate,
                        x,
                        array_ops.fill(array_ops.shape(x),
                                       value=x.dtype.as_numpy_dtype(np.inf)))

    return math_ops.reduce_sum(x, axis=-1)
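A minimal NumPy sketch of the determinate-sum check above, with made-up values (only the names `x` and `is_sum_determinate` mirror the snippet): a row passes only if every entry is finite, since NaN fails both comparisons and an infinity fails one of them; rows that fail get an infinite energy change.

import numpy as np

# Rows: all finite, contains NaN, mixes +Inf and -Inf, contains +Inf only.
x = np.array([[1.0, -2.0, 3.0, 0.5],
              [1.0, np.nan, 3.0, 0.5],
              [np.inf, -np.inf, 3.0, 0.5],
              [np.inf, 1.0, 3.0, 0.5]])

# NaN compares False against everything, -Inf fails the first clause and
# +Inf fails the second, so only the all-finite row survives the conjunction.
is_sum_determinate = (np.all(np.isfinite(x) | (x >= 0.), axis=-1) &
                      np.all(np.isfinite(x) | (x <= 0.), axis=-1))
energy_change = np.where(is_sum_determinate, np.sum(x, axis=-1), np.inf)

print(is_sum_determinate)  # [ True False False False]
print(energy_change)       # [ 2.5  inf  inf  inf]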
Example #3
def _state_to_olabel(labels, num_labels, states):
  """Sum state log probs to ilabel log probs."""

  num_label_states = _get_dim(labels, 1) + 1
  label_states = states[:, :, 1:num_label_states]
  blank_states = states[:, :, num_label_states:]
  one_hot = array_ops.one_hot(
      labels - 1, depth=(num_labels - 1),
      on_value=0.0, off_value=math_ops.log(0.0))
  one_hot = array_ops.expand_dims(one_hot, axis=0)
  label_states = array_ops.expand_dims(label_states, axis=3)
  label_olabels = math_ops.reduce_logsumexp(label_states + one_hot, axis=2)
  blank_olabels = math_ops.reduce_logsumexp(
      blank_states, axis=2, keepdims=True)
  return array_ops.concat([blank_olabels, label_olabels], axis=-1)
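As a small illustration of the log-space masking used above, here is a NumPy sketch (toy shapes and names, not the CTC code): a one-hot style mask with on_value 0.0 and off_value -inf leaves selected entries unchanged under addition and makes all other entries vanish from the logsumexp, so probabilities are summed per label entirely in log space.

import numpy as np

state_log_probs = np.log(np.array([0.1, 0.2, 0.3, 0.4]))  # 4 states
state_to_label = np.array([0, 1, 1, 2])                    # 3 output labels

# mask[l, s] = 0.0 where state s maps to label l, -inf elsewhere.
mask = np.where(state_to_label[None, :] == np.arange(3)[:, None], 0.0, -np.inf)

# Adding -inf removes an entry from the sum: exp(-inf) = 0.
label_log_probs = np.logaddexp.reduce(state_log_probs[None, :] + mask, axis=-1)
print(np.exp(label_log_probs))  # [0.1 0.5 0.4], i.e. probabilities summed per label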
Example #4
  def __call__(self, inputs, state, scope=None):
    """Build the CrfForwardRnnCell.

    Args:
      inputs: A [batch_size, num_tags] matrix of unary potentials.
      state: A [batch_size, num_tags] matrix containing the previous alpha
          values.
      scope: Unused variable scope of this cell.

    Returns:
      new_alphas, new_alphas: A pair of [batch_size, num_tags] matrices
          containing the new alpha values.
    """
    state = array_ops.expand_dims(state, 2)

    # This addition op broadcasts self._transitions_params along the zeroth
    # dimension and state along the second dimension. This performs the
    # multiplication of previous alpha values and the current binary potentials
    # in log space.
    transition_scores = state + self._transition_params
    new_alphas = inputs + math_ops.reduce_logsumexp(transition_scores, [1])

    # Both the state and the output of this RNN cell contain the alpha values.
    # The output value is currently unused and simply satisfies the RNN API.
    # This could be useful in the future if we need to compute marginal
    # probabilities, which would require the accumulated alpha values at every
    # time step.
    return new_alphas, new_alphas
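A NumPy sketch of the single forward step this cell performs, under toy shapes and without a batch dimension (all names here are illustrative): adding the previous alphas, broadcast against the transition matrix, and reducing with logsumexp over the previous tag is the log-space version of the forward algorithm's matrix-vector product.

import numpy as np

num_tags = 3
prev_alphas = np.log(np.array([0.2, 0.5, 0.3]))     # previous alpha values, log space
transition = np.random.rand(num_tags, num_tags)     # binary potentials, linear space
unary = np.random.rand(num_tags)                    # unary potentials, log space

# Log-space step: new_alphas[j] = unary[j] + logsumexp_i(alphas[i] + log T[i, j]).
log_step = unary + np.logaddexp.reduce(
    prev_alphas[:, None] + np.log(transition), axis=0)

# The same step in linear space: new_alphas = unary + log(alphas @ T).
linear_step = unary + np.log(np.exp(prev_alphas) @ transition)
print(np.allclose(log_step, linear_step))  # True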
Example #5
def crf_log_norm(inputs, sequence_lengths, transition_params):
  """Computes the normalization for a CRF.

  Args:
    inputs: A [batch_size, max_seq_len, num_tags] tensor of unary potentials
        to use as input to the CRF layer.
    sequence_lengths: A [batch_size] vector of true sequence lengths.
    transition_params: A [num_tags, num_tags] transition matrix.
  Returns:
    log_norm: A [batch_size] vector of normalizers for a CRF.
  """
  # Split up the first and rest of the inputs in preparation for the forward
  # algorithm.
  first_input = array_ops.slice(inputs, [0, 0, 0], [-1, 1, -1])
  first_input = array_ops.squeeze(first_input, [1])
  rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])

  # Compute the alpha values in the forward algorithm in order to get the
  # partition function.
  forward_cell = CrfForwardRnnCell(transition_params)
  _, alphas = rnn.dynamic_rnn(
      cell=forward_cell,
      inputs=rest_of_input,
      sequence_length=sequence_lengths - 1,
      initial_state=first_input,
      dtype=dtypes.float32)
  log_norm = math_ops.reduce_logsumexp(alphas, [1])
  return log_norm
Example #6
def _sum_states(idx, states):
  """Take logsumexp for each unique state out of all label states.

  Args:
    idx: tensor of shape [batch, label_length]. For each sequence, indices into
      a set of unique labels as computed by calling unique.
    states: tensor of shape [frames, batch, label_length]. Log probabilities for
      each label state.

  Returns:
    tensor of shape [frames, batch_size, label_length], log probabilities summed
      for each unique label of the sequence.
  """

  with ops.name_scope("sum_states"):
    idx = ops.convert_to_tensor(idx, name="idx")
    num_states = _get_dim(states, 2)
    states = array_ops.expand_dims(states, axis=2)
    one_hot = array_ops.one_hot(
        idx,
        depth=num_states,
        on_value=0.0,
        off_value=math_ops.log(0.0),
        axis=1)
    return math_ops.reduce_logsumexp(states + one_hot, axis=-1)
Example #7
 def testReduceLogSumExp(self):
   for dtype in [np.float16, np.float32, np.double]:
     x_np = np.random.rand(5, 5).astype(dtype)
     with self.test_session(use_gpu=True):
       y_tf_np = math_ops.reduce_logsumexp(x_np).eval()
       y_np = log(np.sum(exp(x_np)))
       self.assertAllClose(y_tf_np, y_np)
Example #8
 def _single_seq_fn():
   log_norm = math_ops.reduce_logsumexp(first_input, [1])
   # Mask `log_norm` of the sequences with length <= zero.
   log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
                              array_ops.zeros_like(log_norm),
                              log_norm)
   return log_norm
Example #9
 def _log_cdf(self, x):
   x = self._pad_sample_dims(x)
   log_cdf_x = self.components_distribution.log_cdf(x)      # [S, B, k]
   log_mix_prob = nn_ops.log_softmax(
       self.mixture_distribution.logits, axis=-1)           # [B, k]
   return math_ops.reduce_logsumexp(
       log_cdf_x + log_mix_prob, axis=-1)                   # [S, B]
Example #10
def _state_to_olabel_unique(labels, num_labels, states, unique):
  """Sum state log probs to ilabel log probs using unique label indices."""

  num_label_states = _get_dim(labels, 1) + 1
  label_states = states[:, :, 1:num_label_states]
  blank_states = states[:, :, num_label_states:]

  unique_y, unique_idx = unique
  mul_reduce = _sum_states(unique_idx, label_states)

  num_frames = states.shape[0]
  batch_size = states.shape[1]
  num_states = num_label_states - 1
  batch_state_major = array_ops.transpose(mul_reduce, perm=[1, 2, 0])
  batch_state_major = array_ops.reshape(
      batch_state_major, [batch_size * num_states, num_frames])
  batch_offset = math_ops.range(batch_size, dtype=unique_y.dtype) * num_labels
  indices = unique_y + array_ops.expand_dims(batch_offset, axis=-1)
  indices = array_ops.reshape(indices, [-1, 1])
  scatter = array_ops.scatter_nd(
      indices=indices,
      updates=batch_state_major,
      shape=[batch_size * num_labels, num_frames])
  scatter = array_ops.reshape(scatter, [batch_size, num_labels, num_frames])
  scatter = array_ops.where(
      math_ops.equal(scatter, 0.0),
      array_ops.fill(array_ops.shape(scatter), math_ops.log(0.0)),
      scatter)
  label_olabels = array_ops.transpose(scatter, [2, 0, 1])
  label_olabels = label_olabels[:, :, 1:]

  blank_olabels = math_ops.reduce_logsumexp(
      blank_states, axis=2, keepdims=True)

  return array_ops.concat([blank_olabels, label_olabels], axis=-1)
Example #11
 def _define_score_samples(self):
   """Defines the likelihood of each data sample."""
   op = []
   for shard_id, prior_probs in enumerate(self._prior_probs):
     op.append(prior_probs + math_ops.log(self._w[shard_id]))
   self._scores = array_ops.squeeze(
       math_ops.reduce_logsumexp(op, axis=2, keepdims=True), axis=0)
Example #12
  def testCrfLogLikelihood(self):
    inputs = np.array(
        [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
    transition_params = np.array(
        [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
    sequence_lengths = np.array(3, dtype=np.int32)
    num_words = inputs.shape[0]
    num_tags = inputs.shape[1]
    with self.test_session() as sess:
      all_sequence_log_likelihoods = []

      # Make sure all probabilities sum to 1.
      for tag_indices in itertools.product(
          range(num_tags), repeat=sequence_lengths):
        tag_indices = list(tag_indices)
        tag_indices.extend([0] * (num_words - sequence_lengths))
        sequence_log_likelihood, _ = crf.crf_log_likelihood(
            inputs=array_ops.expand_dims(inputs, 0),
            tag_indices=array_ops.expand_dims(tag_indices, 0),
            sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
            transition_params=constant_op.constant(transition_params))
        all_sequence_log_likelihoods.append(sequence_log_likelihood)
      total_log_likelihood = math_ops.reduce_logsumexp(
          all_sequence_log_likelihoods)
      tf_total_log_likelihood = sess.run(total_log_likelihood)
      self.assertAllClose(tf_total_log_likelihood, 0.0)
Example #13
 def _log_variance(self):
   # Following calculation is based on law of total variance:
   #
   # Var[Z] = E[Var[Z | V]] + Var[E[Z | V]]
   #
   # where,
   #
   # Z|v ~ interpolate_affine[v](distribution)
   # V ~ mixture_distribution
   #
   # thus,
   #
   # E[Var[Z | V]] = sum{ prob[d] Var[d] : d=0, ..., deg-1 }
   # Var[E[Z | V]] = sum{ prob[d] (Mean[d] - Mean)**2 : d=0, ..., deg-1 }
   v = array_ops.stack([
       # log(self.distribution.variance()) = log(Var[d]) = log(rate[d])
       self._log_rate,
       # log((Mean[d] - Mean)**2)
       2. * math_ops.log(
           math_ops.abs(self.distribution.mean()
                        - self._mean()[..., array_ops.newaxis])),
   ], axis=-1)
   return math_ops.reduce_logsumexp(
       self.mixture_distribution.logits[..., array_ops.newaxis] + v,
       axis=[-2, -1])
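A NumPy sketch of the log-space law-of-total-variance computation above, for a toy mixture with known per-component means and variances (the snippet works with logits; this sketch assumes normalized mixture probabilities for clarity):

import numpy as np

probs = np.array([0.2, 0.5, 0.3])          # mixture probabilities
means = np.array([-1.0, 0.0, 2.0])         # Mean[d]
variances = np.array([0.5, 1.0, 0.25])     # Var[d]
mean = np.sum(probs * means)

# Law of total variance, linear space.
var_direct = np.sum(probs * variances) + np.sum(probs * (means - mean) ** 2)

# Same quantity assembled in log space: stack log Var[d] and log (Mean[d] - Mean)^2,
# add log prob[d], then logsumexp over everything.
v = np.stack([np.log(variances),
              2. * np.log(np.abs(means - mean))], axis=-1)
log_var = np.logaddexp.reduce((np.log(probs)[:, None] + v).ravel())
print(np.allclose(np.exp(log_var), var_direct))  # True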
Example #14
  def testCrfLogNorm(self):
    inputs = np.array(
        [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
    transition_params = np.array(
        [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
    num_words = inputs.shape[0]
    num_tags = inputs.shape[1]
    sequence_lengths = np.array(3, dtype=np.int32)
    with self.test_session() as sess:
      all_sequence_scores = []

      # Compare the dynamic program with brute force computation.
      for tag_indices in itertools.product(
          range(num_tags), repeat=sequence_lengths):
        tag_indices = list(tag_indices)
        tag_indices.extend([0] * (num_words - sequence_lengths))
        all_sequence_scores.append(
            crf.crf_sequence_score(
                inputs=array_ops.expand_dims(inputs, 0),
                tag_indices=array_ops.expand_dims(tag_indices, 0),
                sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
                transition_params=constant_op.constant(transition_params)))

      brute_force_log_norm = math_ops.reduce_logsumexp(all_sequence_scores)
      log_norm = crf.crf_log_norm(
          inputs=array_ops.expand_dims(inputs, 0),
          sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
          transition_params=constant_op.constant(transition_params))
      log_norm = array_ops.squeeze(log_norm, [0])
      tf_brute_force_log_norm, tf_log_norm = sess.run(
          [brute_force_log_norm, log_norm])

      self.assertAllClose(tf_log_norm, tf_brute_force_log_norm)
Example #15
 def _assert_valid_sample(self, x):
   if not self.validate_args: return x
   return control_flow_ops.with_dependencies([
       check_ops.assert_non_positive(x),
       distribution_util.assert_close(
           array_ops.zeros((), dtype=self.dtype),
           math_ops.reduce_logsumexp(x, reduction_indices=[-1])),
   ], x)
Example #16
 def testKeepDims(self):
   for dtype in [np.float16, np.float32, np.double]:
     x_np = np.random.rand(5, 5).astype(dtype)
     with self.test_session(use_gpu=True):
       y_tf_np = math_ops.reduce_logsumexp(x_np, keepdims=True).eval()
       self.assertEqual(y_tf_np.ndim, x_np.ndim)
       y_np = log(np.sum(exp(x_np), keepdims=True))
       self.assertAllClose(y_tf_np, y_np)
Example #17
 def _log_prob(self, x):
   with ops.control_dependencies(self._runtime_assertions):
     x = self._pad_sample_dims(x)
     log_prob_x = self.components_distribution.log_prob(x)  # [S, B, k]
     log_mix_prob = nn_ops.log_softmax(
         self.mixture_distribution.logits, axis=-1)         # [B, k]
     return math_ops.reduce_logsumexp(
         log_prob_x + log_mix_prob, axis=-1)                # [S, B]
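The mixture log-density above is log p(x) = logsumexp_k(log w_k + log p_k(x)). A small NumPy check of that identity for a two-component normal mixture (toy parameters, no batching):

import numpy as np

def normal_log_pdf(x, loc, scale):
    return -0.5 * ((x - loc) / scale) ** 2 - np.log(scale * np.sqrt(2. * np.pi))

x = 0.3
mix_probs = np.array([0.3, 0.7])
locs = np.array([-1., 1.])
scales = np.array([0.1, 0.5])

# Log space: log p(x) = logsumexp_k(log w_k + log p_k(x)).
log_prob = np.logaddexp.reduce(np.log(mix_probs) + normal_log_pdf(x, locs, scales))

# Linear-space reference.
prob = np.sum(mix_probs * np.exp(normal_log_pdf(x, locs, scales)))
print(np.allclose(log_prob, np.log(prob)))  # True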
Example #18
 def testReductionIndices2(self):
     for dtype in [np.float16, np.float32, np.double]:
         x_np = np.random.rand(5, 5).astype(dtype)
         with self.test_session(use_gpu=True):
             y_tf = math_ops.reduce_logsumexp(x_np, reduction_indices=0)
             y_np = log(np.sum(exp(x_np), axis=0))
             self.assertShapeEqual(y_np, y_tf)
             y_tf_np = y_tf.eval()
             self.assertAllClose(y_tf_np, y_np)
Example #19
  def _backward(accs, elems):
    """Calculate log probs and cumulative sum masked for sequence length."""
    state_log_prob, cum_log_sum = accs
    obs_log_prob, mask = elems
    state_log_prob += obs_log_prob
    state_log_prob = array_ops.expand_dims(state_log_prob, axis=1)  # Broadcast.
    state_log_prob += bwd_state_trans_log_probs
    state_log_prob = math_ops.reduce_logsumexp(state_log_prob, axis=-1)

    log_prob_sum = math_ops.reduce_logsumexp(
        state_log_prob, axis=-1, keepdims=True)
    state_log_prob -= log_prob_sum

    cum_log_sum += array_ops.squeeze(log_prob_sum) * mask
    batched_mask = array_ops.expand_dims(mask, axis=1)
    out = state_log_prob * batched_mask
    out += final_state_log_probs * (1.0 - batched_mask)
    return out, cum_log_sum
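The per-step renormalization above subtracts a logsumexp from the state log probs at every frame and accumulates the subtracted constants in cum_log_sum, so the overall normalizer is still recoverable. A NumPy sketch of that bookkeeping, without transitions or masking (all values are made up):

import numpy as np

initial = np.log(np.array([0.5, 0.3, 0.2]))
obs = np.log(np.array([0.9, 0.8, 0.7]))
num_frames = 3

state_log_prob = initial
cum_log_sum = 0.0
for _ in range(num_frames):
    state_log_prob = state_log_prob + obs
    log_prob_sum = np.logaddexp.reduce(state_log_prob)
    state_log_prob = state_log_prob - log_prob_sum    # renormalize: logsumexp is now 0
    cum_log_sum += log_prob_sum                       # remember what was subtracted

# The accumulated constants equal the normalizer of the unnormalized recursion.
direct = np.logaddexp.reduce(initial + num_frames * obs)
print(np.allclose(cum_log_sum, direct))  # True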
Example #20
 def _log_prob(self, x):
     with ops.control_dependencies(self._assertions):
         x = ops.convert_to_tensor(x, name="x")
         distribution_log_probs = [d.log_prob(x) for d in self.components]
         cat_log_probs = self._cat_probs(log_probs=True)
         final_log_probs = [cat_lp + d_lp for (cat_lp, d_lp) in zip(cat_log_probs, distribution_log_probs)]
         concat_log_probs = array_ops.stack(final_log_probs, 0)
         log_sum_exp = math_ops.reduce_logsumexp(concat_log_probs, [0])
         return log_sum_exp
Example #21
 def testReductionIndices(self):
   for dtype in [np.float16, np.float32, np.double]:
     x_np = np.random.rand(5, 5).astype(dtype)
     with test_util.use_gpu():
       y_tf = math_ops.reduce_logsumexp(x_np, axis=[0])
       y_np = np.log(np.sum(np.exp(x_np), axis=0))
       self.assertShapeEqual(y_np, y_tf)
       y_tf_np = self.evaluate(y_tf)
       self.assertAllClose(y_tf_np, y_np)
Example #22
 def _assert_valid_sample(self, x):
     if not self.validate_args:
         return x
     return control_flow_ops.with_dependencies([
         check_ops.assert_non_positive(x),
         distribution_util.assert_close(
             array_ops.zeros([], dtype=self.dtype),
             math_ops.reduce_logsumexp(x, axis=[-1])),
     ], x)
Example #23
 def testReductionIndices2(self):
   for dtype in [np.float16, np.float32, np.double]:
     x_np = np.random.rand(5, 5).astype(dtype)
     with self.test_session(use_gpu=True):
       y_tf = math_ops.reduce_logsumexp(x_np, reduction_indices=0)
       y_np = log(np.sum(exp(x_np), axis=0))
       self.assertShapeEqual(y_np, y_tf)
       y_tf_np = y_tf.eval()
       self.assertAllClose(y_tf_np, y_np)
Example #24
 def testKeepDims(self):
     for dtype in [np.float16, np.float32, np.double]:
         x_np = np.random.rand(5, 5).astype(dtype)
         with self.test_session(use_gpu=True):
             y_tf_np = math_ops.reduce_logsumexp(x_np,
                                                 keep_dims=True).eval()
             self.assertEqual(y_tf_np.ndim, x_np.ndim)
             y_np = log(np.sum(exp(x_np), keepdims=True))
             self.assertAllClose(y_tf_np, y_np)
Example #25
 def _assert_valid_sample(self, x):
   if not self.validate_args:
     return x
   return control_flow_ops.with_dependencies([
       check_ops.assert_non_positive(x),
       check_ops.assert_near(
           array_ops.zeros([], dtype=self.dtype),
           math_ops.reduce_logsumexp(x, axis=[-1])),
   ], x)
Example #26
  def _backward(accs, elems):
    """Calculate log probs and cumulative sum masked for sequence length."""
    state_log_prob, cum_log_sum = accs
    obs_log_prob, mask = elems
    state_log_prob += obs_log_prob
    state_log_prob = array_ops.expand_dims(state_log_prob, axis=1)  # Broadcast.
    state_log_prob += bwd_state_trans_log_probs
    state_log_prob = math_ops.reduce_logsumexp(state_log_prob, axis=-1)

    log_prob_sum = math_ops.reduce_logsumexp(
        state_log_prob, axis=-1, keepdims=True)
    state_log_prob -= log_prob_sum

    cum_log_sum += array_ops.squeeze(log_prob_sum) * mask
    batched_mask = array_ops.expand_dims(mask, axis=1)
    out = state_log_prob * batched_mask
    out += final_state_log_probs * (1.0 - batched_mask)
    return out, cum_log_sum
Example #27
def get_backwards_probabilities(inputs, sequence_lengths, transitions):
    '''
    CRF backwards probabilities and log normalizer.

    inputs: bs x L x V unaries
    sequence_lengths: bs
    transitions: An object implementing CRF transitions

    returns: bs x L and bs
    '''

    batch_size = array_ops.shape(inputs)[0]

    # Split up the first and rest of the inputs in preparation for the forward
    # algorithm.
    first_input = inputs[:, 0, :]

    num_tags = transitions.num_tags

    pairwise = transitions.pack_to_parameter_sequence()
    rest_of_pairwise = pairwise[:, 1:, :]

    rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])

    sequence_lengths_minus_one = math_ops.maximum(
        array_ops.constant(0, dtype=sequence_lengths.dtype),
        sequence_lengths - 1)

    # Compute the beta values in the backward algorithm in order to get the
    # partition function. The sequence length used here is not allowed to be
    # less than zero (see `sequence_lengths_minus_one` above).
    forward_cell = CrfBackwardsRnnCell(transitions)

    concatenated_rest_of_input = array_ops.concat(
        [rest_of_input, rest_of_pairwise], axis=2)
    reversed_concatenated_rest_of_input = reverse_and_repad(
        concatenated_rest_of_input, sequence_lengths_minus_one, 0)

    initial_state = array_ops.zeros([batch_size, num_tags],
                                    dtype=dtypes.float32)

    all_betas, betas = rnn.dynamic_rnn(
        cell=forward_cell,
        inputs=reversed_concatenated_rest_of_input,
        sequence_length=sequence_lengths_minus_one,
        initial_state=initial_state,
        dtype=dtypes.float32)
    log_norm = math_ops.reduce_logsumexp(first_input + betas, [1])
    # Mask `log_norm` of the sequences with length <= zero.
    log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
                               array_ops.zeros_like(log_norm), log_norm)

    all_betas = reverse_and_repad(all_betas, sequence_lengths_minus_one, 0)

    return all_betas, log_norm
Example #28
    def __sample_w3(self, n, seed=0):
        shape = array_ops.concat(([n], self.batch_shape_tensor()[:-1], [1]), 0)
        u = random_ops.random_uniform(shape, dtype=self.dtype, seed=seed)
        u = tf.clip_by_value(u, 1e-16, 1 - 1e-16)

        self.__w = 1 + math_ops.reduce_logsumexp([math_ops.log(u),
                                                  math_ops.log(1 - u) -
                                                  2 * self.scale],
                                                 axis=0) / self.scale
        return self.__w
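The update above relies on the identity logsumexp([log u, log(1 - u) - 2*scale]) = log(u + (1 - u) * exp(-2*scale)), evaluated without leaving log space. A quick NumPy check with toy values:

import numpy as np

scale = 5.0                          # concentration
u = np.array([1e-12, 0.3, 0.999])

# Log-space form, as in the snippet.
w_log = 1. + np.logaddexp(np.log(u), np.log1p(-u) - 2. * scale) / scale

# Direct form it is equivalent to.
w_direct = 1. + np.log(u + (1. - u) * np.exp(-2. * scale)) / scale
print(np.allclose(w_log, w_direct))  # True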
Example #29
  def _define_prior_log_prob_operation(self, shard_id):
    """Computes the prior probability of all samples.

    Updates a vector where each item is the prior probability of an
    input example.

    Args:
      shard_id: id of current shard_id.
    """
    self._prior_probs[shard_id] = math_ops.reduce_logsumexp(
        self._probs[shard_id], axis=1, keepdims=True)
Example #30
    def _define_prior_log_prob_operation(self, shard_id):
        """Computes the prior probability of all samples.

    Updates a vector where each item is the prior probability of an
    input example.

    Args:
      shard_id: id of current shard_id.
    """
        self._prior_probs[shard_id] = math_ops.reduce_logsumexp(
            self._probs[shard_id], axis=1, keepdims=True)
Example #31
 def _crf_log_norm(self, inputs, seq_lens):
     first_input = array_ops.slice(inputs, [0, 0, 0], [-1, 1, -1])
     first_input = array_ops.squeeze(first_input, [1])
     rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])
     forward_cell = CrfForwardRnnCell(self.transition_params)
     seq_lens_less_one = math_ops.maximum(constant_op.constant(0, dtype=seq_lens.dtype), seq_lens - 1)
     _, alphas = rnn.dynamic_rnn(cell=forward_cell, inputs=rest_of_input, sequence_length=seq_lens_less_one,
                                 initial_state=first_input, dtype=dtypes.float32)
     log_norm = math_ops.reduce_logsumexp(alphas, [1])
     log_norm = array_ops.where(math_ops.less_equal(seq_lens, 0), array_ops.zeros_like(log_norm), log_norm)
     return log_norm
Example #32
 def _forward_log_det_jacobian(self, x):
     # This code is similar to nn_ops.log_softmax but different because we have
     # an implicit zero column to handle. I.e., instead of:
     #   reduce_sum(logits - reduce_sum(exp(logits), dim))
     # we must do:
     #   log_normalization = 1 + reduce_sum(exp(logits))
     #   -log_normalization + reduce_sum(logits - log_normalization)
     log_normalization = nn_ops.softplus(
         math_ops.reduce_logsumexp(x, axis=-1, keep_dims=True))
     return array_ops.squeeze((-log_normalization + math_ops.reduce_sum(
         x - log_normalization, axis=-1, keepdims=True)),
                              axis=-1)
Example #33
 def _log_cdf(self, x):
   with ops.control_dependencies(self._assertions):
     x = ops.convert_to_tensor(x, name="x")
     distribution_log_cdfs = [d.log_cdf(x) for d in self.components]
     cat_log_probs = self._cat_probs(log_probs=True)
     final_log_cdfs = [
         cat_lp + d_lcdf
         for (cat_lp, d_lcdf) in zip(cat_log_probs, distribution_log_cdfs)
     ]
     concatted_log_cdfs = array_ops.stack(final_log_cdfs, axis=0)
     mixture_log_cdf = math_ops.reduce_logsumexp(concatted_log_cdfs, [0])
     return mixture_log_cdf
Example #34
 def _benchmark_tf_reduce_logsumexp(self,
                                    device=CPU,
                                    execution_mode=None,
                                    defunc=False):
     with context.device(device):
         x = constant_op.constant([[1, 0.], [0., 0.]])
         if defunc:
             reduce_func = def_function.function(math_ops.reduce_logsumexp)
             func = lambda: reduce_func(x)
         else:
             func = lambda: math_ops.reduce_logsumexp(x)
         self._run(func, 3000, execution_mode=execution_mode)
Example #35
 def _log_cdf(self, x):
   with ops.control_dependencies(self._assertions):
     x = ops.convert_to_tensor(x, name="x")
     distribution_log_cdfs = [d.log_cdf(x) for d in self.components]
     cat_log_probs = self._cat_probs(log_probs=True)
     final_log_cdfs = [
         cat_lp + d_lcdf
         for (cat_lp, d_lcdf) in zip(cat_log_probs, distribution_log_cdfs)
     ]
     concatted_log_cdfs = array_ops.stack(final_log_cdfs, axis=0)
     mixture_log_cdf = math_ops.reduce_logsumexp(concatted_log_cdfs, [0])
     return mixture_log_cdf
Example #36
 def _forward_log_det_jacobian(self, x):
   # This code is similar to nn_ops.log_softmax but different because we have
   # an implicit zero column to handle. I.e., instead of:
   #   reduce_sum(logits - reduce_sum(exp(logits), dim))
   # we must do:
   #   log_normalization = 1 + reduce_sum(exp(logits))
   #   -log_normalization + reduce_sum(logits - log_normalization)
   log_normalization = nn_ops.softplus(
       math_ops.reduce_logsumexp(x, axis=-1, keep_dims=True))
   return array_ops.squeeze(
       (-log_normalization + math_ops.reduce_sum(
           x - log_normalization, axis=-1, keepdims=True)), axis=-1)
Example #37
 def _log_prob(self, x):
   with ops.control_dependencies(self._assertions):
     x = ops.convert_to_tensor(x, name="x")
     distribution_log_probs = [d.log_prob(x) for d in self.components]
     cat_log_probs = self._cat_probs(log_probs=True)
     final_log_probs = [
         cat_lp + d_lp
         for (cat_lp, d_lp) in zip(cat_log_probs, distribution_log_probs)
     ]
     concat_log_probs = array_ops.stack(final_log_probs, 0)
     log_sum_exp = math_ops.reduce_logsumexp(concat_log_probs, [0])
     return log_sum_exp
Example #38
  def _log_prob(self, y):
    # For caching to work, it is imperative that the bijector is the first to
    # modify the input.
    x = self.bijector.inverse(y)
    ildj = self.bijector.inverse_log_det_jacobian(y)
    if self.bijector._is_injective:  # pylint: disable=protected-access
      return self._finish_log_prob_for_one_fiber(y, x, ildj)

    lp_on_fibers = [
        self._finish_log_prob_for_one_fiber(y, x_i, ildj_i)
        for x_i, ildj_i in zip(x, ildj)]
    return math_ops.reduce_logsumexp(array_ops.stack(lp_on_fibers), axis=0)
Example #39
  def _log_prob(self, y):
    # For caching to work, it is imperative that the bijector is the first to
    # modify the input.
    x = self.bijector.inverse(y)
    ildj = self.bijector.inverse_log_det_jacobian(y)
    if self.bijector._is_injective:  # pylint: disable=protected-access
      return self._finish_log_prob_for_one_fiber(y, x, ildj)

    lp_on_fibers = [
        self._finish_log_prob_for_one_fiber(y, x_i, ildj_i)
        for x_i, ildj_i in zip(x, ildj)]
    return math_ops.reduce_logsumexp(array_ops.stack(lp_on_fibers), axis=0)
Example #40
    def _multi_seq_fn():
        # Split up the first and rest of the inputs in preparation for the forward
        # algorithm.
        batch_size = array_ops.shape(inputs)[0]
        num_tags = array_ops.shape(inputs)[2]

        first_input = array_ops.slice(inputs, [0, 0, 0], [-1, 1, -1])
        first_input = array_ops.squeeze(first_input, [1])
        rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])

        # Compute the alpha values in the forward algorithm
        forward_cell = CrfForwardRnnCell(transition_params)
        alphas_seq, alphas = rnn.dynamic_rnn(cell=forward_cell,
                                             inputs=rest_of_input,
                                             sequence_length=sequence_lengths -
                                             1,
                                             initial_state=first_input,
                                             dtype=dtypes.float32)
        # Get all alphas in each time steps
        alphas_seq = tf.concat(
            [tf.expand_dims(first_input, axis=1), alphas_seq], axis=1)

        # Compute the betas values in the backward algorithm
        first_input = tf.constant(
            0.0, shape=[1, 1])  # betas start at log(1) = 0.0 since we work in log space
        first_input = tf.tile(first_input, multiples=[batch_size, num_tags])

        # reverse the sequence of inputs in forward algorithm for backward algorithm
        rest_of_input = gen_array_ops.reverse_sequence(rest_of_input,
                                                       sequence_lengths - 1,
                                                       seq_dim=1)

        # transpose transition parameters for backward algorithm
        backward_cell = CrfBackwardRnnCell(
            tf.transpose(transition_params, perm=[1, 0]))
        betas_seq, betas = rnn.dynamic_rnn(cell=backward_cell,
                                           inputs=rest_of_input,
                                           sequence_length=sequence_lengths -
                                           1,
                                           initial_state=first_input,
                                           dtype=dtypes.float32)

        betas_seq = tf.concat([tf.expand_dims(first_input, axis=1), betas_seq],
                              axis=1)

        # reverse betas that follows same index as alphas
        betas_seq = tf.reverse_sequence(betas_seq, sequence_lengths, seq_dim=1)

        # crf log norm
        log_norm = math_ops.reduce_logsumexp(alphas, [1])

        return alphas_seq, betas_seq, log_norm
Example #41
 def _log_prob(self, x):
   # By convention, we always put the grid points right-most.
   y = array_ops.stack(
       [aff.inverse(x) for aff in self.interpolated_affine],
       axis=-1)
   log_prob = math_ops.reduce_sum(self.distribution.log_prob(y), axis=-2)
   # Because the affine transformation has a constant Jacobian, it is the case
   # that `affine.fldj(x) = -affine.ildj(x)`. This is not true in general.
   fldj = array_ops.stack(
       [aff.forward_log_det_jacobian(x) for aff in self.interpolated_affine],
       axis=-1)
   return math_ops.reduce_logsumexp(
       self.mixture_distribution.logits - fldj + log_prob, axis=-1)
Example #42
 def _log_prob(self, x):
   # By convention, we always put the grid points right-most.
   y = array_ops.stack(
       [aff.inverse(x) for aff in self.interpolated_affine],
       axis=-1)
   log_prob = math_ops.reduce_sum(self.distribution.log_prob(y), axis=-2)
   # Because the affine transformation has a constant Jacobian, it is the case
   # that `affine.fldj(x) = -affine.ildj(x)`. This is not true in general.
   fldj = array_ops.stack(
       [aff.forward_log_det_jacobian(x) for aff in self.interpolated_affine],
       axis=-1)
   return math_ops.reduce_logsumexp(
       self.mixture_distribution.logits - fldj + log_prob, axis=-1)
Example #43
    def __sample_w3(self, n, seed, eps=1e-8):
        shape = array_ops.concat(([n], self.batch_shape_tensor()[:-1], [1]), 0)
        u = random_ops.random_uniform(shape,
                                      eps,
                                      1 - eps,
                                      dtype=self.dtype,
                                      seed=seed)
        self.__w = 1 + math_ops.reduce_logsumexp(
            array_ops.stack(
                [math_ops.log(u),
                 math_ops.log(1 - u) - 2 * self.scale], -1), -1) / self.scale

        return self.__w
Example #44
    def testCrfLogNorm(self):
        transition_params = np.array([[-3, 5, -2], [3, 4, 1], [1, 2, 1]],
                                     dtype=np.float32)
        # Test both the length-1 and regular cases.
        sequence_lengths_list = [
            np.array(3, dtype=np.int32),
            np.array(1, dtype=np.int32)
        ]
        inputs_list = [
            np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]],
                     dtype=np.float32),
            np.array([[3, -1, 3]], dtype=np.float32),
        ]
        tag_indices_list = [
            np.array([1, 2, 1, 0], dtype=np.int32),
            np.array([2], dtype=np.int32)
        ]

        for sequence_lengths, inputs, tag_indices in zip(
                sequence_lengths_list, inputs_list, tag_indices_list):
            num_words = inputs.shape[0]
            num_tags = inputs.shape[1]
            with self.test_session() as sess:
                all_sequence_scores = []

                # Compare the dynamic program with brute force computation.
                for tag_indices in itertools.product(range(num_tags),
                                                     repeat=sequence_lengths):
                    tag_indices = list(tag_indices)
                    tag_indices.extend([0] * (num_words - sequence_lengths))
                    all_sequence_scores.append(
                        crf.crf_sequence_score(
                            inputs=array_ops.expand_dims(inputs, 0),
                            tag_indices=array_ops.expand_dims(tag_indices, 0),
                            sequence_lengths=array_ops.expand_dims(
                                sequence_lengths, 0),
                            transition_params=constant_op.constant(
                                transition_params)))

                brute_force_log_norm = math_ops.reduce_logsumexp(
                    all_sequence_scores)
                log_norm = crf.crf_log_norm(
                    inputs=array_ops.expand_dims(inputs, 0),
                    sequence_lengths=array_ops.expand_dims(
                        sequence_lengths, 0),
                    transition_params=constant_op.constant(transition_params))
                log_norm = array_ops.squeeze(log_norm, [0])
                tf_brute_force_log_norm, tf_log_norm = sess.run(
                    [brute_force_log_norm, log_norm])

                self.assertAllClose(tf_log_norm, tf_brute_force_log_norm)
Example #45
    def _single_seq_fn():
        batch_size = array_ops.shape(inputs)[0]
        num_tags = array_ops.shape(inputs)[2]

        alphas_seq = array_ops.slice(inputs, [0, 0, 0], [-1, 1, -1])

        betas = tf.constant(
            0.0, shape=[1, 1])  # betas start at log(1) = 0.0 since we work in log space
        betas = tf.tile(betas, multiples=[batch_size, num_tags])
        betas_seq = array_ops.expand_dims(betas, 1)
        # crf log norm
        log_norm = math_ops.reduce_logsumexp(alphas_seq, [2])

        return alphas_seq, betas_seq, log_norm
Example #46
    def call(self, inputs, mask=None):
        if mask is not None:

            adder = (math_ops.cast(mask, inputs.dtype)) * (
                _large_compatible_negative(inputs.dtype))

            inputs += adder
        if isinstance(self.axis, (tuple, list)):
            if len(self.axis) > 1:
                return math_ops.exp(inputs - math_ops.reduce_logsumexp(
                    inputs, axis=self.axis, keepdims=True))
            else:
                return K.softmax(inputs, axis=self.axis[0])
        return K.softmax(inputs, axis=self.axis)
Example #47
    def _multi_seq_fn():
        """Forward computation of alpha values."""
        rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])

        # Compute the alpha values in the forward algorithm in order to get the
        # partition function.
        forward_cell = CrfForwardRnnCell(transition_params)
        _, alphas = rnn.dynamic_rnn(cell=forward_cell,
                                    inputs=rest_of_input,
                                    sequence_length=sequence_lengths - 1,
                                    initial_state=first_input,
                                    dtype=dtypes.float32)
        log_norm = math_ops.reduce_logsumexp(alphas, [1])
        return log_norm
Example #48
    def testUnderflow(self):
        x = [-1000, -1001, -1002, -1003]
        for dtype in [np.float16, np.float32, np.double]:
            x_np = np.array(x, dtype=dtype)
            max_np = np.max(x_np)
            with self.assertRaisesRegexp(RuntimeWarning, "divide by zero encountered in log"):
                out = log(np.sum(exp(x_np)))
                if out == -np.inf:
                    raise RuntimeWarning("divide by zero encountered in log")

            with self.test_session(use_gpu=True):
                x_tf = constant_op.constant(x_np, shape=x_np.shape)
                y_tf_np = math_ops.reduce_logsumexp(x_tf).eval()
                y_np = log(np.sum(exp(x_np - max_np))) + max_np
                self.assertAllClose(y_tf_np, y_np)
Example #49
  def _multi_seq_fn():
    """Forward computation of alpha values."""
    rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])

    # Compute the alpha values in the forward algorithm in order to get the
    # partition function.
    forward_cell = CrfForwardRnnCell(transition_params)
    _, alphas = rnn.dynamic_rnn(
        cell=forward_cell,
        inputs=rest_of_input,
        sequence_length=sequence_lengths - 1,
        initial_state=first_input,
        dtype=dtypes.float32)
    log_norm = math_ops.reduce_logsumexp(alphas, [1])
    return log_norm
Example #50
  def testOverflow(self):
    x = [1000, 1001, 1002, 1003]
    for dtype in [np.float16, np.float32, np.double]:
      x_np = np.array(x, dtype=dtype)
      max_np = np.max(x_np)
      with self.assertRaisesRegexp(RuntimeWarning,
                                   "overflow encountered in exp"):
        out = log(np.sum(exp(x_np)))
        if out == np.inf:
          raise RuntimeWarning("overflow encountered in exp")

      with self.test_session():
        x_tf = constant_op.constant(x_np, shape=x_np.shape)
        y_tf_np = math_ops.reduce_logsumexp(x_tf).eval()
        y_np = log(np.sum(exp(x_np - max_np))) + max_np
        self.assertAllClose(y_tf_np, y_np)
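The y_np reference in these tests spells out the max-shift stabilization that reduce_logsumexp applies internally: subtract the maximum before exponentiating, then add it back. A NumPy illustration with the same inputs:

import numpy as np

x = np.array([1000., 1001., 1002., 1003.])

# Naive evaluation overflows: exp(1000.) is inf in float64 (emits a RuntimeWarning).
naive = np.log(np.sum(np.exp(x)))                 # inf

# Shifting by the max keeps every exponent <= 0, and the max is added back at the end.
m = np.max(x)
stable = np.log(np.sum(np.exp(x - m))) + m        # ~1003.44, finite
print(naive, stable)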
Example #51
 def testLogCdf(self):
   with self.cached_session() as sess:
     gm = mixture_same_family_lib.MixtureSameFamily(
         mixture_distribution=categorical_lib.Categorical(probs=[0.3, 0.7]),
         components_distribution=normal_lib.Normal(
             loc=[-1., 1], scale=[0.1, 0.5]))
     x = gm.sample(10, seed=42)
     actual_log_cdf = gm.log_cdf(x)
     expected_log_cdf = math_ops.reduce_logsumexp(
         (gm.mixture_distribution.logits +
          gm.components_distribution.log_cdf(x[..., array_ops.newaxis])),
         axis=1)
     actual_log_cdf_, expected_log_cdf_ = sess.run([
         actual_log_cdf, expected_log_cdf])
     self.assertAllClose(actual_log_cdf_, expected_log_cdf_,
                         rtol=1e-6, atol=0.0)
Example #52
 def testLogCdf(self):
   with self.test_session() as sess:
     gm = mixture_same_family_lib.MixtureSameFamily(
         mixture_distribution=categorical_lib.Categorical(probs=[0.3, 0.7]),
         components_distribution=normal_lib.Normal(
             loc=[-1., 1], scale=[0.1, 0.5]))
     x = gm.sample(10, seed=42)
     actual_log_cdf = gm.log_cdf(x)
     expected_log_cdf = math_ops.reduce_logsumexp(
         (gm.mixture_distribution.logits +
          gm.components_distribution.log_cdf(x[..., array_ops.newaxis])),
         axis=1)
     actual_log_cdf_, expected_log_cdf_ = sess.run([
         actual_log_cdf, expected_log_cdf])
     self.assertAllClose(actual_log_cdf_, expected_log_cdf_,
                         rtol=1e-6, atol=0.0)
Example #53
    def testOverflow(self):
        x = [1000, 1001, 1002, 1003]
        for dtype in [np.float16, np.float32, np.double]:
            x_np = np.array(x, dtype=dtype)
            max_np = np.max(x_np)
            with self.assertRaisesRegexp(RuntimeWarning,
                                         "overflow encountered in exp"):
                out = np.log(np.sum(np.exp(x_np)))
                if out == np.inf:
                    raise RuntimeWarning("overflow encountered in exp")

            with test_util.use_gpu():
                x_tf = constant_op.constant(x_np, shape=x_np.shape)
                y_tf_np = math_ops.reduce_logsumexp(x_tf)
                y_np = np.log(np.sum(np.exp(x_np - max_np))) + max_np
                self.assertAllClose(y_tf_np, y_np)
Example #54
    def testUnderflow(self):
        x = [-1000, -1001, -1002, -1003]
        for dtype in [np.float16, np.float32, np.double]:
            x_np = np.array(x, dtype=dtype)
            max_np = np.max(x_np)
            with self.assertRaisesRegexp(RuntimeWarning,
                                         "divide by zero encountered in log"):
                out = log(np.sum(exp(x_np)))
                if out == -np.inf:
                    raise RuntimeWarning("divide by zero encountered in log")

            with self.test_session(use_gpu=True):
                x_tf = constant_op.constant(x_np, shape=x_np.shape)
                y_tf_np = math_ops.reduce_logsumexp(x_tf).eval()
                y_np = log(np.sum(exp(x_np - max_np))) + max_np
                self.assertAllClose(y_tf_np, y_np)
Example #55
 def testNoWeights(self):
     logx_ = np.array([[0., -1, 1000.], [0, 1, -1000.], [-5, 0, 5]])
     with self.test_session() as sess:
         logx = constant_op.constant(logx_)
         expected = math_ops.reduce_logsumexp(logx, axis=-1)
         grad_expected = gradients_impl.gradients(expected, logx)[0]
         actual, actual_sgn = du.reduce_weighted_logsumexp(logx,
                                                           axis=-1,
                                                           return_sign=True)
         grad_actual = gradients_impl.gradients(actual, logx)[0]
         [actual_, actual_sgn_, grad_actual_, expected_,
          grad_expected_] = sess.run(
              [actual, actual_sgn, grad_actual, expected, grad_expected])
     self.assertAllEqual(expected_, actual_)
     self.assertAllEqual(grad_expected_, grad_actual_)
     self.assertAllEqual([1., 1, 1], actual_sgn_)
Example #56
  def _log_prob(self, y, bijector_kwargs=None, distribution_kwargs=None):
    # For caching to work, it is imperative that the bijector is the first to
    # modify the input.
    bijector_kwargs = bijector_kwargs or {}
    distribution_kwargs = distribution_kwargs or {}
    x = self.bijector.inverse(y, **bijector_kwargs)
    event_ndims = self._maybe_get_static_event_ndims()
    ildj = self.bijector.inverse_log_det_jacobian(
        y, event_ndims=event_ndims, **bijector_kwargs)
    if self.bijector._is_injective:  # pylint: disable=protected-access
      return self._finish_log_prob_for_one_fiber(y, x, ildj,
                                                 distribution_kwargs)

    lp_on_fibers = [
        self._finish_log_prob_for_one_fiber(y, x_i, ildj_i, distribution_kwargs)
        for x_i, ildj_i in zip(x, ildj)]
    return math_ops.reduce_logsumexp(array_ops.stack(lp_on_fibers), axis=0)
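When the bijector is not injective, the log prob sums the contributions of every preimage (fiber) of y with logsumexp. A self-contained NumPy sketch for the classic case Y = X**2 with X standard normal, whose fiber is {+sqrt(y), -sqrt(y)} (everything here is illustrative, not the TransformedDistribution API):

import numpy as np

def normal_log_pdf(x):
    return -0.5 * x ** 2 - 0.5 * np.log(2. * np.pi)

y = np.array([0.5, 1.0, 2.5])
roots = np.stack([np.sqrt(y), -np.sqrt(y)])   # the two preimages of each y
ildj = -np.log(2. * np.sqrt(y))               # log |d root / d y|, same for both roots

lp_on_fibers = normal_log_pdf(roots) + ildj
log_prob = np.logaddexp.reduce(lp_on_fibers, axis=0)

# Reference: Y = X**2 is chi-square with one degree of freedom.
chi2_log_pdf = -0.5 * y - 0.5 * np.log(2. * np.pi * y)
print(np.allclose(log_prob, chi2_log_pdf))  # True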
Example #57
 def _forward_log_det_jacobian(self, x):
     if self._static_event_ndims == 0:
         return x - 2. * nn_ops.softplus(x)
     else:
         # This code is similar to nn_ops.log_softmax but different because we have
         # an implicit zero column to handle. I.e., instead of:
         #   reduce_sum(logits - reduce_sum(exp(logits), dim))
         # we must do:
         #   log_normalization = 1 + reduce_sum(exp(logits))
         #   -log_normalization + reduce_sum(logits - log_normalization)
         log_normalization = nn_ops.softplus(
             math_ops.reduce_logsumexp(x,
                                       reduction_indices=-1,
                                       keep_dims=True))
         fldj = (-log_normalization + math_ops.reduce_sum(
             x - log_normalization, reduction_indices=-1, keep_dims=True))
         return array_ops.squeeze(fldj, squeeze_dims=-1)
Example #58
  def call(self, inputs, mask=None):
    if mask is not None:
      # Since mask is 1.0 for positions we want to keep and 0.0 for
      # masked positions, this operation will create a tensor which is 0.0 for
      # positions we want to attend and -1e9 for masked positions.
      adder = (1.0 - math_ops.cast(mask, inputs.dtype)) * (
          _large_compatible_negative(inputs.dtype))

      # Since we are adding it to the raw scores before the softmax, this is
      # effectively the same as removing these entirely.
      inputs += adder
    if isinstance(self.axis, (tuple, list)):
      if len(self.axis) > 1:
        return math_ops.exp(inputs - math_ops.reduce_logsumexp(
            inputs, axis=self.axis, keepdims=True))
      else:
        return K.softmax(inputs, axis=self.axis[0])
    return K.softmax(inputs, axis=self.axis)
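The exp(inputs - reduce_logsumexp(inputs, axis, keepdims=True)) branch above is softmax written so it can normalize jointly over several axes, which a single softmax call cannot. A NumPy sketch of that equivalence (toy shapes):

import numpy as np

x = np.random.rand(2, 3, 4)

# Stable logsumexp over axes 1 and 2.
m = np.max(x, axis=(1, 2), keepdims=True)
log_norm = np.log(np.sum(np.exp(x - m), axis=(1, 2), keepdims=True)) + m

softmax = np.exp(x - log_norm)
print(np.allclose(np.sum(softmax, axis=(1, 2)), 1.0))  # normalized jointly over both axes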
Example #59
    def _multi_seq_fn():
        """Forward computation of alpha values."""
        rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])

        # Compute the alpha values in the forward algorithm in order to get the
        # partition function.
        forward_cell = CrfForwardRnnCell(transition_params)
        # Sequence length is not allowed to be less than zero.
        sequence_lengths_less_one = math_ops.maximum(0, sequence_lengths - 1)
        _, alphas = rnn.dynamic_rnn(cell=forward_cell,
                                    inputs=rest_of_input,
                                    sequence_length=sequence_lengths_less_one,
                                    initial_state=first_input,
                                    dtype=dtypes.float32)
        log_norm = math_ops.reduce_logsumexp(alphas, [1])
        # Mask `log_norm` of the sequences with length <= zero.
        log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
                                   array_ops.zeros_like(log_norm), log_norm)
        return log_norm
Example #60
def get_forwards_probabilities(inputs, sequence_lengths, transitions):
    '''
    CRF forward probabilities and log normalizer.

    inputs: bs x L x V unaries
    sequence_lengths: bs
    transitions: An object implementing CRF transitions

    returns: bs x L and bs
    '''

    # Split up the first and rest of the inputs in preparation for the forward
    # algorithm.
    first_input = array_ops.slice(inputs, [0, 0, 0], [-1, 1, -1])
    first_input = array_ops.squeeze(first_input, [1])
    """Forward computation of alpha values."""
    unary = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])

    pairwise = transitions.pack_to_parameter_sequence()
    pairwise = pairwise[:, 1:, :]

    rnn_inputs = array_ops.concat([unary, pairwise], axis=2)

    # Compute the alpha values in the forward algorithm in order to get the
    # partition function.
    forward_cell = CrfForwardRnnCell(transitions)
    # Sequence length is not allowed to be less than zero.
    sequence_lengths_less_one = math_ops.maximum(
        constant_op.constant(0, dtype=sequence_lengths.dtype),
        sequence_lengths - 1)
    all_alphas, alphas = rnn.dynamic_rnn(
        cell=forward_cell,
        inputs=rnn_inputs,
        sequence_length=sequence_lengths_less_one,
        initial_state=first_input,
        dtype=dtypes.float32)

    log_norm = math_ops.reduce_logsumexp(alphas, [1])
    # Mask `log_norm` of the sequences with length <= zero.
    log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
                               array_ops.zeros_like(log_norm), log_norm)

    return all_alphas, log_norm