def _forward(state_log_prob, obs_log_prob):
  state_log_prob = array_ops.expand_dims(state_log_prob, axis=1)  # Broadcast.
  state_log_prob += state_trans_log_probs
  state_log_prob = math_ops.reduce_logsumexp(state_log_prob, axis=-1)
  state_log_prob += obs_log_prob
  log_prob_sum = math_ops.reduce_logsumexp(
      state_log_prob, axis=-1, keepdims=True)
  state_log_prob -= log_prob_sum
  return state_log_prob
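# A minimal NumPy sketch (assumed shapes, not the TensorFlow code above) of
# the same log-space forward step: broadcast-add the transition log-probs,
# logsumexp out the previous state, add the observation log-probs, then
# renormalize. `state_log_prob` is [batch, states], `state_trans_log_probs`
# is [states, states], `obs_log_prob` is [batch, states].
import numpy as np
from scipy.special import logsumexp

def forward_step(state_log_prob, state_trans_log_probs, obs_log_prob):
  # [batch, 1, states] + [states, states] -> [batch, states, states].
  expanded = state_log_prob[:, np.newaxis, :] + state_trans_log_probs
  state_log_prob = logsumexp(expanded, axis=-1) + obs_log_prob
  # Renormalize so every row stays a valid log-distribution.
  return state_log_prob - logsumexp(state_log_prob, axis=-1, keepdims=True)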
def _compute_energy_change(current_target_log_prob,
                           current_momentums,
                           proposed_target_log_prob,
                           proposed_momentums,
                           independent_chain_ndims,
                           name=None):
  """Helper to `kernel` which computes the energy change."""
  with ops.name_scope(
      name, "compute_energy_change",
      ([current_target_log_prob, proposed_target_log_prob,
        independent_chain_ndims] + current_momentums + proposed_momentums)):
    # Abbreviate lk0=log_kinetic_energy and lk1=proposed_log_kinetic_energy
    # since they're a mouthful and this lets us inline more.
    lk0, lk1 = [], []
    for current_momentum, proposed_momentum in zip(current_momentums,
                                                   proposed_momentums):
      axis = math_ops.range(independent_chain_ndims,
                            array_ops.rank(current_momentum))
      lk0.append(_log_sum_sq(current_momentum, axis))
      lk1.append(_log_sum_sq(proposed_momentum, axis))

    lk0 = -np.log(2.) + math_ops.reduce_logsumexp(
        array_ops.stack(lk0, axis=-1), axis=-1)
    lk1 = -np.log(2.) + math_ops.reduce_logsumexp(
        array_ops.stack(lk1, axis=-1), axis=-1)
    lp0 = -current_target_log_prob   # log_potential
    lp1 = -proposed_target_log_prob  # proposed_log_potential
    x = array_ops.stack([lp1, math_ops.exp(lk1), -lp0, -math_ops.exp(lk0)],
                        axis=-1)

    # The sum is NaN if any element is NaN or we see both +Inf and -Inf.
    # Thus we will replace such rows with infinite energy change which implies
    # rejection. Recall that float-comparisons with NaN are always False.
    is_sum_determinate = (
        math_ops.reduce_all(math_ops.is_finite(x) | (x >= 0.), axis=-1) &
        math_ops.reduce_all(math_ops.is_finite(x) | (x <= 0.), axis=-1))
    is_sum_determinate = array_ops.tile(
        is_sum_determinate[..., array_ops.newaxis],
        multiples=array_ops.concat([
            array_ops.ones(array_ops.rank(is_sum_determinate),
                           dtype=dtypes.int32),
            [4],
        ], axis=0))
    x = array_ops.where(is_sum_determinate,
                        x,
                        array_ops.fill(array_ops.shape(x),
                                       value=x.dtype.as_numpy_dtype(np.inf)))

    return math_ops.reduce_sum(x, axis=-1)
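# Hedged plain-NumPy restatement of the quantity computed above (scalar case,
# unit mass matrix assumed): with kinetic energy K(m) = 0.5 * sum(m**2), the
# energy change used in the Metropolis correction is
#   dE = (-proposed_target_log_prob + K(m1)) - (-current_target_log_prob + K(m0)),
# which is exactly what the stacked [lp1, exp(lk1), -lp0, -exp(lk0)] sums to.
import numpy as np

def energy_change(current_tlp, proposed_tlp, current_m, proposed_m):
  k0 = 0.5 * np.sum(np.square(current_m))
  k1 = 0.5 * np.sum(np.square(proposed_m))
  return (-proposed_tlp + k1) - (-current_tlp + k0)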
def _state_to_olabel(labels, num_labels, states):
  """Sum state log probs to ilabel log probs."""

  num_label_states = _get_dim(labels, 1) + 1
  label_states = states[:, :, 1:num_label_states]
  blank_states = states[:, :, num_label_states:]

  one_hot = array_ops.one_hot(
      labels - 1,
      depth=(num_labels - 1),
      on_value=0.0,
      off_value=math_ops.log(0.0))
  one_hot = array_ops.expand_dims(one_hot, axis=0)

  label_states = array_ops.expand_dims(label_states, axis=3)
  label_olabels = math_ops.reduce_logsumexp(label_states + one_hot, axis=2)

  blank_olabels = math_ops.reduce_logsumexp(
      blank_states, axis=2, keepdims=True)

  return array_ops.concat([blank_olabels, label_olabels], axis=-1)
def __call__(self, inputs, state, scope=None):
  """Build the CrfForwardRnnCell.

  Args:
    inputs: A [batch_size, num_tags] matrix of unary potentials.
    state: A [batch_size, num_tags] matrix containing the previous alpha
      values.
    scope: Unused variable scope of this cell.

  Returns:
    new_alphas, new_alphas: A pair of [batch_size, num_tags] matrices
      containing the new alpha values.
  """
  state = array_ops.expand_dims(state, 2)

  # This addition op broadcasts self._transition_params along the zeroth
  # dimension and state along the second dimension. This performs the
  # multiplication of previous alpha values and the current binary potentials
  # in log space.
  transition_scores = state + self._transition_params
  new_alphas = inputs + math_ops.reduce_logsumexp(transition_scores, [1])

  # Both the state and the output of this RNN cell contain the alpha values.
  # The output value is currently unused and simply satisfies the RNN API.
  # This could be useful in the future if we need to compute marginal
  # probabilities, which would require the accumulated alpha values at every
  # time step.
  return new_alphas, new_alphas
def crf_log_norm(inputs, sequence_lengths, transition_params):
  """Computes the normalization for a CRF.

  Args:
    inputs: A [batch_size, max_seq_len, num_tags] tensor of unary potentials
      to use as input to the CRF layer.
    sequence_lengths: A [batch_size] vector of true sequence lengths.
    transition_params: A [num_tags, num_tags] transition matrix.

  Returns:
    log_norm: A [batch_size] vector of normalizers for a CRF.
  """
  # Split up the first and rest of the inputs in preparation for the forward
  # algorithm.
  first_input = array_ops.slice(inputs, [0, 0, 0], [-1, 1, -1])
  first_input = array_ops.squeeze(first_input, [1])
  rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])

  # Compute the alpha values in the forward algorithm in order to get the
  # partition function.
  forward_cell = CrfForwardRnnCell(transition_params)
  _, alphas = rnn.dynamic_rnn(
      cell=forward_cell,
      inputs=rest_of_input,
      sequence_length=sequence_lengths - 1,
      initial_state=first_input,
      dtype=dtypes.float32)
  log_norm = math_ops.reduce_logsumexp(alphas, [1])
  return log_norm
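# Hedged brute-force cross-check (NumPy only, assumed shapes): the forward
# algorithm above should agree with a logsumexp over the scores of every
# possible tag sequence, which is tractable only for tiny inputs. This is the
# same comparison the CRF tests later in this section perform in TensorFlow.
import itertools
import numpy as np
from scipy.special import logsumexp

def brute_force_log_norm(unaries, transitions):
  # unaries: [seq_len, num_tags]; transitions: [num_tags, num_tags].
  seq_len, num_tags = unaries.shape
  scores = []
  for tags in itertools.product(range(num_tags), repeat=seq_len):
    score = unaries[0, tags[0]]
    for t in range(1, seq_len):
      score += transitions[tags[t - 1], tags[t]] + unaries[t, tags[t]]
    scores.append(score)
  return logsumexp(scores)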
def _sum_states(idx, states):
  """Take logsumexp for each unique state out of all label states.

  Args:
    idx: tensor of shape [batch, label_length]. For each sequence, indices
      into a set of unique labels as computed by calling unique.
    states: tensor of shape [frames, batch, label_length]. Log probabilities
      for each label state.

  Returns:
    tensor of shape [frames, batch_size, label_length], log probabilities
      summed for each unique label of the sequence.
  """
  with ops.name_scope("sum_states"):
    idx = ops.convert_to_tensor(idx, name="idx")
    num_states = _get_dim(states, 2)
    states = array_ops.expand_dims(states, axis=2)
    one_hot = array_ops.one_hot(
        idx,
        depth=num_states,
        on_value=0.0,
        off_value=math_ops.log(0.0),
        axis=1)
    return math_ops.reduce_logsumexp(states + one_hot, axis=-1)
def testReduceLogSumExp(self):
  for dtype in [np.float16, np.float32, np.double]:
    x_np = np.random.rand(5, 5).astype(dtype)
    with self.test_session(use_gpu=True):
      y_tf_np = math_ops.reduce_logsumexp(x_np).eval()
      y_np = log(np.sum(exp(x_np)))
      self.assertAllClose(y_tf_np, y_np)
def _single_seq_fn():
  log_norm = math_ops.reduce_logsumexp(first_input, [1])
  # Mask `log_norm` of the sequences with length <= zero.
  log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
                             array_ops.zeros_like(log_norm),
                             log_norm)
  return log_norm
def _log_cdf(self, x):
  x = self._pad_sample_dims(x)
  log_cdf_x = self.components_distribution.log_cdf(x)  # [S, B, k]
  log_mix_prob = nn_ops.log_softmax(
      self.mixture_distribution.logits, axis=-1)  # [B, k]
  return math_ops.reduce_logsumexp(
      log_cdf_x + log_mix_prob, axis=-1)  # [S, B]
def _state_to_olabel_unique(labels, num_labels, states, unique):
  """Sum state log probs to ilabel log probs using unique label indices."""

  num_label_states = _get_dim(labels, 1) + 1
  label_states = states[:, :, 1:num_label_states]
  blank_states = states[:, :, num_label_states:]

  unique_y, unique_idx = unique
  mul_reduce = _sum_states(unique_idx, label_states)

  num_frames = states.shape[0]
  batch_size = states.shape[1]
  num_states = num_label_states - 1
  batch_state_major = array_ops.transpose(mul_reduce, perm=[1, 2, 0])
  batch_state_major = array_ops.reshape(
      batch_state_major, [batch_size * num_states, num_frames])
  batch_offset = math_ops.range(batch_size, dtype=unique_y.dtype) * num_labels
  indices = unique_y + array_ops.expand_dims(batch_offset, axis=-1)
  indices = array_ops.reshape(indices, [-1, 1])
  scatter = array_ops.scatter_nd(
      indices=indices,
      updates=batch_state_major,
      shape=[batch_size * num_labels, num_frames])
  scatter = array_ops.reshape(scatter, [batch_size, num_labels, num_frames])
  scatter = array_ops.where(
      math_ops.equal(scatter, 0.0),
      array_ops.fill(array_ops.shape(scatter), math_ops.log(0.0)),
      scatter)
  label_olabels = array_ops.transpose(scatter, [2, 0, 1])
  label_olabels = label_olabels[:, :, 1:]

  blank_olabels = math_ops.reduce_logsumexp(
      blank_states, axis=2, keepdims=True)

  return array_ops.concat([blank_olabels, label_olabels], axis=-1)
def _define_score_samples(self):
  """Defines the likelihood of each data sample."""
  op = []
  for shard_id, prior_probs in enumerate(self._prior_probs):
    op.append(prior_probs + math_ops.log(self._w[shard_id]))
  self._scores = array_ops.squeeze(
      math_ops.reduce_logsumexp(op, axis=2, keepdims=True), axis=0)
def testCrfLogLikelihood(self):
  inputs = np.array(
      [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
  transition_params = np.array(
      [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
  sequence_lengths = np.array(3, dtype=np.int32)
  num_words = inputs.shape[0]
  num_tags = inputs.shape[1]
  with self.test_session() as sess:
    all_sequence_log_likelihoods = []

    # Make sure all probabilities sum to 1.
    for tag_indices in itertools.product(
        range(num_tags), repeat=sequence_lengths):
      tag_indices = list(tag_indices)
      tag_indices.extend([0] * (num_words - sequence_lengths))
      sequence_log_likelihood, _ = crf.crf_log_likelihood(
          inputs=array_ops.expand_dims(inputs, 0),
          tag_indices=array_ops.expand_dims(tag_indices, 0),
          sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
          transition_params=constant_op.constant(transition_params))
      all_sequence_log_likelihoods.append(sequence_log_likelihood)
    total_log_likelihood = math_ops.reduce_logsumexp(
        all_sequence_log_likelihoods)
    tf_total_log_likelihood = sess.run(total_log_likelihood)
    self.assertAllClose(tf_total_log_likelihood, 0.0)
def _log_variance(self):
  # The following calculation is based on the law of total variance:
  #
  #   Var[Z] = E[Var[Z | V]] + Var[E[Z | V]]
  #
  # where,
  #
  #   Z|v ~ interpolate_affine[v](distribution)
  #   V ~ mixture_distribution
  #
  # thus,
  #
  #   E[Var[Z | V]] = sum{ prob[d] Var[d] : d=0, ..., deg-1 }
  #   Var[E[Z | V]] = sum{ prob[d] (Mean[d] - Mean)**2 : d=0, ..., deg-1 }
  v = array_ops.stack([
      # log(self.distribution.variance()) = log(Var[d]) = log(rate[d])
      self._log_rate,
      # log((Mean[d] - Mean)**2)
      2. * math_ops.log(
          math_ops.abs(self.distribution.mean() -
                       self._mean()[..., array_ops.newaxis])),
  ], axis=-1)
  return math_ops.reduce_logsumexp(
      self.mixture_distribution.logits[..., array_ops.newaxis] + v,
      axis=[-2, -1])
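# Hedged NumPy check of the law-of-total-variance identity used above, for a
# generic mixture with weights prob[d], component means mu[d] and variances
# var[d] (illustrative numbers only, not taken from the code above):
import numpy as np
from scipy.special import logsumexp

prob = np.array([0.2, 0.5, 0.3])
mu = np.array([1.0, 4.0, 10.0])
var = np.array([0.5, 2.0, 1.0])

mean = np.sum(prob * mu)
total_var = np.sum(prob * var) + np.sum(prob * (mu - mean) ** 2)

# The same quantity in log space, mirroring the reduce_logsumexp above.
v = np.stack([np.log(var), 2. * np.log(np.abs(mu - mean))], axis=-1)
log_var = logsumexp(np.log(prob)[..., np.newaxis] + v, axis=(-2, -1))
assert np.allclose(np.exp(log_var), total_var)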
def testCrfLogNorm(self):
  inputs = np.array(
      [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
  transition_params = np.array(
      [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
  num_words = inputs.shape[0]
  num_tags = inputs.shape[1]
  sequence_lengths = np.array(3, dtype=np.int32)
  with self.test_session() as sess:
    all_sequence_scores = []

    # Compare the dynamic program with brute force computation.
    for tag_indices in itertools.product(
        range(num_tags), repeat=sequence_lengths):
      tag_indices = list(tag_indices)
      tag_indices.extend([0] * (num_words - sequence_lengths))
      all_sequence_scores.append(
          crf.crf_sequence_score(
              inputs=array_ops.expand_dims(inputs, 0),
              tag_indices=array_ops.expand_dims(tag_indices, 0),
              sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
              transition_params=constant_op.constant(transition_params)))

    brute_force_log_norm = math_ops.reduce_logsumexp(all_sequence_scores)
    log_norm = crf.crf_log_norm(
        inputs=array_ops.expand_dims(inputs, 0),
        sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
        transition_params=constant_op.constant(transition_params))
    log_norm = array_ops.squeeze(log_norm, [0])
    tf_brute_force_log_norm, tf_log_norm = sess.run(
        [brute_force_log_norm, log_norm])
    self.assertAllClose(tf_log_norm, tf_brute_force_log_norm)
def _assert_valid_sample(self, x):
  if not self.validate_args:
    return x
  return control_flow_ops.with_dependencies([
      check_ops.assert_non_positive(x),
      distribution_util.assert_close(
          array_ops.zeros((), dtype=self.dtype),
          math_ops.reduce_logsumexp(x, reduction_indices=[-1])),
  ], x)
def testKeepDims(self):
  for dtype in [np.float16, np.float32, np.double]:
    x_np = np.random.rand(5, 5).astype(dtype)
    with self.test_session(use_gpu=True):
      y_tf_np = math_ops.reduce_logsumexp(x_np, keepdims=True).eval()
      self.assertEqual(y_tf_np.ndim, x_np.ndim)
      y_np = log(np.sum(exp(x_np), keepdims=True))
      self.assertAllClose(y_tf_np, y_np)
def _log_prob(self, x):
  with ops.control_dependencies(self._runtime_assertions):
    x = self._pad_sample_dims(x)
    log_prob_x = self.components_distribution.log_prob(x)  # [S, B, k]
    log_mix_prob = nn_ops.log_softmax(
        self.mixture_distribution.logits, axis=-1)  # [B, k]
    return math_ops.reduce_logsumexp(
        log_prob_x + log_mix_prob, axis=-1)  # [S, B]
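# Hedged NumPy restatement of the mixture log-prob above: the density of a
# mixture is sum_k pi_k * p_k(x), so its log is a logsumexp of
# log pi_k + log p_k(x) over the component axis. Gaussian components are
# assumed here purely for illustration; `x`, `locs`, `scales` are arrays.
import numpy as np
from scipy.special import logsumexp
from scipy.stats import norm

def mixture_log_prob(x, logits, locs, scales):
  log_mix = logits - logsumexp(logits)  # log_softmax over components.
  log_comp = norm.logpdf(x[..., np.newaxis], locs, scales)  # [..., k]
  return logsumexp(log_comp + log_mix, axis=-1)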
def testReductionIndices2(self):
  for dtype in [np.float16, np.float32, np.double]:
    x_np = np.random.rand(5, 5).astype(dtype)
    with self.test_session(use_gpu=True):
      y_tf = math_ops.reduce_logsumexp(x_np, reduction_indices=0)
      y_np = log(np.sum(exp(x_np), axis=0))
      self.assertShapeEqual(y_np, y_tf)
      y_tf_np = y_tf.eval()
      self.assertAllClose(y_tf_np, y_np)
def _backward(accs, elems):
  """Calculate log probs and cumulative sum masked for sequence length."""
  state_log_prob, cum_log_sum = accs
  obs_log_prob, mask = elems
  state_log_prob += obs_log_prob
  state_log_prob = array_ops.expand_dims(state_log_prob, axis=1)  # Broadcast.
  state_log_prob += bwd_state_trans_log_probs
  state_log_prob = math_ops.reduce_logsumexp(state_log_prob, axis=-1)
  log_prob_sum = math_ops.reduce_logsumexp(
      state_log_prob, axis=-1, keepdims=True)
  state_log_prob -= log_prob_sum
  cum_log_sum += array_ops.squeeze(log_prob_sum) * mask
  batched_mask = array_ops.expand_dims(mask, axis=1)
  out = state_log_prob * batched_mask
  out += final_state_log_probs * (1.0 - batched_mask)
  return out, cum_log_sum
def _log_prob(self, x):
  with ops.control_dependencies(self._assertions):
    x = ops.convert_to_tensor(x, name="x")
    distribution_log_probs = [d.log_prob(x) for d in self.components]
    cat_log_probs = self._cat_probs(log_probs=True)
    final_log_probs = [
        cat_lp + d_lp
        for (cat_lp, d_lp) in zip(cat_log_probs, distribution_log_probs)
    ]
    concat_log_probs = array_ops.stack(final_log_probs, 0)
    log_sum_exp = math_ops.reduce_logsumexp(concat_log_probs, [0])
    return log_sum_exp
def testReductionIndices(self):
  for dtype in [np.float16, np.float32, np.double]:
    x_np = np.random.rand(5, 5).astype(dtype)
    with test_util.use_gpu():
      y_tf = math_ops.reduce_logsumexp(x_np, axis=[0])
      y_np = np.log(np.sum(np.exp(x_np), axis=0))
      self.assertShapeEqual(y_np, y_tf)
      y_tf_np = self.evaluate(y_tf)
      self.assertAllClose(y_tf_np, y_np)
def _assert_valid_sample(self, x):
  if not self.validate_args:
    return x
  return control_flow_ops.with_dependencies([
      check_ops.assert_non_positive(x),
      distribution_util.assert_close(
          array_ops.zeros([], dtype=self.dtype),
          math_ops.reduce_logsumexp(x, axis=[-1])),
  ], x)
def testKeepDims(self):
  for dtype in [np.float16, np.float32, np.double]:
    x_np = np.random.rand(5, 5).astype(dtype)
    with self.test_session(use_gpu=True):
      y_tf_np = math_ops.reduce_logsumexp(x_np, keep_dims=True).eval()
      self.assertEqual(y_tf_np.ndim, x_np.ndim)
      y_np = log(np.sum(exp(x_np), keepdims=True))
      self.assertAllClose(y_tf_np, y_np)
def _assert_valid_sample(self, x):
  if not self.validate_args:
    return x
  return control_flow_ops.with_dependencies([
      check_ops.assert_non_positive(x),
      check_ops.assert_near(
          array_ops.zeros([], dtype=self.dtype),
          math_ops.reduce_logsumexp(x, axis=[-1])),
  ], x)
def get_backwards_probabilities(inputs, sequence_lengths, transitions):
  '''
  CRF backwards probabilities and log normalizer

  inputs: bs x L x V unaries
  sequence_lengths: bs
  transitions: An object implementing CRF transitions

  returns: bs x L and bs
  '''
  batch_size = array_ops.shape(inputs)[0]

  # Split up the first and rest of the inputs in preparation for the backward
  # algorithm.
  first_input = inputs[:, 0, :]
  num_tags = transitions.num_tags

  pairwise = transitions.pack_to_parameter_sequence()
  rest_of_pairwise = pairwise[:, 1:, :]
  rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])

  # Sequence length is not allowed to be less than zero.
  sequence_lengths_minus_one = math_ops.maximum(
      array_ops.constant(0, dtype=sequence_lengths.dtype),
      sequence_lengths - 1)

  # Compute the beta values in the backward algorithm in order to get the
  # partition function.
  backward_cell = CrfBackwardsRnnCell(transitions)

  concatenated_rest_of_input = array_ops.concat(
      [rest_of_input, rest_of_pairwise], axis=2)
  reversed_concatenated_rest_of_input = reverse_and_repad(
      concatenated_rest_of_input, sequence_lengths_minus_one, 0)
  initial_state = array_ops.zeros([batch_size, num_tags],
                                  dtype=dtypes.float32)

  all_betas, betas = rnn.dynamic_rnn(
      cell=backward_cell,
      inputs=reversed_concatenated_rest_of_input,
      sequence_length=sequence_lengths_minus_one,
      initial_state=initial_state,
      dtype=dtypes.float32)
  log_norm = math_ops.reduce_logsumexp(first_input + betas, [1])
  # Mask `log_norm` of the sequences with length <= zero.
  log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
                             array_ops.zeros_like(log_norm),
                             log_norm)
  all_betas = reverse_and_repad(all_betas, sequence_lengths_minus_one, 0)
  return all_betas, log_norm
def __sample_w3(self, n, seed=0):
  shape = array_ops.concat(([n], self.batch_shape_tensor()[:-1], [1]), 0)
  u = random_ops.random_uniform(shape, dtype=self.dtype, seed=seed)
  u = tf.clip_by_value(u, 1e-16, 1 - 1e-16)
  self.__w = 1 + math_ops.reduce_logsumexp(
      [math_ops.log(u), math_ops.log(1 - u) - 2 * self.scale],
      axis=0) / self.scale
  return self.__w
def _define_prior_log_prob_operation(self, shard_id):
  """Computes the prior probability of all samples.

  Updates a vector where each item is the prior probability of an
  input example.

  Args:
    shard_id: id of the current shard.
  """
  self._prior_probs[shard_id] = math_ops.reduce_logsumexp(
      self._probs[shard_id], axis=1, keepdims=True)
def _crf_log_norm(self, inputs, seq_lens):
  first_input = array_ops.slice(inputs, [0, 0, 0], [-1, 1, -1])
  first_input = array_ops.squeeze(first_input, [1])
  rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])
  forward_cell = CrfForwardRnnCell(self.transition_params)
  # Sequence length is not allowed to be less than zero.
  seq_lens_less_one = math_ops.maximum(
      constant_op.constant(0, dtype=seq_lens.dtype), seq_lens - 1)
  _, alphas = rnn.dynamic_rnn(
      cell=forward_cell,
      inputs=rest_of_input,
      sequence_length=seq_lens_less_one,
      initial_state=first_input,
      dtype=dtypes.float32)
  log_norm = math_ops.reduce_logsumexp(alphas, [1])
  # Mask `log_norm` of the sequences with length <= zero.
  log_norm = array_ops.where(math_ops.less_equal(seq_lens, 0),
                             array_ops.zeros_like(log_norm),
                             log_norm)
  return log_norm
def _forward_log_det_jacobian(self, x):
  # This code is similar to nn_ops.log_softmax but different because we have
  # an implicit zero column to handle. I.e., instead of:
  #   reduce_sum(logits - reduce_sum(exp(logits), dim))
  # we must do:
  #   log_normalization = 1 + reduce_sum(exp(logits))
  #   -log_normalization + reduce_sum(logits - log_normalization)
  log_normalization = nn_ops.softplus(
      math_ops.reduce_logsumexp(x, axis=-1, keepdims=True))
  return array_ops.squeeze(
      (-log_normalization +
       math_ops.reduce_sum(x - log_normalization, axis=-1, keepdims=True)),
      axis=-1)
def _log_cdf(self, x):
  with ops.control_dependencies(self._assertions):
    x = ops.convert_to_tensor(x, name="x")
    distribution_log_cdfs = [d.log_cdf(x) for d in self.components]
    cat_log_probs = self._cat_probs(log_probs=True)
    final_log_cdfs = [
        cat_lp + d_lcdf
        for (cat_lp, d_lcdf) in zip(cat_log_probs, distribution_log_cdfs)
    ]
    concatted_log_cdfs = array_ops.stack(final_log_cdfs, axis=0)
    mixture_log_cdf = math_ops.reduce_logsumexp(concatted_log_cdfs, [0])
    return mixture_log_cdf
def _benchmark_tf_reduce_logsumexp(self,
                                   device=CPU,
                                   execution_mode=None,
                                   defunc=False):
  with context.device(device):
    x = constant_op.constant([[1, 0.], [0., 0.]])
    if defunc:
      reduce_func = def_function.function(math_ops.reduce_logsumexp)
      func = lambda: reduce_func(x)
    else:
      func = lambda: math_ops.reduce_logsumexp(x)
    self._run(func, 3000, execution_mode=execution_mode)
def _log_prob(self, y):
  # For caching to work, it is imperative that the bijector is the first to
  # modify the input.
  x = self.bijector.inverse(y)
  ildj = self.bijector.inverse_log_det_jacobian(y)
  if self.bijector._is_injective:  # pylint: disable=protected-access
    return self._finish_log_prob_for_one_fiber(y, x, ildj)

  lp_on_fibers = [
      self._finish_log_prob_for_one_fiber(y, x_i, ildj_i)
      for x_i, ildj_i in zip(x, ildj)
  ]
  return math_ops.reduce_logsumexp(array_ops.stack(lp_on_fibers), axis=0)
def _multi_seq_fn():
  # Split up the first and rest of the inputs in preparation for the forward
  # algorithm.
  batch_size = array_ops.shape(inputs)[0]
  num_tags = array_ops.shape(inputs)[2]
  first_input = array_ops.slice(inputs, [0, 0, 0], [-1, 1, -1])
  first_input = array_ops.squeeze(first_input, [1])
  rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])

  # Compute the alpha values in the forward algorithm.
  forward_cell = CrfForwardRnnCell(transition_params)
  alphas_seq, alphas = rnn.dynamic_rnn(
      cell=forward_cell,
      inputs=rest_of_input,
      sequence_length=sequence_lengths - 1,
      initial_state=first_input,
      dtype=dtypes.float32)
  # Collect the alphas from every time step.
  alphas_seq = tf.concat(
      [tf.expand_dims(first_input, axis=1), alphas_seq], axis=1)

  # Compute the beta values in the backward algorithm. Since we work in log
  # space, the betas are initialized to log(1) = 0.0.
  first_input = tf.constant(0.0, shape=[1, 1])
  first_input = tf.tile(first_input, multiples=[batch_size, num_tags])
  # Reverse the sequence of inputs used in the forward algorithm for the
  # backward algorithm.
  rest_of_input = gen_array_ops.reverse_sequence(
      rest_of_input, sequence_lengths - 1, seq_dim=1)
  # Transpose the transition parameters for the backward algorithm.
  backward_cell = CrfBackwardRnnCell(
      tf.transpose(transition_params, perm=[1, 0]))
  betas_seq, betas = rnn.dynamic_rnn(
      cell=backward_cell,
      inputs=rest_of_input,
      sequence_length=sequence_lengths - 1,
      initial_state=first_input,
      dtype=dtypes.float32)
  betas_seq = tf.concat(
      [tf.expand_dims(first_input, axis=1), betas_seq], axis=1)
  # Reverse the betas so they follow the same time indexing as the alphas.
  betas_seq = tf.reverse_sequence(betas_seq, sequence_lengths, seq_dim=1)

  # CRF log normalizer.
  log_norm = math_ops.reduce_logsumexp(alphas, [1])
  return alphas_seq, betas_seq, log_norm
def _log_prob(self, x):
  # By convention, we always put the grid points right-most.
  y = array_ops.stack(
      [aff.inverse(x) for aff in self.interpolated_affine], axis=-1)
  log_prob = math_ops.reduce_sum(self.distribution.log_prob(y), axis=-2)
  # Because the affine transformation has a constant Jacobian, it is the case
  # that `affine.fldj(x) = -affine.ildj(x)`. This is not true in general.
  fldj = array_ops.stack(
      [aff.forward_log_det_jacobian(x) for aff in self.interpolated_affine],
      axis=-1)
  return math_ops.reduce_logsumexp(
      self.mixture_distribution.logits - fldj + log_prob, axis=-1)
def __sample_w3(self, n, seed, eps=1e-8):
  shape = array_ops.concat(([n], self.batch_shape_tensor()[:-1], [1]), 0)
  u = random_ops.random_uniform(shape, eps, 1 - eps, dtype=self.dtype,
                                seed=seed)
  self.__w = 1 + math_ops.reduce_logsumexp(
      array_ops.stack(
          [math_ops.log(u), math_ops.log(1 - u) - 2 * self.scale], -1),
      -1) / self.scale
  return self.__w
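# Hedged NumPy sketch of the trick above: in the 3-D case, w admits the
# closed form w = 1 + log(u + (1 - u) * exp(-2 * kappa)) / kappa, and the
# stacked logsumexp evaluates exactly that expression stably, since
# logsumexp([log(u), log(1 - u) - 2 * kappa])
#   == log(u + (1 - u) * exp(-2 * kappa)).
import numpy as np
from scipy.special import logsumexp

def sample_w3(kappa, n, eps=1e-8, rng=None):
  rng = np.random.default_rng() if rng is None else rng
  u = rng.uniform(eps, 1 - eps, size=n)
  stacked = np.stack([np.log(u), np.log(1 - u) - 2. * kappa], axis=-1)
  return 1. + logsumexp(stacked, axis=-1) / kappa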
def testCrfLogNorm(self):
  transition_params = np.array(
      [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
  # Test both the length-1 and regular cases.
  sequence_lengths_list = [
      np.array(3, dtype=np.int32),
      np.array(1, dtype=np.int32)
  ]
  inputs_list = [
      np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]],
               dtype=np.float32),
      np.array([[3, -1, 3]], dtype=np.float32),
  ]
  tag_indices_list = [
      np.array([1, 2, 1, 0], dtype=np.int32),
      np.array([2], dtype=np.int32)
  ]
  for sequence_lengths, inputs, tag_indices in zip(
      sequence_lengths_list, inputs_list, tag_indices_list):
    num_words = inputs.shape[0]
    num_tags = inputs.shape[1]
    with self.test_session() as sess:
      all_sequence_scores = []

      # Compare the dynamic program with brute force computation.
      for tag_indices in itertools.product(
          range(num_tags), repeat=sequence_lengths):
        tag_indices = list(tag_indices)
        tag_indices.extend([0] * (num_words - sequence_lengths))
        all_sequence_scores.append(
            crf.crf_sequence_score(
                inputs=array_ops.expand_dims(inputs, 0),
                tag_indices=array_ops.expand_dims(tag_indices, 0),
                sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
                transition_params=constant_op.constant(transition_params)))

      brute_force_log_norm = math_ops.reduce_logsumexp(all_sequence_scores)
      log_norm = crf.crf_log_norm(
          inputs=array_ops.expand_dims(inputs, 0),
          sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
          transition_params=constant_op.constant(transition_params))
      log_norm = array_ops.squeeze(log_norm, [0])
      tf_brute_force_log_norm, tf_log_norm = sess.run(
          [brute_force_log_norm, log_norm])
      self.assertAllClose(tf_log_norm, tf_brute_force_log_norm)
def _single_seq_fn():
  batch_size = array_ops.shape(inputs)[0]
  num_tags = array_ops.shape(inputs)[2]
  alphas_seq = array_ops.slice(inputs, [0, 0, 0], [-1, 1, -1])
  # Since we work in log space, the betas are initialized to log(1) = 0.0.
  betas = tf.constant(0.0, shape=[1, 1])
  betas = tf.tile(betas, multiples=[batch_size, num_tags])
  betas_seq = array_ops.expand_dims(betas, 1)
  # CRF log normalizer.
  log_norm = math_ops.reduce_logsumexp(alphas_seq, [2])
  return alphas_seq, betas_seq, log_norm
def call(self, inputs, mask=None):
  if mask is not None:
    # The mask is 1.0 for positions we want to keep, so it must be flipped
    # before scaling by a large negative number.
    adder = (1.0 - math_ops.cast(mask, inputs.dtype)) * (
        _large_compatible_negative(inputs.dtype))
    inputs += adder
  if isinstance(self.axis, (tuple, list)):
    if len(self.axis) > 1:
      return math_ops.exp(inputs - math_ops.reduce_logsumexp(
          inputs, axis=self.axis, keepdims=True))
    else:
      return K.softmax(inputs, axis=self.axis[0])
  return K.softmax(inputs, axis=self.axis)
def _multi_seq_fn():
  """Forward computation of alpha values."""
  rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])
  # Compute the alpha values in the forward algorithm in order to get the
  # partition function.
  forward_cell = CrfForwardRnnCell(transition_params)
  _, alphas = rnn.dynamic_rnn(
      cell=forward_cell,
      inputs=rest_of_input,
      sequence_length=sequence_lengths - 1,
      initial_state=first_input,
      dtype=dtypes.float32)
  log_norm = math_ops.reduce_logsumexp(alphas, [1])
  return log_norm
def testUnderflow(self):
  x = [-1000, -1001, -1002, -1003]
  for dtype in [np.float16, np.float32, np.double]:
    x_np = np.array(x, dtype=dtype)
    max_np = np.max(x_np)
    with self.assertRaisesRegexp(RuntimeWarning,
                                 "divide by zero encountered in log"):
      out = log(np.sum(exp(x_np)))
      if out == -np.inf:
        raise RuntimeWarning("divide by zero encountered in log")
    with self.test_session(use_gpu=True):
      x_tf = constant_op.constant(x_np, shape=x_np.shape)
      y_tf_np = math_ops.reduce_logsumexp(x_tf).eval()
      y_np = log(np.sum(exp(x_np - max_np))) + max_np
      self.assertAllClose(y_tf_np, y_np)
def testOverflow(self):
  x = [1000, 1001, 1002, 1003]
  for dtype in [np.float16, np.float32, np.double]:
    x_np = np.array(x, dtype=dtype)
    max_np = np.max(x_np)
    with self.assertRaisesRegexp(RuntimeWarning,
                                 "overflow encountered in exp"):
      out = log(np.sum(exp(x_np)))
      if out == np.inf:
        raise RuntimeWarning("overflow encountered in exp")
    with self.test_session():
      x_tf = constant_op.constant(x_np, shape=x_np.shape)
      y_tf_np = math_ops.reduce_logsumexp(x_tf).eval()
      y_np = log(np.sum(exp(x_np - max_np))) + max_np
      self.assertAllClose(y_tf_np, y_np)
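# The stable identity these overflow/underflow tests exercise: shifting by
# the max before exponentiating keeps the computation finite, because
#   log(sum(exp(x))) == m + log(sum(exp(x - m)))  for m = max(x).
# Minimal NumPy sketch:
import numpy as np

def stable_logsumexp(x):
  m = np.max(x)
  return m + np.log(np.sum(np.exp(x - m)))

# stable_logsumexp([1000., 1001., 1002., 1003.]) is finite (~1003.44), while
# the naive np.log(np.sum(np.exp(...))) overflows to inf.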
def testLogCdf(self):
  with self.cached_session() as sess:
    gm = mixture_same_family_lib.MixtureSameFamily(
        mixture_distribution=categorical_lib.Categorical(probs=[0.3, 0.7]),
        components_distribution=normal_lib.Normal(
            loc=[-1., 1], scale=[0.1, 0.5]))
    x = gm.sample(10, seed=42)
    actual_log_cdf = gm.log_cdf(x)
    expected_log_cdf = math_ops.reduce_logsumexp(
        (gm.mixture_distribution.logits +
         gm.components_distribution.log_cdf(x[..., array_ops.newaxis])),
        axis=1)
    actual_log_cdf_, expected_log_cdf_ = sess.run(
        [actual_log_cdf, expected_log_cdf])
    self.assertAllClose(actual_log_cdf_, expected_log_cdf_,
                        rtol=1e-6, atol=0.0)
def testLogCdf(self):
  with self.test_session() as sess:
    gm = mixture_same_family_lib.MixtureSameFamily(
        mixture_distribution=categorical_lib.Categorical(probs=[0.3, 0.7]),
        components_distribution=normal_lib.Normal(
            loc=[-1., 1], scale=[0.1, 0.5]))
    x = gm.sample(10, seed=42)
    actual_log_cdf = gm.log_cdf(x)
    expected_log_cdf = math_ops.reduce_logsumexp(
        (gm.mixture_distribution.logits +
         gm.components_distribution.log_cdf(x[..., array_ops.newaxis])),
        axis=1)
    actual_log_cdf_, expected_log_cdf_ = sess.run(
        [actual_log_cdf, expected_log_cdf])
    self.assertAllClose(actual_log_cdf_, expected_log_cdf_,
                        rtol=1e-6, atol=0.0)
def testOverflow(self):
  x = [1000, 1001, 1002, 1003]
  for dtype in [np.float16, np.float32, np.double]:
    x_np = np.array(x, dtype=dtype)
    max_np = np.max(x_np)
    with self.assertRaisesRegexp(RuntimeWarning,
                                 "overflow encountered in exp"):
      out = np.log(np.sum(np.exp(x_np)))
      if out == np.inf:
        raise RuntimeWarning("overflow encountered in exp")
    with test_util.use_gpu():
      x_tf = constant_op.constant(x_np, shape=x_np.shape)
      y_tf_np = math_ops.reduce_logsumexp(x_tf)
      y_np = np.log(np.sum(np.exp(x_np - max_np))) + max_np
      self.assertAllClose(y_tf_np, y_np)
def testNoWeights(self):
  logx_ = np.array([[0., -1, 1000.],
                    [0, 1, -1000.],
                    [-5, 0, 5]])
  with self.test_session() as sess:
    logx = constant_op.constant(logx_)
    expected = math_ops.reduce_logsumexp(logx, axis=-1)
    grad_expected = gradients_impl.gradients(expected, logx)[0]
    actual, actual_sgn = du.reduce_weighted_logsumexp(
        logx, axis=-1, return_sign=True)
    grad_actual = gradients_impl.gradients(actual, logx)[0]
    [actual_, actual_sgn_, grad_actual_,
     expected_, grad_expected_] = sess.run(
         [actual, actual_sgn, grad_actual, expected, grad_expected])
  self.assertAllEqual(expected_, actual_)
  self.assertAllEqual(grad_expected_, grad_actual_)
  self.assertAllEqual([1., 1, 1], actual_sgn_)
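# Hedged NumPy sketch (assumed semantics) of what reduce_weighted_logsumexp
# computes: log|sum_i w_i * exp(logx_i)| together with the sign of that sum.
# With unit weights it degenerates to an ordinary logsumexp with sign +1,
# which is what the test above verifies, gradients included.
import numpy as np

def reduce_weighted_logsumexp(logx, w=None, axis=-1):
  if w is None:
    w = np.ones_like(logx)
  m = np.max(logx, axis=axis, keepdims=True)
  s = np.sum(w * np.exp(logx - m), axis=axis)
  return np.squeeze(m, axis=axis) + np.log(np.abs(s)), np.sign(s)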
def _log_prob(self, y, bijector_kwargs=None, distribution_kwargs=None):
  # For caching to work, it is imperative that the bijector is the first to
  # modify the input.
  bijector_kwargs = bijector_kwargs or {}
  distribution_kwargs = distribution_kwargs or {}
  x = self.bijector.inverse(y, **bijector_kwargs)
  event_ndims = self._maybe_get_static_event_ndims()
  ildj = self.bijector.inverse_log_det_jacobian(
      y, event_ndims=event_ndims, **bijector_kwargs)
  if self.bijector._is_injective:  # pylint: disable=protected-access
    return self._finish_log_prob_for_one_fiber(
        y, x, ildj, distribution_kwargs)

  lp_on_fibers = [
      self._finish_log_prob_for_one_fiber(y, x_i, ildj_i, distribution_kwargs)
      for x_i, ildj_i in zip(x, ildj)
  ]
  return math_ops.reduce_logsumexp(array_ops.stack(lp_on_fibers), axis=0)
def _forward_log_det_jacobian(self, x):
  if self._static_event_ndims == 0:
    return x - 2. * nn_ops.softplus(x)
  else:
    # This code is similar to nn_ops.log_softmax but different because we
    # have an implicit zero column to handle. I.e., instead of:
    #   reduce_sum(logits - reduce_sum(exp(logits), dim))
    # we must do:
    #   log_normalization = 1 + reduce_sum(exp(logits))
    #   -log_normalization + reduce_sum(logits - log_normalization)
    log_normalization = nn_ops.softplus(
        math_ops.reduce_logsumexp(x, reduction_indices=-1, keep_dims=True))
    fldj = (-log_normalization +
            math_ops.reduce_sum(x - log_normalization,
                                reduction_indices=-1,
                                keep_dims=True))
    return array_ops.squeeze(fldj, squeeze_dims=-1)
def call(self, inputs, mask=None):
  if mask is not None:
    # Since mask is 1.0 for positions we want to keep and 0.0 for masked
    # positions, this operation will create a tensor which is 0.0 for
    # positions we want to attend to and -1e9 for masked positions.
    adder = (1.0 - math_ops.cast(mask, inputs.dtype)) * (
        _large_compatible_negative(inputs.dtype))

    # Since we are adding it to the raw scores before the softmax, this is
    # effectively the same as removing these entirely.
    inputs += adder
  if isinstance(self.axis, (tuple, list)):
    if len(self.axis) > 1:
      return math_ops.exp(inputs - math_ops.reduce_logsumexp(
          inputs, axis=self.axis, keepdims=True))
    else:
      return K.softmax(inputs, axis=self.axis[0])
  return K.softmax(inputs, axis=self.axis)
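# Why the logsumexp branch above is a softmax: softmax(x)_i is
# exp(x_i) / sum_j exp(x_j) = exp(x_i - log(sum_j exp(x_j))), so subtracting
# the (keepdims) logsumexp and exponentiating yields a numerically stable
# softmax over the reduced axes. Minimal NumPy check:
import numpy as np
from scipy.special import logsumexp, softmax

x = np.random.randn(2, 5)
via_lse = np.exp(x - logsumexp(x, axis=-1, keepdims=True))
assert np.allclose(via_lse, softmax(x, axis=-1))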
def _multi_seq_fn():
  """Forward computation of alpha values."""
  rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])
  # Compute the alpha values in the forward algorithm in order to get the
  # partition function.
  forward_cell = CrfForwardRnnCell(transition_params)
  # Sequence length is not allowed to be less than zero.
  sequence_lengths_less_one = math_ops.maximum(0, sequence_lengths - 1)
  _, alphas = rnn.dynamic_rnn(
      cell=forward_cell,
      inputs=rest_of_input,
      sequence_length=sequence_lengths_less_one,
      initial_state=first_input,
      dtype=dtypes.float32)
  log_norm = math_ops.reduce_logsumexp(alphas, [1])
  # Mask `log_norm` of the sequences with length <= zero.
  log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
                             array_ops.zeros_like(log_norm),
                             log_norm)
  return log_norm
def get_forwards_probabilities(inputs, sequence_lengths, transitions):
  '''
  CRF forward probabilities and log normalizer

  inputs: bs x L x V unaries
  sequence_lengths: bs
  transitions: An object implementing CRF transitions

  returns: bs x L and bs
  '''
  # Split up the first and rest of the inputs in preparation for the forward
  # algorithm.
  first_input = array_ops.slice(inputs, [0, 0, 0], [-1, 1, -1])
  first_input = array_ops.squeeze(first_input, [1])

  # Forward computation of alpha values.
  unary = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])
  pairwise = transitions.pack_to_parameter_sequence()
  pairwise = pairwise[:, 1:, :]
  rnn_inputs = array_ops.concat([unary, pairwise], axis=2)

  # Compute the alpha values in the forward algorithm in order to get the
  # partition function.
  forward_cell = CrfForwardRnnCell(transitions)

  # Sequence length is not allowed to be less than zero.
  sequence_lengths_less_one = math_ops.maximum(
      constant_op.constant(0, dtype=sequence_lengths.dtype),
      sequence_lengths - 1)

  all_alphas, alphas = rnn.dynamic_rnn(
      cell=forward_cell,
      inputs=rnn_inputs,
      sequence_length=sequence_lengths_less_one,
      initial_state=first_input,
      dtype=dtypes.float32)
  log_norm = math_ops.reduce_logsumexp(alphas, [1])
  # Mask `log_norm` of the sequences with length <= zero.
  log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
                             array_ops.zeros_like(log_norm),
                             log_norm)
  return all_alphas, log_norm