def cal_scores_indices(scores_to_0, scores_to_1):
  """Take the 5 best entries of each score tensor and join them along axis 1.

  Args:
    scores_to_0: Scores for the first candidate set; passed to `top_k`.
    scores_to_1: Scores for the second candidate set; passed to `top_k`.

  Returns:
    A `(next_beam_scores, word_indices)` pair. Each is the axis-1
    concatenation of the two top-5 results; the indices coming from
    `scores_to_1` are shifted by `9 * vocab_size`.
  """
  top_scores_0, top_ids_0 = nn_ops.top_k(scores_to_0, k=5)
  # Debug output. Despite its label, the first message prints only the scores.
  print("ori next_beam_scores_1,word_indices_1", top_scores_0)
  print("ori word_indices_1", top_ids_0)
  top_scores_1, top_ids_1 = nn_ops.top_k(scores_to_1, k=5)
  # `vocab_size` is not a parameter; it is read from the enclosing scope.
  shifted_ids_1 = top_ids_1 + 9 * vocab_size
  next_beam_scores = tf.concat([top_scores_0, top_scores_1], 1)
  word_indices = tf.concat([top_ids_0, shifted_ids_1], 1)
  return next_beam_scores, word_indices
def _batch_sort_vector(x, ascending=True, name=None):
  """Sorts the last axis of `x` using `top_k`.

  Args:
    x: Value convertible to a `Tensor`.
    ascending: If True sort smallest-first, otherwise largest-first.
    name: Optional name scope; defaults to "sort_each_row".

  Returns:
    A `Tensor` with the static shape of `x` whose last axis is sorted.
  """
  with ops.name_scope(name, "sort_each_row", [x]):
    x = ops.convert_to_tensor(x, name="x")
    row_len = array_ops.shape(x)[-1]
    if not ascending:
      sorted_x, _ = nn_ops.top_k(x, k=row_len, sorted=True)
    else:
      # top_k only orders largest-first, so sort the negation and flip back.
      negated, _ = nn_ops.top_k(-x, k=row_len, sorted=True)
      sorted_x = -negated
    sorted_x.set_shape(x.shape)
    return sorted_x
def sort_by_scores(scores, features_list, topn=None):
  """Reorders every feature tensor by descending per-example score.

  Args:
    scores: A `Tensor` of shape [batch_size, list_size] holding the
      per-example scores.
    features_list: A list of `Tensor`s with the same shape as `scores`.
    topn: Optional cutoff on the number of examples kept per list. It is
      clipped to `list_size`; `None` keeps the whole list.

  Returns:
    A list with one [batch_size, topn] `Tensor` per entry of `features_list`,
    each ordered by `scores`.
  """
  scores = ops.convert_to_tensor(scores)
  scores.get_shape().assert_has_rank(2)
  batch_size, list_size = array_ops.unstack(array_ops.shape(scores))
  cutoff = list_size if topn is None else topn
  cutoff = math_ops.minimum(cutoff, list_size)
  _, ranked = nn_ops.top_k(scores, cutoff, sorted=True)
  # Row b of the flattened features starts at offset b * list_size.
  # `ranked` is [batch_size, cutoff], `row_starts` is [batch_size, 1]; the
  # addition below relies on broadcasting.
  row_starts = array_ops.expand_dims(math_ops.range(batch_size) * list_size, 1)
  flat_indices = array_ops.reshape(ranked + row_starts, [-1])
  result_shape = array_ops.stack([batch_size, cutoff])

  sorted_features = []
  for feature in features_list:
    # Flatten, pick the ranked positions, then restore the 2-D layout.
    flat_feature = array_ops.reshape(feature, [-1])
    picked = array_ops.gather(flat_feature, flat_indices)
    sorted_features.append(array_ops.reshape(picked, result_shape))
  return sorted_features
def get_best(self, n):
  """Return the indices and values of the n highest scores in the TopN.

  Args:
    n: Number of top entries requested.

  Returns:
    A `(gathered_indices, topk_values)` pair: the ids looked up in `sl_ids`
    and the corresponding scores taken from `sl_scores`.
  """

  def refresh_shortlist():
    """Rebuild the shortlist variables from the top scores in id_to_score."""
    top_scores, top_ids = nn_ops.top_k(self.id_to_score, self.shortlist_size)
    min_top_score = math_ops.reduce_min(top_scores)
    # Count only the scores strictly greater than float32.min.
    above_min = math_ops.greater(top_scores, dtypes.float32.min)
    live_count = math_ops.reduce_sum(math_ops.to_int32(above_min))
    # Slot 0 is a header: the count in sl_ids, the smallest score in sl_scores.
    ids_update = self.sl_ids.assign(
        math_ops.to_int64(array_ops.concat([[live_count], top_ids], 0)))
    scores_update = self.sl_scores.assign(
        array_ops.concat([[min_top_score], top_scores], 0))
    self.last_ops = [ids_update, scores_update]
    return control_flow_ops.group(ids_update, scores_update)

  # The shortlist is only rebuilt when n exceeds its current size, which is
  # kept in sl_ids[0].
  with ops.control_dependencies(self.last_ops):
    maybe_refresh = control_flow_ops.cond(
        n > self.sl_ids[0], refresh_shortlist, control_flow_ops.no_op)
    with ops.control_dependencies([maybe_refresh]):
      limit = math_ops.minimum(n, math_ops.to_int32(self.sl_ids[0]))
      topk_values, shortlist_positions = nn_ops.top_k(self.sl_scores, limit)
      # top_k yields positions inside the shortlist; translate them into
      # indices into id_to_score.
      gathered_indices = array_ops.gather(self.sl_ids, shortlist_positions)
      return gathered_indices, topk_values
def testTopKInfinities(self):
  """Tests that positive and negative infinity sort correctly."""
  float_dtypes = {
      dtypes.bfloat16.as_numpy_dtype, np.float16, np.float32, np.float64
  }
  feed_values = [1, 2, float("inf"), -float("inf"), -1, -2]
  want_values = [float("inf"), 2.0, 1.0, -1.0, -2.0, -float("inf")]
  want_indices = [2, 1, 0, 4, 5, 3]
  for dtype in float_dtypes.intersection(self.numeric_types):
    # float64 and float16 are skipped when running on TPU.
    if self.device == "TPU" and (dtype == np.float64 or dtype == np.float16):
      continue
    with self.session() as sess:
      p = array_ops.placeholder(dtype)
      with self.test_scope():
        topk = nn_ops.top_k(p, k=6)
      results = sess.run(topk, {p: np.array(feed_values, dtype=dtype)})
      self.assertAllEqual(np.array(want_values, dtype=dtype), results[0])
      self.assertEqual(want_indices, list(results[1]))
def call(self, inputs, training=False):
  """Masks `inputs` down to its top `self.k` entries while training.

  Args:
    inputs: Input `Tensor`; its last dimension must be statically known.
    training: Selects the masked branch when true, the identity otherwise.

  Returns:
    `mask * inputs` when training, else an identity copy of `inputs`.
  """
  input_dim = inputs.get_shape()[-1].value
  _, indices = nn_ops.top_k(inputs, self.k, sorted=False)
  # One-hot every selected index, then sum over the k axis so each selected
  # position along the last axis carries a 1.
  one_hots = array_ops.one_hot(indices, input_dim, axis=-1)
  mask = math_ops.reduce_sum(one_hots, axis=-2)

  def _masked():
    return mask * inputs

  def _passthrough():
    return array_ops.identity(inputs)

  return utils.smart_cond(training, _masked, _passthrough)
def shuffle_valid_indices(is_valid, seed=None):
  """Returns shuffled indices with the valid entries ranked first.

  Every valid entry receives a uniform random key and every invalid entry the
  fixed key -1e-6; each row is then ordered by descending key.

  Args:
    is_valid: A boolean `Tensor` of shape [batch_size, list_size] marking
      which entries are valid.
    seed: An int op-level random seed, forwarded to `random_uniform`. See
      `tf.set_random_seed` for how it combines with the graph-level seed.

  Returns:
    A `Tensor` of shape [batch_size, list_size, 2]. Each pair along the last
    dimension is (batch index, position in the list), so the result can be
    used with `tf.gather_nd` and `tf.scatter_nd` to build a new
    [batch_size, list_size] tensor.
  """
  is_valid = ops.convert_to_tensor(is_valid)
  is_valid.get_shape().assert_has_rank(2)
  output_shape = array_ops.shape(is_valid)
  random_keys = random_ops.random_uniform(output_shape, seed=seed)
  invalid_keys = array_ops.ones(output_shape) * -1e-6
  rand = array_ops.where(is_valid, random_keys, invalid_keys)
  # shape(indices) = [batch_size, list_size]
  _, indices = nn_ops.top_k(rand, output_shape[1], sorted=True)
  # shape(batch_ids) = [batch_size, list_size]; row b is filled with b.
  row_numbers = array_ops.expand_dims(math_ops.range(output_shape[0]), 1)
  batch_ids = array_ops.ones_like(indices) * row_numbers
  pair_parts = [
      array_ops.expand_dims(batch_ids, 2),
      array_ops.expand_dims(indices, 2),
  ]
  return array_ops.concat(pair_parts, axis=2)
def testKNegative(self):
  """Checks that evaluating top_k with a negative fed k raises an op error."""
  inputs = [[0.1, 0.2], [0.3, 0.4]]
  with self.session(use_gpu=True):
    k = array_ops.placeholder(dtypes.int32)
    top_values = nn_ops.top_k(inputs, k)[0]
    # k is only known at run time, so the error surfaces on evaluation.
    with self.assertRaisesOpError("Need k >= 0, got -7"):
      top_values.eval(feed_dict={k: -7})
def benchmarkTopK(self):
  """Benchmarks top_k on (m, n) inputs for several k fractions and devices."""
  row_counts = [128]
  column_counts = [10, 100, 1000, 10000, 100000]
  k_fractions = [0.001, 0.01, 0.5, 0.99, 1.0]
  gpu_flags = [False, True]
  for (m, n, p, use_gpu) in itertools.product(row_counts, column_counts,
                                              k_fractions, gpu_flags):
    k = int(p * n)
    if k == 0:
      # The fraction rounds down to nothing for this n; nothing to measure.
      continue
    name = "m_%d_n_%d_k_%g_use_gpu_%s" % (m, n, k, use_gpu)
    device = "/gpu:0" if use_gpu else "/cpu:0"
    with ops.Graph().as_default():
      with ops.device(device):
        x = random_ops.random_uniform((m, n))
        v = resource_variable_ops.ResourceVariable(x)
        op = nn_ops.top_k(v, k)
      with session.Session() as sess:
        v.initializer.run()
        r = self.run_op_benchmark(sess, op, min_iters=100, name=name)
        wall_time = r["wall_time"]
        # m * n is the number of input elements, scaled by 1e9.
        gb_processed_input = m * n / 1.0e9
        throughput = gb_processed_input / wall_time
        print("Benchmark: %s \t wall_time: %0.03g s \t "
              "Throughput: %0.03g GB/s" % (name, wall_time, throughput))
        sys.stdout.flush()
def get_best(self, n):
  """Return the indices and values of the n highest scores in the TopN.

  Args:
    n: Number of top entries requested.

  Returns:
    A `(gathered_indices, topk_values)` pair: the ids looked up in `sl_ids`
    and the corresponding scores taken from `sl_scores`.
  """

  def refresh_shortlist():
    """Rebuild the shortlist variables from the top scores in id_to_score."""
    top_scores, top_ids = nn_ops.top_k(self.id_to_score, self.shortlist_size)
    min_top_score = math_ops.reduce_min(top_scores)
    # Count only the scores strictly greater than float32.min.
    above_min = math_ops.greater(top_scores, dtypes.float32.min)
    live_count = math_ops.reduce_sum(math_ops.to_int32(above_min))
    # Slot 0 is a header: the count in sl_ids, the smallest score in sl_scores.
    ids_update = self.sl_ids.assign(
        math_ops.to_int64(array_ops.concat([[live_count], top_ids], 0)))
    scores_update = self.sl_scores.assign(
        array_ops.concat([[min_top_score], top_scores], 0))
    self.last_ops = [ids_update, scores_update]
    return control_flow_ops.group(ids_update, scores_update)

  # The shortlist is only rebuilt when n exceeds its current size, which is
  # kept in sl_ids[0].
  with ops.control_dependencies(self.last_ops):
    maybe_refresh = control_flow_ops.cond(
        n > self.sl_ids[0], refresh_shortlist, control_flow_ops.no_op)
    with ops.control_dependencies([maybe_refresh]):
      limit = math_ops.minimum(n, math_ops.to_int32(self.sl_ids[0]))
      topk_values, shortlist_positions = nn_ops.top_k(self.sl_scores, limit)
      # top_k yields positions inside the shortlist; translate them into
      # indices into id_to_score.
      gathered_indices = array_ops.gather(self.sl_ids, shortlist_positions)
      return gathered_indices, topk_values
def GetParams(self):
  """Testing that output type of engine using Top-K is set correctly."""
  input_name = "input"
  input_dims = [100, 100]
  k = 5
  graph = ops.Graph()
  with graph.as_default():
    x = array_ops.placeholder(
        dtype=dtypes.float32, shape=input_dims, name=input_name)
    k_tensor = constant_op.constant(k, dtype=dtypes.int32, name="Const")
    top_values, top_indices = nn_ops.top_k(x, k_tensor, name="TopK")
    # The reshape sits between the TopK output and the engine output, so the
    # reshape's output tensor has to be set explicitly to int32.
    reshaped_indices = array_ops.reshape(
        top_indices, [100, 1, 5], name="Reshape")
    array_ops.identity(top_values, name="output_values")
    array_ops.identity(reshaped_indices, name="output_indices")
  return trt_test.TfTrtIntegrationTestParams(
      gdef=graph.as_graph_def(),
      input_names=[input_name],
      input_dims=[[input_dims]],
      output_names=["output_values", "output_indices"],
      expected_output_dims=[[[100, k], [100, 1, k]]])
def _cond_restrict_fn(self):
  """Builds the ops that evict the least-counted keys from every shard.

  This will only be executed when the size of the variable is larger than
  the trigger.

  Returns:
    A grouped op covering the removals from the variable tables, the
    frequency tables and every slot parameter's tables.
  """
  restrict_var_ops = []
  restrict_status_ops = []
  restrict_slot_ops = []
  for shard, dev in enumerate(self.freq_var.devices):
    with ops.device(dev):
      freq_table = self.freq_var.tables[shard]
      partial_keys, partial_counts = freq_table.export()
      # Each shard keeps an equal share of the reserved entries.
      partial_reserved = int(self._num_reserved / self.freq_var.shard_num)
      partial_counts = array_ops.reshape(partial_counts, (-1,))
      shard_size = array_ops.shape(partial_counts)[0]
      # Number of keys to evict from this shard, never negative.
      evict_count = math_ops.cast(
          shard_size - partial_reserved, dtype=dtypes.int32)
      evict_count = math_ops.maximum(evict_count, 0)
      # Negating the counts makes top_k select the smallest counts.
      _, evict_positions = nn_ops.top_k(
          -partial_counts, evict_count, sorted=False)
      removed_keys = array_ops.gather(partial_keys, evict_positions)
      restrict_var_ops.append(self.var.tables[shard].remove(removed_keys))
      restrict_status_ops.append(freq_table.remove(removed_keys))
      for slot_param in self.params_in_slots:
        restrict_slot_ops.append(
            slot_param.tables[shard].remove(removed_keys))
  return control_flow_ops.group(
      restrict_var_ops, restrict_status_ops, restrict_slot_ops)
def allocmem(u_tm1, ww_tm1, wr_tm1_ls, fg_ls):
  """Allocate Memory.

  Updates the usage vector and derives an allocation vector from it.

  Parameters
  ----------
  u_tm1 : `[batch_size, mem_size]`.
  ww_tm1 : `[batch_size, mem_size]`.
  wr_tm1_ls : a list of R read weights. each element in the list has size of:
    `[batch_size, mem_size]`.
  fg_ls : a list of R free gates. each element in the list has size of:
    `[batch_size, 1]`.

  Returns
  -------
  alloc_vec : `[batch_size, mem_size]`.
  u : `[batch_size, mem_size]`.
  """
  mem_size = shape(u_tm1)[1]
  # Product over all read heads of (1 - free_gate * read_weight).
  retention = functools.reduce(
      multiply, [1 - fg * wr_tm1 for fg, wr_tm1 in zip(fg_ls, wr_tm1_ls)])
  u = (u_tm1 + ww_tm1 - u_tm1 * ww_tm1) * retention
  # top_k orders largest-first; reversing yields the usages smallest-first.
  desc_u, _ = top_k(u, k=mem_size)
  asc_u = reverse(desc_u, axis=[1])
  prod_phi = cumprod(asc_u, axis=1, exclusive=True)
  # NOTE(review): `prod_phi` is in sorted order while `(1 - u)` is in the
  # original slot order, and the sort indices are discarded. Confirm whether
  # the product should be mapped back to slot order before this multiply.
  alloc_vec = (1 - u) * prod_phi
  return alloc_vec, u
def dnw_fn(mask, sparsity, dtype):
  """Creates a mask that keeps the largest-magnitude weights.

  The number of kept entries is fixed by `sparsity`, so the resulting
  sparsity is deterministic.

  Args:
    mask: tf.Tensor, used to look up the corresponding variable by name and
      to set the dtype and shape of the result.
    sparsity: float, between 0 and 1.
    dtype: tf.dtype, type of the return value. Unused; it is deleted on entry.

  Returns:
    tf.Tensor with the shape and dtype of `mask`.
  """
  del dtype
  weights = vars_dict[sparse_utils.mask_extract_name_fn(mask.name)]
  score_drop = math_ops.abs(weights)
  n_total = np.prod(score_drop.shape.as_list())
  n_keep = n_total - sparse_utils.get_n_zeros(n_total, sparsity)
  # Sort the entire array since the k needs to be constant for TPU.
  flat_scores = array_ops.reshape(score_drop, [-1])
  _, sorted_indices = nn_ops.top_k(flat_scores, k=n_total)
  scatter_targets = array_ops.expand_dims(sorted_indices, 1)
  # Ones for the first `n_keep` ranked positions, zeros for the remainder.
  keep_flags = math_ops.range(n_total) < n_keep
  ones = array_ops.ones_like(sorted_indices, dtype=mask.dtype)
  zeros = array_ops.zeros_like(sorted_indices, dtype=mask.dtype)
  new_values = array_ops.where(keep_flags, ones, zeros)
  # Scatter the ranked flags back to their original flat positions.
  flat_mask = array_ops.scatter_nd(
      scatter_targets, new_values, new_values.shape)
  return array_ops.reshape(flat_mask, mask.shape)
def GraphFn(self, x):
  """Builds a Top-K (k=5) graph whose outputs are `output_0` and `output_1`."""
  k_tensor = constant_op.constant(5, dtype=dtypes.int32, name="Const")
  top_values, top_indices = nn_ops.top_k(x, k_tensor, name="TopK")
  # Identity ops give the two outputs their fixed names.
  named_values = array_ops.identity(top_values, name="output_0")
  named_indices = array_ops.identity(top_indices, name="output_1")
  return named_values, named_indices
def testKNegative(self):
  """Checks that evaluating top_k with a negative fed k raises an op error."""
  inputs = [[0.1, 0.2], [0.3, 0.4]]
  with self.test_session(use_gpu=True):
    k = array_ops.placeholder(dtypes.int32)
    top_values = nn_ops.top_k(inputs, k)[0]
    # k is only known at run time, so the error surfaces on evaluation.
    with self.assertRaisesOpError("Need k >= 0, got -7"):
      top_values.eval(feed_dict={k: -7})
def testTopKInfinities(self):
  """Tests that positive and negative infinity sort correctly."""
  # TODO(b/26783907): The Sort HLO is not implemented on CPU or GPU.
  if self.device in ("XLA_CPU", "XLA_GPU"):
    return

  # Only bfloat16 is implemented.
  bfloat16 = dtypes.bfloat16.as_numpy_dtype
  if bfloat16 not in self.numeric_types:
    return

  feed_values = [1, 2, float("inf"), -float("inf"), -1, -2]
  want_values = [float("inf"), 2.0, 1.0, -1.0, -2.0, -float("inf")]
  want_indices = [2, 1, 0, 4, 5, 3]
  with self.test_session() as sess:
    p = array_ops.placeholder(dtypes.bfloat16)
    with self.test_scope():
      topk = nn_ops.top_k(p, k=6)
    results = sess.run(topk, {p: np.array(feed_values, dtype=bfloat16)})
    self.assertAllEqual(np.array(want_values, dtype=bfloat16), results[0])
    self.assertEqual(want_indices, list(results[1]))
def _descending_sort(values, axis, return_argsort=False):
  """Sorts values in reverse using `top_k`.

  `top_k` only works along the last axis, so any other axis is swapped with
  the last one before the call and swapped back afterwards.

  Args:
    values: Tensor of numeric values.
    axis: Index of the axis which values should be sorted along.
    return_argsort: If False, return the sorted values. If True, return the
      indices that would sort the values.

  Returns:
    The sorted values, or the sorting indices when `return_argsort` is True.
  """
  k = array_ops.shape(values)[axis]
  rank = array_ops.rank(values)
  static_rank = values.shape.ndims
  sorts_last_axis = axis == -1 or axis + 1 == values.get_shape().ndims
  if sorts_last_axis:
    # Fast path: nothing to transpose.
    transposition = None
    top_k_input = values
  else:
    if axis < 0:
      # Turn a from-the-end axis into a real index. The static rank is used
      # when available; otherwise axis becomes a tensor.
      axis += static_rank or rank
    if static_rank is None:
      # Rank only known at run time: build the permutation from tensors.
      last_axis = rank - 1
      transposition = array_ops.concat(
          [
              # Axes before `axis` stay in place.
              math_ops.range(axis),
              # The last axis moves into position `axis`.
              [last_axis],
              # Axes in [axis + 1, rank - 1) stay in place.
              math_ops.range(axis + 1, last_axis),
              # `axis` moves into the last position.
              [axis]
          ],
          axis=0)
    else:
      # Rank known statically: compute the permutation in NumPy and embed it
      # as a constant.
      last_axis = static_rank - 1
      transposition = constant_op.constant(
          np.r_[
              # Axes before `axis` stay in place.
              np.arange(axis),
              # The last axis moves into position `axis`.
              [last_axis],
              # Axes in [axis + 1, rank - 1) stay in place.
              np.arange(axis + 1, last_axis),
              # `axis` moves into the last position.
              [axis]],
          name='transposition')
    top_k_input = array_ops.transpose(values, transposition)

  sorted_values, sort_indices = nn_ops.top_k(top_k_input, k)
  return_value = sort_indices if return_argsort else sorted_values
  if transposition is not None:
    # The permutation swaps exactly two axes, so it is its own inverse and
    # can be reused to undo the transpose.
    return_value = array_ops.transpose(return_value, transposition)
  return return_value
def cal_scores_indices_t1(scores_final, next_beam_size):
  """Take the 5 best entries of `scores_final` and duplicate them on axis 1.

  Args:
    scores_final: Score tensor passed to `top_k`.
    next_beam_size: Unused by this function.

  Returns:
    A `(next_beam_scores, word_indices)` pair. The scores are the top-5
    scores repeated twice; the indices are the top-5 indices followed by the
    same indices shifted by `5 * vocab_size`.
  """
  top_scores, top_ids = nn_ops.top_k(scores_final, k=5)
  # Debug output.
  print("next_beam_scores_1", top_scores)
  print("word_indices_1", top_ids)
  # `vocab_size` is not a parameter; it is read from the enclosing scope.
  shifted_ids = top_ids + 5 * vocab_size
  next_beam_scores = tf.concat([top_scores, top_scores], 1)
  word_indices = tf.concat([top_ids, shifted_ids], 1)
  return next_beam_scores, word_indices
def _validateTopK(self, inputs, k, expected_values, expected_indices,
                  sorted=True):  # pylint: disable=redefined-builtin
  """Runs `top_k` on `inputs` and checks values, indices and shapes.

  Args:
    inputs: Input passed to `top_k`. The unsorted check indexes it as a 2-D
      array.
    k: Number of top elements requested.
    expected_values: Expected top-k values (array-like).
    expected_indices: Expected top-k indices (array-like).
    sorted: Forwarded to `top_k`. When True, results are compared
      position-by-position; when False, only as sets after sorting.
  """
  np_expected_values = np.array(expected_values)
  np_expected_indices = np.array(expected_indices)
  with self.cached_session(use_gpu=True):
    values_op, indices_op = nn_ops.top_k(inputs, k, sorted=sorted)
    values, indices = self.evaluate([values_op, indices_op])

    self.assertShapeEqual(np_expected_values, values_op)
    self.assertShapeEqual(np_expected_indices, indices_op)

    if sorted:
      self.assertAllClose(np_expected_values, values)
      # Do some special casing of equality of indices: if indices
      # are not the same, but values are floating type, ensure that
      # the values are within epsilon of each other.
      if not np.issubdtype(np_expected_values.dtype, np.floating):
        # Values are not floating point type; check indices exactly
        self.assertAllEqual(np_expected_indices, indices)
      else:
        # Values are floating point; indices may be swapped for
        # values near each other.
        indices_not_equal = np_expected_indices != indices
        if np.any(indices_not_equal):
          values_unsure = values[indices_not_equal]
          # Index the ndarray copy: `expected_values` may be a plain list,
          # which cannot be indexed with a boolean mask.
          expected_values_unsure = np_expected_values[indices_not_equal]
          self.assertAllClose(expected_values_unsure, values_unsure)
    else:
      np_inputs = np.array(inputs)

      # Check that the indices are valid.
      for result_index, src_index in np.ndenumerate(indices):
        value = values[result_index]
        expected_value = np_inputs[result_index[0], src_index]
        np.testing.assert_almost_equal(value, expected_value)

      # Check that if two elements are equal, the lower-index element appears
      # first.
      shape = values.shape
      for batch_index in range(shape[0]):
        for index in range(shape[1] - 1):
          if np.isclose(values[batch_index, index],
                        values[batch_index, index + 1]):
            self.assertLess(indices[batch_index, index],
                            indices[batch_index, index + 1])

      # Now check the results, ignoring order.
      self.assertAllEqual(np.sort(np_expected_indices), np.sort(indices))
      self.assertAllClose(np.sort(np_expected_values), np.sort(values))
def testTopKGradients(self):
  """Checks that top_k routes each upstream gradient to its source slot."""
  feed = [[2, -1, 1000, 3, 4], [1, 5, 2, 4, 3]]
  upstream = [[[1, 2, 3], [4, 5, 6]]]
  # Row 0 picks 1000, 4, 3 (columns 2, 4, 3); row 1 picks 5, 4, 3
  # (columns 1, 3, 4). Unselected columns get zero gradient.
  want = [[0, 0, 1, 3, 2], [0, 4, 0, 5, 6]]
  with self.test_session(use_gpu=True) as sess:
    inputs = array_ops.placeholder(dtypes.int32, shape=[2, 5])
    top_values = nn_ops.top_k(inputs, 3)[0]
    grad_ops = gradients_impl.gradients(top_values, inputs, grad_ys=upstream)
    grad = sess.run(grad_ops, feed_dict={inputs: feed})[0]
    self.assertEqual(grad.tolist(), want)
def prune_by_bbb(variable_metadata, percentage):
  """Prune a percentage of variables based on their signal to noise ratios.

  Arguments:
    variable_metadata: `list` of `bbb._VariableMetadata`, suggest using
      `bbb.get_variable_metadata()`.
    percentage: a `tf.Tensor` that is scalar representing what percentage
      of variables to prune.

  Returns:
    A list with one reshaped, masked posterior estimate per metadata entry,
    named after the raw variable; an empty list when no metadata is given.
  """
  if not variable_metadata:
    return []

  # Collect the flattened ratio and posterior estimate of every variable.
  signal_to_noise_ratios = []
  variable_estimates = []
  variable_info = []
  for meta in variable_metadata:
    ratio = distribution_utils.signal_to_noise_ratio(meta.posterior)
    signal_to_noise_ratios.append(array_utils.flatten(ratio))
    variable_estimates.append(array_utils.flatten(meta.posterior_estimate))
    variable_info.append((meta.raw_variable_name, meta.raw_variable_shape))

  # Join everything into one long vector.
  flat_estimates = array_ops.concat(variable_estimates, 0)
  flat_ratios = array_ops.concat(signal_to_noise_ratios, 0)
  flat_size = flat_estimates.get_shape().as_list()[-1]
  drop_count = math_ops.cast(flat_size * percentage, dtypes.int32)

  # Rank all entries by ratio.
  # NOTE(review): top_k orders largest-first, so the first `drop_count`
  # indices are the entries with the largest ratios. Confirm this is the
  # intended pruning direction.
  _, ranked = nn_ops.top_k(flat_ratios, k=flat_size, sorted=True)
  zero_indices = array_ops.expand_dims(ranked[:drop_count], -1)
  # Dense 0/1 vector: 0 at the dropped positions, 1 everywhere else.
  dense_mask = sparse_ops.sparse_to_dense(
      zero_indices, [flat_size],
      sparse_values=0,
      default_value=1,
      validate_indices=False)
  flat_estimates = flat_estimates * math_ops.cast(
      dense_mask, flat_estimates.dtype)

  # Cut the long vector back into per-variable tensors.
  offset = 0
  pruned = []
  for name, shape in variable_info:
    size = array_utils.product(shape)
    piece = flat_estimates[offset:offset + size]
    pruned.append(gen_array_ops.reshape(piece, shape, name=name))
    offset += size
  return pruned
def call(self, inputs, training=False):
  """Masks `inputs` down to a randomly sized top-k while training.

  Args:
    inputs: Input `Tensor`; its last dimension must be statically known.
    training: Selects the masked branch when true, the identity otherwise.

  Returns:
    `mask * inputs` when training, else an identity copy of `inputs`.
  """
  input_dim = inputs.get_shape()[-1].value
  # Draw k as a random int32 below input_dim.
  random_k = random_ops.random_uniform(
      [1], maxval=input_dim, dtype=dtypes.int32)
  k = random_k[0]
  _, indices = nn_ops.top_k(inputs, k, sorted=False)
  # One-hot every selected index, then sum over the k axis so each selected
  # position along the last axis carries a 1.
  one_hots = array_ops.one_hot(indices, input_dim, axis=-1)
  mask = math_ops.reduce_sum(one_hots, axis=-2)

  def _masked():
    return mask * inputs

  def _passthrough():
    return array_ops.identity(inputs)

  return utils.smart_cond(training, _masked, _passthrough)
def GraphFn(self, x):
  """Builds a Top-K (k=5) graph whose indices pass through a reshape."""
  k_tensor = constant_op.constant(5, dtype=dtypes.int32, name="Const")
  top_values, top_indices = nn_ops.top_k(x, k_tensor, name="TopK")
  # The reshape sits between the TopK output and the engine output, so the
  # reshape's output tensor has to be set explicitly to int32.
  reshaped_indices = array_ops.reshape(
      top_indices, [100, 1, 5], name="Reshape")
  named_values = array_ops.identity(top_values, name="output_0")
  named_indices = array_ops.identity(reshaped_indices, name="output_1")
  return named_values, named_indices
def _sort_rows(matrix, num_rows):
  """Returns the `num_rows` largest entries of each column, largest first.

  The matrix is transposed so that `top_k`, which ranks along the last axis,
  runs down each column; the result is then transposed back.

  NOTE(review): every column is ranked independently, so the returned rows
  are not rows of the input. Confirm this matches what callers expect.

  Args:
    matrix: a matrix of values (row,col).
    num_rows: (int) number of entries to keep per column.

  Returns:
    Tensor (num_rows, col) holding the per-column top values.
  """
  by_column = array_ops.transpose(matrix, [1, 0])
  top_values, _ = nn_ops.top_k(by_column, num_rows)
  return array_ops.transpose(top_values, [1, 0])
def testTopKGradients(self):
  """Checks that top_k routes each upstream gradient to its source slot."""
  feed = [[2., -1., 1000., 3., 4.], [1., 5., 2., 4., 3.]]
  upstream = [[[1., 2., 3.], [4., 5., 6.]]]
  # Row 0 picks 1000, 4, 3 (columns 2, 4, 3); row 1 picks 5, 4, 3
  # (columns 1, 3, 4). Unselected columns get zero gradient.
  want = [[0., 0., 1., 3., 2.], [0., 4., 0., 5., 6.]]
  with self.session(use_gpu=True) as sess:
    inputs = array_ops.placeholder(dtypes.float32, shape=[2, 5])
    top_values = nn_ops.top_k(inputs, 3)[0]
    grad_ops = gradients_impl.gradients(top_values, inputs, grad_ys=upstream)
    grad = sess.run(grad_ops, feed_dict={inputs: feed})[0]
    self.assertEqual(grad.tolist(), want)
def _validateTopK(self, inputs, k, expected_values, expected_indices,
                  sorted=True):  # pylint: disable=redefined-builtin
  """Runs `top_k` on `inputs` and checks values, indices and shapes.

  Args:
    inputs: Input passed to `top_k`. The unsorted check indexes it as a 2-D
      array.
    k: Number of top elements requested.
    expected_values: Expected top-k values (array-like).
    expected_indices: Expected top-k indices (array-like).
    sorted: Forwarded to `top_k`. When True, results are compared
      position-by-position; when False, only as sets after sorting.
  """
  np_expected_values = np.array(expected_values)
  np_expected_indices = np.array(expected_indices)
  with self.test_session(use_gpu=True) as sess:
    values_op, indices_op = nn_ops.top_k(inputs, k, sorted=sorted)
    values, indices = sess.run([values_op, indices_op])

    self.assertShapeEqual(np_expected_values, values_op)
    self.assertShapeEqual(np_expected_indices, indices_op)

    if sorted:
      self.assertAllClose(np_expected_values, values)
      # Do some special casing of equality of indices: if indices
      # are not the same, but values are floating type, ensure that
      # the values are within epsilon of each other.
      # `np.floating` is the abstract float type; the old `np.float` alias
      # has been removed from NumPy.
      if not np.issubdtype(np_expected_values.dtype, np.floating):
        # Values are not floating point type; check indices exactly
        self.assertAllEqual(np_expected_indices, indices)
      else:
        # Values are floating point; indices may be swapped for
        # values near each other.
        indices_not_equal = np_expected_indices != indices
        if np.any(indices_not_equal):
          values_unsure = values[indices_not_equal]
          # Index the ndarray copy: `expected_values` may be a plain list,
          # which cannot be indexed with a boolean mask.
          expected_values_unsure = np_expected_values[indices_not_equal]
          self.assertAllClose(expected_values_unsure, values_unsure)
    else:
      np_inputs = np.array(inputs)

      # Check that the indices are valid.
      for result_index, src_index in np.ndenumerate(indices):
        value = values[result_index]
        expected_value = np_inputs[result_index[0], src_index]
        # Use the public `np.testing` entry point rather than the private
        # `np.testing.utils` module.
        np.testing.assert_almost_equal(value, expected_value)

      # Check that if two elements are equal, the lower-index element appears
      # first.
      shape = values.shape
      for batch_index in range(shape[0]):
        for index in range(shape[1] - 1):
          if np.isclose(values[batch_index, index],
                        values[batch_index, index + 1]):
            self.assertLess(indices[batch_index, index],
                            indices[batch_index, index + 1])

      # Now check the results, ignoring order.
      self.assertAllEqual(np.sort(np_expected_indices), np.sort(indices))
      self.assertAllClose(np.sort(np_expected_values), np.sort(values))
def refresh_shortlist():
  """Rebuild the shortlist variables from the top scores in id_to_score.

  Reads and writes attributes of `self`, which is taken from the enclosing
  scope rather than passed in.

  Returns:
    A grouped op that performs both variable assignments.
  """
  top_scores, top_ids = nn_ops.top_k(self.id_to_score, self.shortlist_size)
  min_top_score = math_ops.reduce_min(top_scores)
  # Count only the scores strictly greater than float32.min.
  above_min = math_ops.greater(top_scores, dtypes.float32.min)
  live_count = math_ops.reduce_sum(math_ops.to_int32(above_min))
  # Slot 0 is a header: the count in sl_ids, the smallest score in sl_scores.
  ids_update = self.sl_ids.assign(
      math_ops.to_int64(array_ops.concat([[live_count], top_ids], 0)))
  scores_update = self.sl_scores.assign(
      array_ops.concat([[min_top_score], top_scores], 0))
  self.last_ops = [ids_update, scores_update]
  return control_flow_ops.group(ids_update, scores_update)
def testTopKZeros(self):
  """Tests that positive and negative zeros sort correctly."""
  float_dtypes = {dtypes.bfloat16.as_numpy_dtype, np.float32}
  feed_values = [0., -0., 0., 3., -0., -4., 0., -0.]
  want_values = [3., 0., 0., 0.]
  # After the 3 at index 3, the expected picks are the positive zeros at
  # indices 0, 2 and 6, not the negative zeros at 1, 4 and 7.
  want_indices = [3, 0, 2, 6]
  for dtype in float_dtypes.intersection(self.numeric_types):
    with self.session() as sess:
      p = array_ops.placeholder(dtype)
      with self.test_scope():
        topk = nn_ops.top_k(p, k=4)
      results = sess.run(topk, {p: np.array(feed_values, dtype=dtype)})
      self.assertAllEqual(np.array(want_values, dtype=dtype), results[0])
      self.assertEqual(want_indices, list(results[1]))
def _descending_sort(values, axis):
  """Sorts values in reverse using `top_k`.

  `top_k` only works along the last axis, so any other axis is swapped with
  the last one before the call and swapped back afterwards.

  Args:
    values: Tensor of numeric values.
    axis: Index of the axis which values should be sorted along.

  Returns:
    The sorted values.
  """
  k = array_ops.shape(values)[axis]
  rank = array_ops.rank(values)

  sorts_last_axis = axis == -1 or axis + 1 == values.get_shape().ndims
  if sorts_last_axis:
    # Fast path: nothing to transpose.
    sorted_values, _ = nn_ops.top_k(values, k)
    return sorted_values

  if axis < 0:
    # Turn a from-the-end axis into the real index; axis becomes a tensor.
    axis += rank
  last_axis = rank - 1
  transposition = array_ops.concat(
      [
          # Axes before `axis` stay in place.
          math_ops.range(axis),
          # The last axis moves into position `axis`.
          [last_axis],
          # Axes in [axis + 1, rank - 1) stay in place.
          math_ops.range(axis + 1, last_axis),
          # `axis` moves into the last position.
          [axis]
      ],
      axis=0)
  top_k_input = array_ops.transpose(values, transposition)
  sorted_values, _ = nn_ops.top_k(top_k_input, k)
  # The permutation swaps exactly two axes, so it is its own inverse and can
  # be reused to undo the transpose.
  return array_ops.transpose(sorted_values, transposition)
def _update_mask(self, weights, threshold):
  """Updates the mask for a given weight tensor.

  Sorts the weight magnitudes, takes the magnitude at rank
  `round(size * (1 - sparsity))` as the current threshold, and blends it
  with the previous threshold using `self._spec.threshold_decay`.

  Args:
    weights: The weight tensor that needs to be masked.
    threshold: The current threshold value. It is combined with the newly
      computed threshold as an exponential moving average.

  Returns:
    new_threshold: The smoothed threshold.
    new_mask: A float32 tensor shaped like `weights`, 1 where the weight
      magnitude is at or above the smoothed threshold and 0 elsewhere.

  Raises:
    ValueError: if sparsity is not defined
  """
  if self._sparsity is None:
    raise ValueError('Sparsity variable undefined')

  sparsity = self._get_sparsity(weights.op.name)
  with ops.name_scope(weights.op.name + '_pruning_ops'):
    abs_weights = math_ops.abs(weights)
    num_weights = math_ops.cast(array_ops.size(abs_weights), dtypes.float32)
    # Number of weights that should survive pruning.
    k = math_ops.cast(
        math_ops.round(num_weights * (1 - sparsity)), dtypes.int32)
    # Sort the entire array
    flat_abs = array_ops.reshape(abs_weights, [-1])
    values, _ = nn_ops.top_k(flat_abs, k=array_ops.size(abs_weights))
    # Grab the (k-1) th value
    # NOTE(review): k can round to 0, which makes this index -1. Confirm
    # callers never request a sparsity that high.
    current_threshold = array_ops.gather(values, k - 1)
    decay = self._spec.threshold_decay
    smoothed_threshold = math_ops.add_n([
        math_ops.multiply(current_threshold, 1 - decay),
        math_ops.multiply(threshold, decay)
    ])
    keep = math_ops.greater_equal(abs_weights, smoothed_threshold)
    new_mask = math_ops.cast(keep, dtypes.float32)
  return smoothed_threshold, new_mask
def refresh_shortlist():
  """Rebuild the shortlist variables from the top scores in id_to_score.

  Reads and writes attributes of `self`, which is taken from the enclosing
  scope rather than passed in.

  Returns:
    A grouped op that performs both variable assignments.
  """
  top_scores, top_ids = nn_ops.top_k(self.id_to_score, self.shortlist_size)
  min_top_score = math_ops.reduce_min(top_scores)
  # Count only the scores strictly greater than float32.min.
  above_min = math_ops.greater(top_scores, dtypes.float32.min)
  live_count = math_ops.reduce_sum(math_ops.to_int32(above_min))
  # Slot 0 is a header: the count in sl_ids, the smallest score in sl_scores.
  ids_update = self.sl_ids.assign(
      math_ops.to_int64(array_ops.concat([[live_count], top_ids], 0)))
  scores_update = self.sl_scores.assign(
      array_ops.concat([[min_top_score], top_scores], 0))
  self.last_ops = [ids_update, scores_update]
  return control_flow_ops.group(ids_update, scores_update)
def _update_mask(self, weights, threshold):
  """Updates the mask for a given weight tensor.

  Sorts the weight magnitudes, takes the magnitude at rank
  `round(size * (1 - sparsity))` as the current threshold, and blends it
  with the previous threshold using `self._spec.threshold_decay`.

  Args:
    weights: The weight tensor that needs to be masked.
    threshold: The current threshold value. It is combined with the newly
      computed threshold as an exponential moving average.

  Returns:
    new_threshold: The smoothed threshold.
    new_mask: A float32 tensor shaped like `weights`, 1 where the weight
      magnitude is at or above the smoothed threshold and 0 elsewhere.

  Raises:
    ValueError: if sparsity is not defined
  """
  if self._sparsity is None:
    raise ValueError('Sparsity variable undefined')

  sparsity = self._get_sparsity(weights.op.name)
  with ops.name_scope(weights.op.name + '_pruning_ops'):
    abs_weights = math_ops.abs(weights)
    num_weights = math_ops.cast(array_ops.size(abs_weights), dtypes.float32)
    # Number of weights that should survive pruning.
    k = math_ops.cast(
        math_ops.round(num_weights * (1 - sparsity)), dtypes.int32)
    # Sort the entire array
    flat_abs = array_ops.reshape(abs_weights, [-1])
    values, _ = nn_ops.top_k(flat_abs, k=array_ops.size(abs_weights))
    # Grab the (k-1) th value
    # NOTE(review): k can round to 0, which makes this index -1. Confirm
    # callers never request a sparsity that high.
    current_threshold = array_ops.gather(values, k - 1)
    decay = self._spec.threshold_decay
    smoothed_threshold = math_ops.add_n([
        math_ops.multiply(current_threshold, 1 - decay),
        math_ops.multiply(threshold, decay)
    ])
    keep = math_ops.greater_equal(abs_weights, smoothed_threshold)
    new_mask = math_ops.cast(keep, dtypes.float32)
  return smoothed_threshold, new_mask
def _validateTopK(self, inputs, k, expected_values, expected_indices,
                  sorted=True):
  """Runs `top_k` and compares values, indices and shapes to expectations.

  Args:
    inputs: Input passed to `top_k`.
    k: Number of top elements requested.
    expected_values: Expected top-k values (array-like).
    expected_indices: Expected top-k indices (array-like).
    sorted: Forwarded to `top_k`.
  """
  want_values = np.array(expected_values)
  want_indices = np.array(expected_indices)
  with self.test_session():
    values_op, indices_op = nn_ops.top_k(inputs, k, sorted=sorted)
    got_values = values_op.eval()
    got_indices = indices_op.eval()
    # Contents first, then the shapes reported by the ops.
    self.assertAllClose(want_values, got_values)
    self.assertAllEqual(want_indices, got_indices)
    self.assertShapeEqual(want_values, values_op)
    self.assertShapeEqual(want_indices, indices_op)
def _validateTopK(self, inputs, k, expected_values, expected_indices,
                  sorted=True):
  """Runs `top_k` and compares shapes, indices and values to expectations.

  Args:
    inputs: Input passed to `top_k`.
    k: Number of top elements requested.
    expected_values: Expected top-k values (array-like).
    expected_indices: Expected top-k indices (array-like).
    sorted: Forwarded to `top_k`.
  """
  want_values = np.array(expected_values)
  want_indices = np.array(expected_indices)
  with self.test_session():
    values_op, indices_op = nn_ops.top_k(inputs, k, sorted=sorted)
    got_values = values_op.eval()
    got_indices = indices_op.eval()
    # Shapes reported by the ops first, then the contents.
    self.assertShapeEqual(want_values, values_op)
    self.assertShapeEqual(want_indices, indices_op)
    self.assertAllEqual(want_indices, got_indices)
    self.assertAllClose(want_values, got_values)
def _filter_top_k(x, k):
  """Filters top-k values in the last dim of x and set the rest to NEG_INF.

  Used for computing top-k prediction values in dense labels (which has the
  same shape as predictions) for recall and precision top-k metrics.

  Args:
    x: tensor with any dimensions.
    k: the number of values to keep.

  Returns:
    tensor with same shape and dtype as x.
  """
  _, top_k_idx = nn_ops.top_k(x, k, sorted=False)
  # Use the dynamic shape for the one-hot depth so this also works when the
  # last dimension of `x` is not known at graph-construction time.
  depth = array_ops.shape(x)[-1]
  # One-hot each selected index, then sum over the k axis so each selected
  # position along the last axis carries a 1.
  top_k_mask = math_ops.reduce_sum(
      array_ops.one_hot(top_k_idx, depth, axis=-1), axis=-2)
  # Selected entries keep their value; the rest get the NEG_INF constant
  # from the enclosing scope.
  return x * top_k_mask + NEG_INF * (1 - top_k_mask)
def _filter_top_k(x, k):
  """Keeps the top-k values along the last dim of x; the rest become NEG_INF.

  Used for computing top-k prediction values in dense labels (which has the
  same shape as predictions) for recall and precision top-k metrics.

  Args:
    x: tensor with any dimensions.
    k: the number of values to keep.

  Returns:
    tensor with same shape and dtype as x.
  """
  _, kept_positions = nn_ops.top_k(x, k, sorted=False)
  depth = array_ops.shape(x)[-1]
  # One-hot each selected index, then sum over the k axis so each selected
  # position along the last axis carries a 1.
  one_hots = array_ops.one_hot(kept_positions, depth, axis=-1)
  keep_mask = math_ops.reduce_sum(one_hots, axis=-2)
  # Selected entries keep their value; the rest get the NEG_INF constant
  # from the enclosing scope.
  return x * keep_mask + NEG_INF * (1 - keep_mask)
def testTopKZeros(self):
  """Tests that positive and negative zeros sort correctly."""
  # Only bfloat16 is implemented.
  bfloat16 = dtypes.bfloat16.as_numpy_dtype
  if bfloat16 not in self.numeric_types:
    return

  feed_values = [0., -0., 0., 3., -0., -4., 0., -0.]
  want_values = [3., 0., 0., 0.]
  # After the 3 at index 3, the expected picks are the positive zeros at
  # indices 0, 2 and 6, not the negative zeros at 1, 4 and 7.
  want_indices = [3, 0, 2, 6]
  with self.cached_session() as sess:
    p = array_ops.placeholder(dtypes.bfloat16)
    with self.test_scope():
      topk = nn_ops.top_k(p, k=4)
    results = sess.run(topk, {p: np.array(feed_values, dtype=bfloat16)})
    self.assertAllEqual(np.array(want_values, dtype=bfloat16), results[0])
    self.assertEqual(want_indices, list(results[1]))
def __init__(self, permutation, validate_args=False, name=None):
  """Creates the `Permute` bijector.

  Args:
    permutation: An `int`-like vector-shaped `Tensor` representing the
      permutation to apply to the rightmost dimension of the transformed
      `Tensor`.
    validate_args: Python `bool` indicating whether arguments should be
      checked for correctness.
    name: Python `str`, name given to ops managed by this object.

  Raises:
    TypeError: if `not permutation.dtype.is_integer`.
    ValueError: if the statically known `permutation` does not hold exactly
      the values `0 .. size - 1`.
  """
  invalid_msg = ("Permutation over `d` must contain exactly one of "
                 "each of `{0, 1, ..., d}`.")
  with ops.name_scope(name, "permute", values=[permutation]):
    permutation = ops.convert_to_tensor(permutation, name="permutation")
    if not permutation.dtype.is_integer:
      raise TypeError("permutation.dtype ({}) should be `int`-like.".format(
          permutation.dtype.name))
    static_perm = tensor_util.constant_value(permutation)
    if static_perm is not None:
      # Value known at graph construction time: validate eagerly in Python.
      if set(static_perm) != set(np.arange(static_perm.size)):
        raise ValueError(invalid_msg)
    elif validate_args:
      # Value only known at run time: sorting the negated entries in
      # descending order and negating back yields the entries in ascending
      # order, which must then equal `range(size)`.
      neg_sorted, _ = nn_ops.top_k(
          -permutation, k=array_ops.shape(permutation)[-1], sorted=True)
      ascending = -neg_sorted
      expected = math_ops.range(array_ops.size(neg_sorted))
      runtime_check = check_ops.assert_equal(
          ascending, expected, message=invalid_msg)
      permutation = control_flow_ops.with_dependencies(
          [runtime_check], permutation)
    self._permutation = permutation
  super(Permute, self).__init__(
      forward_min_event_ndims=1,
      is_constant_jacobian=True,
      validate_args=validate_args,
      name=name or "permute")
def GetParams(self):
  """Builds the Top-K graph and parameters for the TF-TRT conversion test."""
  top_k_count = 5
  placeholder_name = "input"
  placeholder_shape = [100, 100]
  graph = ops.Graph()
  with graph.as_default():
    x = array_ops.placeholder(
        dtype=dtypes.float32, shape=placeholder_shape, name=placeholder_name)
    k_tensor = constant_op.constant(
        top_k_count, dtype=dtypes.int32, name="Const")
    top_values, top_indices = nn_ops.top_k(x, k_tensor, name="TopK")
    # The identity ops only exist to give the outputs stable names.
    array_ops.identity(top_values, name="output_values")
    array_ops.identity(top_indices, name="output_indices")
  return trt_test.TfTrtIntegrationTestParams(
      gdef=graph.as_graph_def(),
      input_names=[placeholder_name],
      input_dims=[[placeholder_shape]],
      output_names=["output_values", "output_indices"],
      expected_output_dims=[[[100, top_k_count], [100, top_k_count]]])
def testTopKInfinities(self):
  """Tests that positive and negative infinity sort correctly."""
  # Only bfloat16 is implemented.
  bf16 = dtypes.bfloat16.as_numpy_dtype
  if bf16 not in self.numeric_types:
    return

  pos_inf = float("inf")
  neg_inf = -float("inf")
  feed_values = np.array([1, 2, pos_inf, neg_inf, -1, -2], dtype=bf16)
  want_values = np.array(
      [pos_inf, 2.0, 1.0, -1.0, -2.0, neg_inf], dtype=bf16)
  with self.cached_session() as sess:
    p = array_ops.placeholder(dtypes.bfloat16)
    with self.test_scope():
      topk = nn_ops.top_k(p, k=6)
    got_values, got_indices = sess.run(topk, {p: feed_values})
    self.assertAllEqual(want_values, got_values)
    self.assertEqual([2, 1, 0, 4, 5, 3], list(got_indices))
def testTopKZeros(self):
  """Tests that positive and negative zeros sort correctly."""
  # TODO(b/26783907): The Sort HLO is not implemented on CPU or GPU.
  if self.device in ["XLA_CPU", "XLA_GPU"]:
    return

  # Only bfloat16 is implemented.
  bf16 = dtypes.bfloat16.as_numpy_dtype
  if bf16 not in self.numeric_types:
    return

  feed_values = np.array([0., -0., 0., 3., -0., -4., 0., -0.], dtype=bf16)
  want_values = np.array([3., 0., 0., 0.], dtype=bf16)
  with self.test_session() as sess:
    p = array_ops.placeholder(dtypes.bfloat16)
    with self.test_scope():
      topk = nn_ops.top_k(p, k=4)
    got_values, got_indices = sess.run(topk, {p: feed_values})
    self.assertAllEqual(want_values, got_values)
    self.assertEqual([3, 0, 1, 2], list(got_indices))
def benchmarkTopK(self):
  """Benchmarks `top_k` over a grid of input widths, k fractions and devices."""
  grid = itertools.product(
      [128],
      [10, 100, 1000, 10000, 100000],
      [0.001, 0.01, 0.5, 0.99, 1.0],
      [False, True])
  for m, n, fraction, use_gpu in grid:
    k = int(fraction * n)
    if k == 0:
      # The fraction rounds down to zero elements for this width; skip it.
      continue
    name = "m_%d_n_%d_k_%g_use_gpu_%s" % (m, n, k, use_gpu)
    device_kind = "gpu" if use_gpu else "cpu"
    device = "/%s:0" % device_kind
    with ops.Graph().as_default():
      with ops.device(device):
        x = random_ops.random_uniform((m, n))
        v = resource_variable_ops.ResourceVariable(x)
        op = nn_ops.top_k(v, k)
      with session.Session() as sess:
        v.initializer.run()
        r = self.run_op_benchmark(sess, op, min_iters=100, name=name)
        wall_time = r["wall_time"]
        gb_processed_input = m * n / 1.0e9
        throughput = gb_processed_input / wall_time
        print("Benchmark: %s \t wall_time: %0.03g s \t "
              "Throughput: %0.03g GB/s" % (name, wall_time, throughput))
        sys.stdout.flush()
def sample_symbols_new(logits, log_probs, finished, lengths, time):
  """Scores every (beam, word) continuation and keeps the best `beam_size`.

  Reads `eos_id`, `vocab_size`, `beam_size`, `batch_size` and `alpha` from the
  enclosing scope.

  :param logits: [batch_size * beam_size, target_vocab_size]
  :param log_probs: [batch_size * beam_size, ]
  :param finished: [batch_size * beam_size, ]
  :param lengths: decoding length [batch_size * beam_size, ]
  :param time: current decoding step; when it is not > 0 only the first
      `vocab_size` scores of each batch row are candidates
  :return: tuple `(word_ids, beam_ids, next_log_probs, next_lengths,
      ret_log_probs, ret_sample_ids, length_penalty)` where `ret_log_probs` is
      the full [batch_size, beam_size * vocab_size] log-prob table,
      `ret_sample_ids` the flattened top-k indices into that table, and
      `length_penalty` the penalty gathered for the selected beams
  """
  # [batch_size * beam_size,]
  prev_finished_float = math_ops.to_float(finished)
  # [batch_size * beam_size, ]
  prev_log_probs = log_probs
  # [batch_size * beam_size, target_vocab_size]
  probs = advanced_log_softmax(logits)  # negative

  # mask the finished beam except only one entrance (target_eos_id)
  #   [target_vocab_size, ]: [float_min, float_min, float_min, ..., 0]
  #   this forces the beam with EOS continue to generate EOS
  finished_beam_bias = finished_beam_one_entry_bias(
      on_entry=eos_id, num_entries=vocab_size)
  # [batch_size * beam_size, target_vocab_size]: outer product
  finished_beam_bias = expand_to_beam_size(
      finished_beam_bias, beam_size * batch_size, axis=0)
  finished_beam_bias *= array_ops.expand_dims(prev_finished_float, 1)
  # compute new probs, with finished flags & mask
  probs = (probs * array_ops.expand_dims(1. - prev_finished_float, 1)
           + finished_beam_bias)

  # [batch_size * beam_size, target_vocab_size]
  # compute new log_probs
  log_probs = probs + array_ops.expand_dims(prev_log_probs, 1)
  # new decoding length: [batch_size * beam_size]
  lengths = lengths + 1 - math_ops.to_int32(finished)

  # compute beam score
  #   length_penalty: [batch_size * beam_size,]
  length_penalty = math_ops.pow(
      ((5.0 + math_ops.to_float(lengths)) / 6.0), -alpha)
  scores = log_probs * array_ops.expand_dims(length_penalty, axis=1)

  # flatten
  # [batch_size, beam_size * target_vocab_size]
  scores = array_ops.reshape(array_ops.reshape(scores, [-1]), [batch_size, -1])
  ret_log_probs = array_ops.reshape(
      array_ops.reshape(log_probs, [-1]), [batch_size, -1])
  scores_flat = control_flow_ops.cond(
      ops.convert_to_tensor(time) > 0,
      lambda: scores,  # time > 0: all
      lambda: array_ops.slice(
          scores, [0, 0], [-1, vocab_size]))  # time = 0: first logits in each batch

  # [batch_size, beam_size] will restore top live_k
  # Only the indices are needed: the selected log-probs are gathered from the
  # unpenalised table below rather than recovered from the penalised scores.
  _, sample_ids = nn_ops.top_k(scores_flat, k=beam_size)
  # flatten: [batch_size * beam_size,]
  sample_ids = array_ops.reshape(sample_ids, [-1])
  ret_sample_ids = sample_ids

  # because we do topk to scores with dim:[batch, beam * vocab]
  #   we need to cover the true word ids
  word_ids = math_ops.mod(sample_ids, vocab_size)

  # beam ids should be adjusted according to batch_size
  #   batch_pos, [batch_size, beam_size]: [[0, 0, ...], [1, 1,...], [batch_size,...] ]
  batch_pos = compute_batch_indices(batch_size, beam_size)
  # compute new beam_ids, [batch_size * beam_size, ]
  beam_ids = math_ops.div(sample_ids, vocab_size) \
      + array_ops.reshape(batch_pos * beam_size, [-1])

  # gather each length penalty
  length_penalty = gather_states(length_penalty, beam_ids)
  # gather states according to beam_ids
  next_lengths = gather_states(lengths, beam_ids)

  # [batch_size * beam_size * vocab_size, ]
  log_probs_flat = array_ops.reshape(log_probs, [-1])
  # Offset each row's in-row index by that row's start in the flat table.
  log_probs_index = (
      array_ops.reshape(batch_pos, [-1]) * beam_size * vocab_size + sample_ids)
  next_log_probs = array_ops.gather(log_probs_flat, log_probs_index)

  return (word_ids, beam_ids, next_log_probs, next_lengths, ret_log_probs,
          ret_sample_ids, length_penalty)
def topk(v, k=k):
  """Returns the sorted `nn_ops.top_k` result for `v`.

  The default for `k` is bound to the enclosing scope's `k` at definition time.
  """
  sorted_top_k = nn_ops.top_k(v, k=k, sorted=True)
  return sorted_top_k
def _beam_search_step(time, logits, beam_state, batch_size, beam_width,
                      end_token, length_penalty_weight):
  """Performs a single step of Beam Search Decoding.

  Args:
    time: Beam search time step, should start at 0. At time 0 we assume
      that all beams are equal and consider only the first beam for
      continuations.
    logits: Logits at the current time step. A tensor of shape `[B, vocab_size]`
      (NOTE(review): the code below treats the log-probs as
      `[batch_size, beam_width, vocab_size]` -- confirm what `B` stands for).
    beam_state: Current state of the beam search. Its `lengths`, `finished`,
      `log_probs` and `cell_state` fields are read here.
    batch_size: The batch size for this input.
    beam_width: The size of the beams.
    end_token: The int32 end token.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.

  Returns:
    A tuple `(output, next_state)`: a `BeamSearchDecoderOutput` holding the
    selected scores, predicted word ids and parent beam ids, and a
    `BeamSearchDecoderState` with the updated log probs, lengths and finished
    flags. `cell_state` is passed through from `beam_state` unchanged.
  """
  static_batch_size = tensor_util.constant_value(batch_size)

  # Calculate the current lengths of the predictions
  prediction_lengths = beam_state.lengths
  previously_finished = beam_state.finished

  # Calculate the total log probs for the new hypotheses
  # Final Shape: [batch_size, beam_width, vocab_size]
  probs = nn_ops.log_softmax(logits)
  probs = _mask_probs(probs, end_token, previously_finished)
  total_probs = array_ops.expand_dims(beam_state.log_probs, 2) + probs

  # Calculate the continuation lengths by adding to all continuing beams.
  # The static vocab size is used, so the last logits dimension must be known
  # at graph construction time.
  vocab_size = logits.get_shape().as_list()[-1]
  # one_hot(..., on_value=0, off_value=1): 0 at `end_token`, 1 for every other
  # word, i.e. emitting any non-EOS word lengthens the hypothesis by one.
  lengths_to_add = array_ops.one_hot(
      array_ops.tile(
          array_ops.reshape(end_token, [1, 1]), [batch_size, beam_width]),
      vocab_size, 0, 1)
  # Beams that were already finished never grow.
  add_mask = (1 - math_ops.to_int32(previously_finished))
  lengths_to_add = array_ops.expand_dims(add_mask, 2) * lengths_to_add
  new_prediction_lengths = array_ops.expand_dims(prediction_lengths, 2) + lengths_to_add

  # Calculate the scores for each beam
  scores = _get_scores(
      log_probs=total_probs,
      sequence_lengths=new_prediction_lengths,
      length_penalty_weight=length_penalty_weight)

  scores_flat = array_ops.reshape(scores, [batch_size, -1])
  # During the first time step we only consider the initial beam
  # (the first lambda captures the `scores_flat` defined just above; the name
  # is only rebound once `cond` returns).
  scores_flat = control_flow_ops.cond(
      ops.convert_to_tensor(time) > 0,
      lambda: scores_flat,
      lambda: scores[:, 0])

  # Pick the next beams according to the specified successors function
  next_beam_scores, word_indices = nn_ops.top_k(scores_flat, k=beam_width)
  next_beam_scores.set_shape([static_batch_size, beam_width])
  word_indices.set_shape([static_batch_size, beam_width])

  # Pick out the probs, beam_ids, and states according to the chosen predictions
  next_beam_probs = _tensor_gather_helper(
      gather_indices=word_indices,
      gather_from=total_probs,
      range_input=batch_size,
      range_size=beam_width * vocab_size,
      final_shape=[static_batch_size, beam_width])
  # `word_indices` index the flattened [beam_width * vocab_size] axis: the
  # remainder is the word id and the quotient is the parent beam.
  next_word_ids = math_ops.to_int32(word_indices % vocab_size)
  next_beam_ids = math_ops.to_int32(word_indices / vocab_size)

  # Append new ids to current predictions
  # From here on `previously_finished` is re-ordered to follow the parent beams.
  previously_finished = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=previously_finished,
      range_input=batch_size,
      range_size=beam_width,
      final_shape=[static_batch_size, beam_width])
  next_finished = math_ops.logical_or(previously_finished,
                                      math_ops.equal(next_word_ids, end_token))

  # Calculate the length of the next predictions.
  # 1. Finished beams remain unchanged
  # 2. Beams that are now finished (EOS predicted) remain unchanged
  # 3. Beams that are not yet finished have their length increased by 1
  lengths_to_add = math_ops.to_int32(
      math_ops.not_equal(next_word_ids, end_token))
  lengths_to_add = (1 - math_ops.to_int32(next_finished)) * lengths_to_add
  next_prediction_len = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=beam_state.lengths,
      range_input=batch_size,
      range_size=beam_width,
      final_shape=[static_batch_size, beam_width])
  next_prediction_len += lengths_to_add

  next_state = BeamSearchDecoderState(
      cell_state=beam_state.cell_state,
      log_probs=next_beam_probs,
      lengths=next_prediction_len,
      finished=next_finished)

  output = BeamSearchDecoderOutput(
      scores=next_beam_scores,
      predicted_ids=next_word_ids,
      parent_ids=next_beam_ids)

  return output, next_state
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight):
  """Performs a single step of Beam Search Decoding.

  Args:
    time: Beam search time step, should start at 0. At time 0 we assume
      that all beams are equal and consider only the first beam for
      continuations.
    logits: Logits at the current time step. A tensor of shape
      `[batch_size, beam_width, vocab_size]`
    next_cell_state: The next state from the cell, e.g. an instance of
      AttentionWrapperState if the cell is attentional.
    beam_state: Current state of the beam search.
      An instance of `BeamSearchDecoderState`.
    batch_size: The batch size for this input.
    beam_width: Python int.  The size of the beams.
    end_token: The int32 end token.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.

  Returns:
    A tuple `(output, next_state)`: a `BeamSearchDecoderOutput` holding the
    selected scores, predicted word ids and parent beam ids, and a
    `BeamSearchDecoderState` whose cell state, log probs, lengths and finished
    flags follow the selected parent beams.
  """
  static_batch_size = tensor_util.constant_value(batch_size)

  # Calculate the current lengths of the predictions
  prediction_lengths = beam_state.lengths
  previously_finished = beam_state.finished

  # Calculate the total log probs for the new hypotheses
  # Final Shape: [batch_size, beam_width, vocab_size]
  step_log_probs = nn_ops.log_softmax(logits)
  step_log_probs = _mask_probs(
      step_log_probs, end_token, previously_finished)
  total_probs = tf.expand_dims(
      beam_state.log_probs, axis=2) + step_log_probs

  # Calculate the continuation lengths by adding to all continuing beams.
  # Prefer the static vocab size; fall back to the runtime shape when the last
  # logits dimension is not known at graph construction time (a `None` depth
  # cannot be used by `one_hot` or the gather offsets below).
  vocab_size = logits.shape[-1].value or tf.shape(logits)[-1]
  # on_value=0 / off_value=1: emitting any word other than `end_token`
  # lengthens the hypothesis by one.
  lengths_to_add = tf.one_hot(
      indices=tf.tile(
          tf.reshape(end_token, [1, 1]), [batch_size, beam_width]),
      depth=vocab_size,
      on_value=0,
      off_value=1)
  # Beams that were already finished never grow.
  add_mask = (1 - tf.to_int32(previously_finished))
  lengths_to_add = tf.expand_dims(add_mask, 2) * lengths_to_add
  new_prediction_lengths = (
      lengths_to_add + tf.expand_dims(prediction_lengths, 2))

  # Calculate the scores for each beam
  scores = _get_scores(
      log_probs=total_probs,
      sequence_lengths=new_prediction_lengths,
      length_penalty_weight=length_penalty_weight)

  time = ops.convert_to_tensor(time, name="time")
  # During the first time step we only consider the initial beam
  scores_shape = tf.shape(scores)
  scores_flat = tf.cond(
      time > 0,
      lambda: tf.reshape(scores, [batch_size, -1]),
      lambda: scores[:, 0])
  # Number of candidates per batch entry: beam_width * vocab_size in general,
  # only vocab_size on the first step.
  num_available_beam = tf.cond(
      time > 0,
      lambda: tf.reduce_prod(scores_shape[1:]),
      lambda: tf.reduce_prod(scores_shape[2:]))

  # Pick the next beams according to the specified successors function
  next_beam_size = tf.minimum(
      ops.convert_to_tensor(
          beam_width, dtype=dtypes.int32, name="beam_width"),
      num_available_beam)
  next_beam_scores, word_indices = nn_ops.top_k(
      scores_flat, k=next_beam_size)
  # NOTE(review): the static shape assumes next_beam_size == beam_width, i.e.
  # at least beam_width candidates are available -- confirm for tiny vocabs.
  next_beam_scores.set_shape([static_batch_size, beam_width])
  word_indices.set_shape([static_batch_size, beam_width])

  # Pick out the probs, beam_ids, and states according to the chosen
  # predictions
  next_beam_probs = _tensor_gather_helper(
      gather_indices=word_indices,
      gather_from=total_probs,
      batch_size=batch_size,
      range_size=beam_width * vocab_size,
      gather_shape=[-1])
  # `word_indices` index the flattened [beam_width * vocab_size] axis: the
  # remainder is the word id and the quotient is the parent beam.
  next_word_ids = tf.to_int32(word_indices % vocab_size)
  next_beam_ids = tf.to_int32(word_indices / vocab_size)

  # Append new ids to current predictions
  # From here on `previously_finished` is re-ordered to follow the parent beams.
  previously_finished = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=previously_finished,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  next_finished = tf.logical_or(previously_finished,
                                tf.equal(next_word_ids, end_token))

  # Calculate the length of the next predictions.
  # 1. Finished beams remain unchanged
  # 2. Beams that are now finished (EOS predicted) remain unchanged
  # 3. Beams that are not yet finished have their length increased by 1
  lengths_to_add = tf.to_int32(
      tf.not_equal(next_word_ids, end_token))
  lengths_to_add = (1 - tf.to_int32(next_finished)) * lengths_to_add
  next_prediction_len = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=beam_state.lengths,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  next_prediction_len += lengths_to_add

  # Pick out the cell_states according to the next_beam_ids. We use a
  # different gather_shape here because the cell_state tensors, i.e.
  # the tensors that would be gathered from, all have dimension
  # greater than two and we need to preserve those dimensions.
  next_cell_state = nest.map_structure(
      lambda gather_from: _maybe_tensor_gather_helper(
          gather_indices=next_beam_ids,
          gather_from=gather_from,
          batch_size=batch_size,
          range_size=beam_width,
          gather_shape=[batch_size * beam_width, -1]),
      next_cell_state)

  next_state = BeamSearchDecoderState(
      cell_state=next_cell_state,
      log_probs=next_beam_probs,
      lengths=next_prediction_len,
      finished=next_finished)

  output = BeamSearchDecoderOutput(
      scores=next_beam_scores,
      predicted_ids=next_word_ids,
      parent_ids=next_beam_ids)

  return output, next_state
def testKTooLarge(self):
  """Checks that `top_k` rejects a `k` larger than the last dimension."""
  two_wide_rows = [[0.1, 0.2], [0.3, 0.4]]
  expected_error = r"must have last dimension >= k = 4"
  with self.assertRaisesRegexp(ValueError, expected_error):
    nn_ops.top_k(two_wide_rows, 4)
def tftop_k(_):
  """Builds a top-2 graph over a 5-element int32 placeholder named `x`.

  The `top_k` op is named `values`; its index output is re-exported through
  an identity op named `indices`. The argument is ignored.
  """
  x = array_ops.placeholder(dtypes.int32, shape=[5], name='x')
  unused_values, top_indices = nn_ops.top_k(x, 2, name='values')
  array_ops.identity(top_indices, name='indices')
def _sort_tensor(tensor):
  """Sorts a `Tensor` along its last dimension by taking a full-width `top_k`."""
  last_dim = array_ops.shape(tensor)[-1]
  # Asking for every element of the last axis turns top_k into a full sort;
  # only the values are returned.
  return nn_ops.top_k(tensor, k=last_dim)[0]
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight):
  """Performs a single step of Beam Search Decoding.

  Args:
    time: Beam search time step, should start at 0. At time 0 we assume
      that all beams are equal and consider only the first beam for
      continuations.
    logits: Logits at the current time step. A tensor of shape
      `[batch_size, beam_width, vocab_size]`
    next_cell_state: The next state from the cell, e.g. an instance of
      AttentionWrapperState if the cell is attentional.
    beam_state: Current state of the beam search.
      An instance of `BeamSearchDecoderState`.
    batch_size: The batch size for this input.
    beam_width: Python int.  The size of the beams.
    end_token: The int32 end token.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.

  Returns:
    A tuple `(output, next_state)`: a `BeamSearchDecoderOutput` holding the
    selected scores, predicted word ids and parent beam ids, and a
    `BeamSearchDecoderState` whose cell state, log probs, lengths and finished
    flags follow the selected parent beams.
  """
  static_batch_size = tensor_util.constant_value(batch_size)

  # Calculate the current lengths of the predictions
  prediction_lengths = beam_state.lengths
  previously_finished = beam_state.finished

  # Calculate the total log probs for the new hypotheses
  # Final Shape: [batch_size, beam_width, vocab_size]
  step_log_probs = nn_ops.log_softmax(logits)
  step_log_probs = _mask_probs(step_log_probs, end_token, previously_finished)
  total_probs = array_ops.expand_dims(beam_state.log_probs, 2) + step_log_probs

  # Calculate the continuation lengths by adding to all continuing beams.
  # Static vocab size when known, otherwise the runtime shape.
  vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1]
  # on_value=0 / off_value=1: every word except `end_token` adds one to the
  # length of the hypothesis.
  lengths_to_add = array_ops.one_hot(
      indices=array_ops.fill([batch_size, beam_width], end_token),
      depth=vocab_size,
      on_value=np.int64(0),
      off_value=np.int64(1),
      dtype=dtypes.int64)
  # Beams that were already finished never grow.
  add_mask = math_ops.to_int64(math_ops.logical_not(previously_finished))
  lengths_to_add *= array_ops.expand_dims(add_mask, 2)
  new_prediction_lengths = (
      lengths_to_add + array_ops.expand_dims(prediction_lengths, 2))

  # Calculate the scores for each beam
  scores = _get_scores(
      log_probs=total_probs,
      sequence_lengths=new_prediction_lengths,
      length_penalty_weight=length_penalty_weight)

  time = ops.convert_to_tensor(time, name="time")
  # NOTE(review): unlike what the docstring says about time 0, no first-step
  # special case is applied in this function and `time` is not read again
  # below; presumably the initial beam state handles it -- confirm in caller.
  scores_flat = array_ops.reshape(scores, [batch_size, -1])

  # Pick the next beams according to the specified successors function
  next_beam_size = ops.convert_to_tensor(
      beam_width, dtype=dtypes.int32, name="beam_width")
  next_beam_scores, word_indices = nn_ops.top_k(scores_flat, k=next_beam_size)

  next_beam_scores.set_shape([static_batch_size, beam_width])
  word_indices.set_shape([static_batch_size, beam_width])

  # Pick out the probs, beam_ids, and states according to the chosen predictions
  next_beam_probs = _tensor_gather_helper(
      gather_indices=word_indices,
      gather_from=total_probs,
      batch_size=batch_size,
      range_size=beam_width * vocab_size,
      gather_shape=[-1],
      name="next_beam_probs")
  # Note: just doing the following
  #   math_ops.to_int32(word_indices % vocab_size,
  #       name="next_beam_word_ids")
  # would be a lot cleaner but for reasons unclear, that hides the results of
  # the op which prevents capturing it with tfdbg debug ops.
  raw_next_word_ids = math_ops.mod(
      word_indices, vocab_size, name="next_beam_word_ids")
  next_word_ids = math_ops.to_int32(raw_next_word_ids)
  # The quotient of the flattened index by vocab_size is the parent beam.
  next_beam_ids = math_ops.to_int32(
      word_indices / vocab_size, name="next_beam_parent_ids")

  # Append new ids to current predictions
  # From here on `previously_finished` is re-ordered to follow the parent beams.
  previously_finished = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=previously_finished,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  next_finished = math_ops.logical_or(
      previously_finished,
      math_ops.equal(next_word_ids, end_token),
      name="next_beam_finished")

  # Calculate the length of the next predictions.
  # 1. Finished beams remain unchanged.
  # 2. Beams that are now finished (EOS predicted) have their length
  #    increased by 1.
  # 3. Beams that are not yet finished have their length increased by 1.
  lengths_to_add = math_ops.to_int64(math_ops.logical_not(previously_finished))
  next_prediction_len = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=beam_state.lengths,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  next_prediction_len += lengths_to_add

  # Pick out the cell_states according to the next_beam_ids. We use a
  # different gather_shape here because the cell_state tensors, i.e.
  # the tensors that would be gathered from, all have dimension
  # greater than two and we need to preserve those dimensions.
  # pylint: disable=g-long-lambda
  next_cell_state = nest.map_structure(
      lambda gather_from: _maybe_tensor_gather_helper(
          gather_indices=next_beam_ids,
          gather_from=gather_from,
          batch_size=batch_size,
          range_size=beam_width,
          gather_shape=[batch_size * beam_width, -1]),
      next_cell_state)
  # pylint: enable=g-long-lambda

  next_state = BeamSearchDecoderState(
      cell_state=next_cell_state,
      log_probs=next_beam_probs,
      lengths=next_prediction_len,
      finished=next_finished)

  output = BeamSearchDecoderOutput(
      scores=next_beam_scores,
      predicted_ids=next_word_ids,
      parent_ids=next_beam_ids)

  return output, next_state