def _testSerializeDeserializeNestedBatchHelper(self,
                                               serialize_fn,
                                               deserialize_fn,
                                               out_type=dtypes.string):
  """Round-trips a 2x2 nested batch of one sparse value and checks the result.

  The same sparse value is serialized once, stacked twice to form a
  `[2, 2, ...]` batch, deserialized, and compared minibatch by minibatch
  against the input.

  Args:
    serialize_fn: Callable invoked as `serialize_fn(sp_input, out_type=...)`.
    deserialize_fn: Callable invoked as
      `deserialize_fn(serialized, dtype=dtypes.int32)`.
    out_type: Forwarded to `serialize_fn` as `out_type`.
  """
  with self.cached_session(use_gpu=False) as sess:
    sp_input = self._SparseTensorValue_5x6(np.arange(6))
    serialized = serialize_fn(sp_input, out_type=out_type)
    # Stacking twice nests the same serialized tensor into a 2x2 batch.
    for _ in range(2):
      serialized = array_ops.stack([serialized, serialized])
    sp_deserialized = deserialize_fn(serialized, dtype=dtypes.int32)

    combined_indices, combined_values, combined_shape = sess.run(
        sp_deserialized)

    # (row slice, expected leading batch coordinates) for minibatches 0..3.
    # The last slice is open-ended so that any extra rows fail the check.
    minibatches = (
        (slice(None, 6), [0, 0]),
        (slice(6, 12), [0, 1]),
        (slice(12, 18), [1, 0]),
        (slice(18, None), [1, 1]),
    )
    for rows, batch_coords in minibatches:
      self.assertAllEqual(combined_indices[rows, :2], [batch_coords] * 6)
      self.assertAllEqual(combined_indices[rows, 2:], sp_input[0])
      self.assertAllEqual(combined_values[rows], sp_input[1])

    self.assertAllEqual(combined_shape, [2, 2, 5, 6])
def __call__(self,
             inputs,
             initial_state=None,
             dtype=None,
             sequence_length=None,
             scope=None):
  """Runs the wrapped cell with either the dynamic or the static RNN.

  The container type of `inputs` is preserved: a Python list in gives a list
  of outputs back, anything else gives a stacked tensor back.

  Args:
    inputs: Either a Python list of per-step inputs or a single value that is
      passed through `array_ops.unstack` for the static RNN.
    initial_state: Forwarded to the underlying RNN function.
    dtype: Forwarded to the underlying RNN function.
    sequence_length: Forwarded to the underlying RNN function.
    scope: Forwarded to the underlying RNN function.

  Returns:
    A tuple `(outputs, state)` as produced by the underlying RNN function,
    with `outputs` converted back to the container type of `inputs`.
  """
  inputs_are_list = isinstance(inputs, list)

  if not self._use_dynamic_rnn:
    # The static RNN consumes a list of per-step inputs.
    step_inputs = inputs if inputs_are_list else array_ops.unstack(inputs)
    outputs, state = contrib_rnn.static_rnn(
        self._cell,
        step_inputs,
        initial_state=initial_state,
        dtype=dtype,
        sequence_length=sequence_length,
        scope=scope)
    if not inputs_are_list:
      # Convert outputs back to tensor.
      outputs = array_ops.stack(outputs)
    return outputs, state

  # The dynamic RNN consumes one stacked, time-major input.
  stacked_inputs = array_ops.stack(inputs) if inputs_are_list else inputs
  outputs, state = rnn.dynamic_rnn(
      self._cell,
      stacked_inputs,
      sequence_length=sequence_length,
      initial_state=initial_state,
      dtype=dtype,
      time_major=True,
      scope=scope)
  if inputs_are_list:
    # Convert outputs back to list.
    outputs = array_ops.unstack(outputs)
  return outputs, state
def _prepare_inputs_for_rnn(sequence_features, context_features, num_unroll):
  """Prepares features batched by the SQSS for input to a state-saving RNN.

  Args:
    sequence_features: A dict of sequence feature name to `Tensor`, with
      tensors of shape `[batch_size, num_unroll, ...]` and type float32.
    context_features: A dict of context feature name to `Tensor`, with tensors
      of shape `[batch_size, 1, ...]` and type float32. May be empty or
      `None`, in which case only the sequence features are used.
    num_unroll: Python integer, how many time steps to unroll at a time. The
      input sequences of length `k` are then split into `k / num_unroll` many
      segments.

  Returns:
    features_by_time: A list of length `num_unroll` with `Tensor` entries of
      shape `[batch_size, len(sequence_features) + len(context_features)]` of
      type float32. Features are stored in lexicographic order by their
      corresponding feature dict keys, first in the `sequence_features` and
      then in the `context_features` dicts. Context features are copied into
      each time step.
  """

  def _tile(feature):
    # Insert a time axis, repeat it `num_unroll` times, then drop axis 2.
    with_time_axis = array_ops.expand_dims(feature, 1)
    repeated = array_ops.tile(with_time_axis, [1, num_unroll, 1])
    return array_ops.squeeze(repeated, axis=2)

  ordered_features = [sequence_features[k] for k in sorted(sequence_features)]
  if context_features:
    ordered_features = ordered_features + [
        _tile(context_features[k]) for k in sorted(context_features)
    ]

  # Stack features along a new last axis, then split along the time axis.
  stacked = array_ops.stack(ordered_features, 2)
  return array_ops.unstack(stacked, axis=1)
def zero_state(self, batch_size, dtype):
  """Return zero-filled state tensor(s).

  Args:
    batch_size: int, float, or unit Tensor representing the batch size.
    dtype: the data type to use for the state.

  Returns:
    If `state_size` is an int or TensorShape, then the return value is a
    `N-D` tensor of shape `[batch_size x state_size]` filled with zeros.

    If `state_size` is a nested list or tuple, then the return value is
    a nested list or tuple (of the same structure) of `2-D` tensors with
    the shapes `[batch_size x s]` for each s in `state_size`.
  """
  state_size = self.state_size

  if not nest.is_sequence(state_size):
    dynamic_shape = _state_size_with_prefix(state_size, prefix=[batch_size])
    zeros = array_ops.zeros(array_ops.stack(dynamic_shape), dtype=dtype)
    # The static shape leaves the batch dimension unknown.
    zeros.set_shape(_state_size_with_prefix(state_size, prefix=[None]))
    return zeros

  flat_sizes = nest.flatten(state_size)
  flat_zeros = []
  for size in flat_sizes:
    dynamic_shape = _state_size_with_prefix(size, prefix=[batch_size])
    flat_zeros.append(
        array_ops.zeros(array_ops.stack(dynamic_shape), dtype=dtype))
  for size, zero in zip(flat_sizes, flat_zeros):
    zero.set_shape(_state_size_with_prefix(size, prefix=[None]))
  return nest.pack_sequence_as(structure=state_size, flat_sequence=flat_zeros)
def testBatch(self):
  """Checks RGB<->HSV conversion gives the same result batched or per image."""
  # Build an arbitrary RGB image batch.
  np.random.seed(7)
  batch_size = 5
  shape = (batch_size, 2, 7, 3)

  for nptype in self.float_types:
    inp = GenerateNumpyRandomRGB(shape).astype(nptype)

    # Convert to HSV and back, as a batch and individually.
    with self.test_session() as sess:
      batch0 = array_ops.placeholder(nptype, shape=shape)
      with self.test_scope():
        batch_hsv = image_ops.rgb_to_hsv(batch0)
        batch_rgb = image_ops.hsv_to_rgb(batch_hsv)
      images = array_ops.unstack(batch0)
      with self.test_scope():
        images_hsv = [image_ops.rgb_to_hsv(image) for image in images]
        images_rgb = [image_ops.hsv_to_rgb(image) for image in images_hsv]
      joined_hsv = array_ops.stack(images_hsv)
      joined_rgb = array_ops.stack(images_rgb)
      batch1, batch2, join1, join2 = sess.run(
          [batch_hsv, batch_rgb, joined_hsv, joined_rgb], {batch0: inp})

    # Verify that processing batch elements together is the same as separate.
    self.assertAllClose(batch1, join1)
    self.assertAllClose(batch2, join2)
    self.assertAllCloseAccordingToType(
        batch2, inp, bfloat16_atol=0.03, half_rtol=0.02)
def testConst(self):
  """Checks that stacking constant data produces a `Const` op."""
  np.random.seed(7)
  shapes = ((2,), (3,), (2, 3), (3, 2), (4, 3, 2))

  with self.test_session(use_gpu=True):
    for shape in shapes:
      data = np.random.randn(*shape).astype(np.float32)

      # Pack back into a single tensorflow tensor directly using np array.
      stacked = array_ops.stack(data)
      # This is implemented via a Const:
      self.assertEqual(stacked.op.type, "Const")
      self.assertAllEqual(stacked.eval(), data)

      parallel_stacked = array_ops.parallel_stack(data)
      self.assertAllEqual(parallel_stacked.eval(), data)

      # Python lists also work for 1-D case:
      if len(shape) == 1:
        data_list = list(data)
        stacked_list = array_ops.stack(data_list)
        self.assertEqual(stacked_list.op.type, "Const")
        self.assertAllEqual(stacked_list.eval(), data)

        parallel_stacked_list = array_ops.parallel_stack(data_list)
        self.assertAllEqual(parallel_stacked_list.eval(), data)

    # Verify that shape induction works with shapes produced via const stack.
    flat = constant_op.constant([1, 2, 3, 4, 5, 6])
    reshaped = array_ops.reshape(flat, array_ops.stack([2, 3]))
    self.assertAllEqual(reshaped.get_shape(), [2, 3])
def testConcat(self):
  """Checks batched tensor-list concatenation and its error cases."""
  scalar_values = constant_op.constant([1.0, 2.0], dtype=dtypes.float32)
  l0 = list_ops.tensor_list_from_tensor(
      scalar_values, element_shape=scalar_shape())
  l1 = list_ops.tensor_list_from_tensor([-1.0], element_shape=scalar_shape())
  l_batch_0 = array_ops.stack([l0, l1])
  l_batch_1 = array_ops.stack([l1, l0])

  def concat_lists(first, second):
    return list_ops.tensor_list_concat_lists(
        first, second, element_dtype=dtypes.float32)

  l_concat_01 = concat_lists(l_batch_0, l_batch_1)
  l_concat_10 = concat_lists(l_batch_1, l_batch_0)
  l_concat_00 = concat_lists(l_batch_0, l_batch_0)
  l_concat_11 = concat_lists(l_batch_1, l_batch_1)

  cases = [
      (l_concat_00, [[1.0, 2.0, 1.0, 2.0], [-1.0, -1.0]]),
      (l_concat_01, [[1.0, 2.0, -1.0], [-1.0, 1.0, 2.0]]),
      (l_concat_10, [[-1.0, 1.0, 2.0], [1.0, 2.0, -1.0]]),
      (l_concat_11, [[-1.0, -1.0], [1.0, 2.0, 1.0, 2.0]]),
  ]
  for i, (concat, expected) in enumerate(cases):
    splitted = array_ops.unstack(concat)
    splitted_stacked_ret = self.evaluate(
        (list_ops.tensor_list_stack(splitted[0], dtypes.float32),
         list_ops.tensor_list_stack(splitted[1], dtypes.float32)))
    print("Test concat %d: %s, %s, %s, %s"
          % (i, expected[0], splitted_stacked_ret[0],
             expected[1], splitted_stacked_ret[1]))
    self.assertAllClose(expected[0], splitted_stacked_ret[0])
    self.assertAllClose(expected[1], splitted_stacked_ret[1])

  # Concatenating mismatched shapes fails.
  with self.assertRaises((errors.InvalidArgumentError, ValueError)):
    empty_list = list_ops.empty_tensor_list(scalar_shape(), dtypes.float32)
    self.evaluate(concat_lists(l_batch_0, empty_list))

  # Element shapes of the two batches must agree.
  with self.assertRaisesRegexp(errors.InvalidArgumentError,
                               "element shapes are not identical at index 0"):
    l_batch_of_vec_tls = array_ops.stack(
        [list_ops.tensor_list_from_tensor([[1.0]], element_shape=[1])] * 2)
    self.evaluate(concat_lists(l_batch_0, l_batch_of_vec_tls))

  # Element dtypes must match the requested `element_dtype`.
  with self.assertRaisesRegexp(errors.InvalidArgumentError,
                               r"input_b\[0\].dtype != element_dtype."):
    l_batch_of_int_tls = array_ops.stack(
        [list_ops.tensor_list_from_tensor([1], element_shape=scalar_shape())]
        * 2)
    self.evaluate(concat_lists(l_batch_0, l_batch_of_int_tls))
def testWithExtensionAndAttr(self):
  """Imports a graph containing a `stack` op and evaluates its output."""
  with ops.Graph().as_default() as source_graph:
    five = constant_op.constant(5.0, dtype=dtypes.float32, name="c")
    array_ops.stack([five, five], name="pack")
  gdef = source_graph.as_graph_def()

  with self.test_session():
    imported = importer.import_graph_def(gdef, return_elements=["pack"])
    # Exactly one element was requested, so exactly one must come back.
    (pack,) = imported
    self.assertAllEqual(pack.outputs[0].eval(), [5.0, 5.0])
def crop_to_bounding_box(image, offset_height, offset_width, target_height,
                         target_width):
  """Crops an image to a specified bounding box.

  This op cuts a rectangular part out of `image`. The top-left corner of the
  returned image is at `offset_height, offset_width` in `image`, and its
  lower-right corner is at
  `offset_height + target_height, offset_width + target_width`.

  Args:
    image: 3-D tensor with shape `[height, width, channels]`
    offset_height: Vertical coordinate of the top-left corner of the result in
      the input.
    offset_width: Horizontal coordinate of the top-left corner of the result in
      the input.
    target_height: Height of the result.
    target_width: Width of the result.

  Returns:
    3-D tensor of image with shape `[target_height, target_width, channels]`

  Raises:
    ValueError: If the shape of `image` is incompatible with the `offset_*` or
      `target_*` arguments, or either `offset_height` or `offset_width` is
      negative, or either `target_height` or `target_width` is not positive.
  """
  image = ops.convert_to_tensor(image, name='image')

  checks = []
  checks.extend(_Check3DImage(image, require_static=False))

  height, width, depth = _ImageDimensions(image)

  # Each check is collected into `checks` and attached to `image` below.
  checks.extend(
      _assert(offset_width >= 0, ValueError, 'offset_width must be >= 0.'))
  checks.extend(
      _assert(offset_height >= 0, ValueError, 'offset_height must be >= 0.'))
  checks.extend(
      _assert(target_width > 0, ValueError, 'target_width must be > 0.'))
  checks.extend(
      _assert(target_height > 0, ValueError, 'target_height must be > 0.'))
  checks.extend(
      _assert(width >= (target_width + offset_width), ValueError,
              'width must be >= target + offset.'))
  checks.extend(
      _assert(height >= (target_height + offset_height), ValueError,
              'height must be >= target + offset.'))
  image = control_flow_ops.with_dependencies(checks, image)

  slice_begin = array_ops.stack([offset_height, offset_width, 0])
  # -1 keeps every remaining element along the channel axis.
  slice_size = array_ops.stack([target_height, target_width, -1])
  cropped = array_ops.slice(image, slice_begin, slice_size)

  # Dimensions given as tensors are left unknown in the static shape.
  static_shape = []
  for dim in (target_height, target_width, depth):
    static_shape.append(None if _is_tensor(dim) else dim)
  cropped.set_shape(static_shape)

  return cropped
def testOpsBetweenUnreachable(self):
  """Checks `_OpsBetween` when `from_ops` cannot reach `to_ops`."""
  with ops.Graph().as_default() as graph:
    # First, unrelated subgraph: t1 and t2 feed a stack that is discarded.
    t1 = constant(1.0)
    t2 = constant(2.0)
    _ = array_ops.stack([t1, t2])
    # Second subgraph, which does not depend on t1.
    t4 = constant(1.0)
    t5 = constant(2.0)
    t6 = array_ops.stack([t4, t5])
  # Elements of to_ops are always listed.
  expected_ops = [t6.op]
  actual_ops = _OpsBetween(graph, [t6.op], [t1.op])
  self._assertOpListEqual(expected_ops, actual_ops)
def testPack_Axis1(self):
  """Checks `constant_value` returns None for a stack along axis 1."""
  inputs = [np.random.rand(4, 7) for _ in range(3)]

  all_constant = array_ops.stack(inputs, axis=1)
  self.assertIsNone(tensor_util.constant_value(all_constant))

  # Same check with a placeholder replacing the middle input.
  with_placeholder = array_ops.stack(
      [inputs[0], array_ops.placeholder(dtypes.float32), inputs[2]], axis=1)
  self.assertIsNone(tensor_util.constant_value(with_placeholder))
def testSequenceLoss(self):
  """Checks `sequence_loss` for all four averaging combinations."""
  with self.session(use_gpu=True) as sess:
    with variable_scope.variable_scope(
        'root', initializer=init_ops.constant_initializer(0.5)):
      batch_size = 2
      sequence_length = 3
      number_of_classes = 5

      per_step_logits = [
          constant_op.constant(
              step + 0.5, shape=[batch_size, number_of_classes])
          for step in range(sequence_length)
      ]
      logits = array_ops.stack(per_step_logits, axis=1)

      per_step_targets = [
          constant_op.constant(step, dtypes.int32, shape=[batch_size])
          for step in range(sequence_length)
      ]
      targets = array_ops.stack(per_step_targets, axis=1)

      per_step_weights = [
          constant_op.constant(1.0, shape=[batch_size])
          for _ in range(sequence_length)
      ]
      weights = array_ops.stack(per_step_weights, axis=1)

      def run_loss(across_timesteps, across_batch):
        # Builds the loss op for one averaging mode and evaluates it.
        loss_op = loss.sequence_loss(
            logits,
            targets,
            weights,
            average_across_timesteps=across_timesteps,
            average_across_batch=across_batch)
        return sess.run(loss_op)

      # Value every loss entry is compared against below.
      expected_loss = 1.60944

      res = run_loss(True, True)
      self.assertAllClose(expected_loss, res)

      res = run_loss(False, True)
      compare_per_sequence = np.ones(sequence_length) * expected_loss
      self.assertAllClose(compare_per_sequence, res)

      res = run_loss(True, False)
      compare_per_batch = np.ones(batch_size) * expected_loss
      self.assertAllClose(compare_per_batch, res)

      res = run_loss(False, False)
      compare_total = np.ones((batch_size, sequence_length)) * expected_loss
      self.assertAllClose(compare_total, res)
def inference_graph(self, input_data, **inference_args):
  """Constructs a TF graph for evaluating a random forest.

  Args:
    input_data: A tensor or dict of string->Tensor for the input data.
      This input_data must generate the same spec as the
      input_data used in training_graph:  the dict must have
      the same keys, for example, and all tensors must have
      the same size in their first dimension.
    **inference_args: Keyword arguments to pass through to each tree.

  Returns:
    A tuple of (probabilities, tree_paths, variance), where variance
    is the variance over all the trees for regression problems only.

  Raises:
    NotImplementedError: If trying to use feature bagging with sparse
      features.
  """
  parsed = data_ops.ParseDataTensorOrDict(input_data)
  processed_dense_features, processed_sparse_features, data_spec = parsed

  num_trees = self.params.num_trees
  probabilities = []
  paths = []
  for tree_index in range(num_trees):
    with ops.device(self.variables.device_dummies[tree_index].device):
      tree_data = processed_dense_features
      if self.params.bagged_features:
        if processed_sparse_features is not None:
          raise NotImplementedError(
              'Feature bagging not supported with sparse features.')
        tree_data = self._bag_features(tree_index, tree_data)
      tree_probs, tree_path = self.trees[tree_index].inference_graph(
          tree_data,
          data_spec,
          sparse_features=processed_sparse_features,
          **inference_args)
      probabilities.append(tree_probs)
      paths.append(tree_path)

  with ops.device(self.variables.device_dummies[0].device):
    # shape of all_predict should be [batch_size, num_trees, num_outputs]
    all_predict = array_ops.stack(probabilities, axis=1)
    prediction_sum = math_ops.reduce_sum(all_predict, 1)
    average_values = math_ops.div(
        prediction_sum, self.params.num_trees, name='probabilities')
    tree_paths = array_ops.stack(paths, axis=1)

    regression_variance = None
    if self.params.regression:
      # Variance as E[x^2] - E[x]^2 over trees, floored at zero.
      squares_sum = math_ops.reduce_sum(all_predict * all_predict, 1)
      expected_squares = math_ops.div(squares_sum, self.params.num_trees)
      regression_variance = math_ops.maximum(
          0., expected_squares - average_values * average_values)
    return average_values, tree_paths, regression_variance
def _ctc_state_trans(label_seq):
  """Compute CTC alignment model transition matrix.

  Args:
    label_seq: tensor of shape [batch_size, max_seq_length]

  Returns:
    tensor of shape [batch_size, states, states] with a state transition
    matrix computed for each sequence of the batch.
  """
  with ops.name_scope("ctc_state_trans"):
    label_seq = ops.convert_to_tensor(label_seq, name="label_seq")
    batch_size = _get_dim(label_seq, 0)
    num_labels = _get_dim(label_seq, 1)

    # One state per label plus one extra, and as many blank states again.
    num_label_states = num_labels + 1
    num_states = 2 * num_label_states

    label_states = math_ops.range(num_label_states)
    blank_states = label_states + num_label_states

    # Start state to first label.
    start_to_label = [[1, 0]]

    # Blank to label transitions.
    blank_to_label = array_ops.stack(
        [label_states[1:], blank_states[:-1]], 1)

    # Label to blank transitions.
    label_to_blank = array_ops.stack([blank_states, label_states], 1)

    # Scatter transitions that don't depend on sequence.
    shared_indices = array_ops.concat(
        [start_to_label, blank_to_label, label_to_blank], 0)
    shared_values = array_ops.ones([_get_dim(shared_indices, 0)])
    trans = array_ops.scatter_nd(
        shared_indices, shared_values, shape=[num_states, num_states])
    trans += linalg_ops.eye(num_states)  # Self-loops.

    # Label to label transitions. Disallow transitions between repeated labels
    # with no blank state in between.
    zero_batch_idx = array_ops.zeros_like(label_states[2:])
    seq_indices = array_ops.stack(
        [zero_batch_idx, label_states[2:], label_states[1:-1]], 1)
    seq_indices = array_ops.tile(
        array_ops.expand_dims(seq_indices, 0), [batch_size, 1, 1])

    # Add each batch entry's own index to the first coordinate only.
    batch_offsets = array_ops.expand_dims(
        math_ops.range(batch_size), 1) * [1, 0, 0]
    seq_indices += array_ops.expand_dims(batch_offsets, 1)

    repeats = math_ops.equal(label_seq[:, :-1], label_seq[:, 1:])
    # 0.0 where adjacent labels repeat, 1.0 elsewhere.
    seq_values = 1.0 - math_ops.cast(repeats, dtypes.float32)
    batched_shape = [batch_size, num_states, num_states]
    label_to_label = array_ops.scatter_nd(
        seq_indices, seq_values, batched_shape)

    return array_ops.expand_dims(trans, 0) + label_to_label
def testPack_Axis0(self):
  """Checks `constant_value` of a default-axis stack of constant inputs."""
  inputs = [np.random.rand(4, 7) for _ in range(3)]
  expected = np.array(inputs)

  all_constant = array_ops.stack(inputs)
  self.assertAllClose(expected, tensor_util.constant_value(all_constant))

  # With a placeholder among the inputs, no constant value is returned.
  with_placeholder = array_ops.stack(
      [inputs[0], array_ops.placeholder(dtypes.float32), inputs[2]])
  self.assertIsNone(tensor_util.constant_value(with_placeholder))
def _testAllFormats(self,
                    superdiag,
                    maindiag,
                    subdiag,
                    rhs,
                    expected,
                    dtype=dtypes.float64):
  """Checks `tridiagonal_matmul` for every diagonals format, with and without batching.

  Args:
    superdiag: Superdiagonal values, padded with one trailing zero here.
    maindiag: Main diagonal values.
    subdiag: Subdiagonal values, padded with one leading zero here.
    rhs: Right-hand side of the product.
    expected: Expected product for the non-batched inputs.
    dtype: dtype used for all constants built here.
  """
  superdiag_padded = np.pad(superdiag, [0, 1], 'constant')
  subdiag_padded = np.pad(subdiag, [1, 0], 'constant')
  compact_np = np.stack([superdiag_padded, maindiag, subdiag_padded])
  matrix_np = (
      np.diag(superdiag, 1) + np.diag(maindiag, 0) + np.diag(subdiag, -1))

  diags_sequence = (constant_op.constant(superdiag_padded, dtype),
                    constant_op.constant(maindiag, dtype),
                    constant_op.constant(subdiag_padded, dtype))
  diags_compact = constant_op.constant(compact_np, dtype)
  diags_matrix = constant_op.constant(matrix_np, dtype)
  rhs = constant_op.constant(rhs, dtype)

  def with_doubled_copy(tensor):
    # Batch of two: the tensor itself and the tensor scaled by 2.
    return array_ops.stack([tensor, 2 * tensor])

  rhs_batch = with_doubled_copy(rhs)
  diags_compact_batch = with_doubled_copy(diags_compact)
  diags_matrix_batch = with_doubled_copy(diags_matrix)
  diags_sequence_batch = [with_doubled_copy(x) for x in diags_sequence]

  def matmul_in_all_formats(sequence, compact, matrix, right):
    return [
        linalg_impl.tridiagonal_matmul(
            sequence, right, diagonals_format='sequence'),
        linalg_impl.tridiagonal_matmul(
            compact, right, diagonals_format='compact'),
        linalg_impl.tridiagonal_matmul(
            matrix, right, diagonals_format='matrix'),
    ]

  results = matmul_in_all_formats(
      diags_sequence, diags_compact, diags_matrix, rhs)
  results_batch = matmul_in_all_formats(
      diags_sequence_batch, diags_compact_batch, diags_matrix_batch,
      rhs_batch)

  with self.cached_session(use_gpu=True):
    results = self.evaluate(results)
    results_batch = self.evaluate(results_batch)

  expected = np.array(expected)
  # Diagonals and rhs are both doubled in the second batch entry, so the
  # product there is scaled by 4.
  expected_batch = np.stack([expected, 4 * expected])
  for result in results:
    self.assertAllClose(result, expected)
  for result in results_batch:
    self.assertAllClose(result, expected_batch)
def _log_prob(self, x):
  """Computes the log-probability of `x` using `self.interpolated_affine`.

  Args:
    x: Value passed to each affine's `inverse` and
      `forward_log_det_jacobian`.

  Returns:
    The log-sum-exp over the last axis of
    `mixture_distribution.logits - fldj + log_prob`.
  """
  # By convention, we always put the grid points right-most.
  inverse_images = []
  for aff in self.interpolated_affine:
    inverse_images.append(aff.inverse(x))
  y = array_ops.stack(inverse_images, axis=-1)
  log_prob = math_ops.reduce_sum(self.distribution.log_prob(y), axis=-2)

  # Because the affine transformation has a constant Jacobian, it is the case
  # that `affine.fldj(x) = -affine.ildj(x)`. This is not true in general.
  forward_log_dets = []
  for aff in self.interpolated_affine:
    forward_log_dets.append(aff.forward_log_det_jacobian(x))
  fldj = array_ops.stack(forward_log_dets, axis=-1)

  weighted_log_prob = self.mixture_distribution.logits - fldj + log_prob
  return math_ops.reduce_logsumexp(weighted_log_prob, axis=-1)
def _dict_to_tensor(self, x, k1, k2):
  """Convert a dictionary to a tensor.

  Args:
    x: a k1 * k2 dictionary, indexed as `x[i, j]`.
    k1: first dimension of x.
    k2: second dimension of x.

  Returns:
    a k1 * k2 tensor.
  """
  rows = []
  for i in range(k1):
    row_entries = [x[i, j] for j in range(k2)]
    rows.append(array_ops.stack(row_entries))
  return array_ops.stack(rows)
def _compute_energy_change(current_target_log_prob,
                           current_momentums,
                           proposed_target_log_prob,
                           proposed_momentums,
                           independent_chain_ndims,
                           name=None):
  """Helper to `kernel` which computes the energy change.

  Args:
    current_target_log_prob: Target log-probability at the current state.
    current_momentums: Python list of momentum tensors at the current state.
    proposed_target_log_prob: Target log-probability at the proposed state.
    proposed_momentums: Python list of momentum tensors at the proposed state.
    independent_chain_ndims: Start of the axis range each momentum is reduced
      over; the range ends at the momentum's rank.
    name: Optional name scope; defaults to "compute_energy_change".

  Returns:
    The sum over the four energy terms, with indeterminate rows (NaN, or both
    +Inf and -Inf present) replaced by +Inf.
  """
  scope_values = (
      [current_target_log_prob, proposed_target_log_prob,
       independent_chain_ndims] +
      current_momentums + proposed_momentums)
  with ops.name_scope(name, "compute_energy_change", scope_values):
    # Abbreviate lk0=log_kinetic_energy and lk1=proposed_log_kinetic_energy
    # since they're a mouthful and lets us inline more.
    lk0_parts = []
    lk1_parts = []
    for current_momentum, proposed_momentum in zip(current_momentums,
                                                   proposed_momentums):
      axis = math_ops.range(independent_chain_ndims,
                            array_ops.rank(current_momentum))
      lk0_parts.append(_log_sum_sq(current_momentum, axis))
      lk1_parts.append(_log_sum_sq(proposed_momentum, axis))

    lk0 = -np.log(2.) + math_ops.reduce_logsumexp(
        array_ops.stack(lk0_parts, axis=-1), axis=-1)
    lk1 = -np.log(2.) + math_ops.reduce_logsumexp(
        array_ops.stack(lk1_parts, axis=-1), axis=-1)

    lp0 = -current_target_log_prob   # log_potential
    lp1 = -proposed_target_log_prob  # proposed_log_potential
    energy_terms = array_ops.stack(
        [lp1, math_ops.exp(lk1), -lp0, -math_ops.exp(lk0)], axis=-1)

    # The sum is NaN if any element is NaN or we see both +Inf and -Inf.
    # Thus we will replace such rows with infinite energy change which implies
    # rejection. Recall that float-comparisons with NaN are always False.
    no_neg_inf_or_nan = math_ops.reduce_all(
        math_ops.is_finite(energy_terms) | (energy_terms >= 0.), axis=-1)
    no_pos_inf_or_nan = math_ops.reduce_all(
        math_ops.is_finite(energy_terms) | (energy_terms <= 0.), axis=-1)
    is_sum_determinate = no_neg_inf_or_nan & no_pos_inf_or_nan

    # Repeat the per-row flag across the 4 stacked terms.
    tile_multiples = array_ops.concat([
        array_ops.ones(array_ops.rank(is_sum_determinate),
                       dtype=dtypes.int32),
        [4],
    ], axis=0)
    is_sum_determinate = array_ops.tile(
        is_sum_determinate[..., array_ops.newaxis], multiples=tile_multiples)

    infinite_energy = array_ops.fill(
        array_ops.shape(energy_terms),
        value=energy_terms.dtype.as_numpy_dtype(np.inf))
    energy_terms = array_ops.where(
        is_sum_determinate, energy_terms, infinite_energy)

    return math_ops.reduce_sum(energy_terms, axis=-1)
def _dict_to_tensor(self, x, k1, k2, k3):
  """Convert a dictionary to a tensor.

  Args:
    x: A k1 * k2 * k3 dictionary, indexed as `x[i, j, k]`.
    k1: First dimension of x.
    k2: Second dimension of x.
    k3: Third dimension of x.

  Returns:
    A k1 * k2 * k3 tensor.
  """
  planes = []
  for i in range(k1):
    rows = []
    for j in range(k2):
      row_entries = [x[i, j, k] for k in range(k3)]
      rows.append(array_ops.stack(row_entries))
    planes.append(array_ops.stack(rows))
  return array_ops.stack(planes)
def testIndexedSlicesToTensorList(self):
  """Checks that stacking indexed-slices inputs matches stacking dense ones."""
  with self.test_session():
    dense_list = []
    sparse_list = []
    for _ in range(3):
      np_val = np.random.rand(4, 4, 4, 4).astype(np.float32)
      dense = constant_op.constant(np_val)
      # Same values, wrapped by the private indexed-slices helper.
      sparse = math_ops._as_indexed_slices(dense)
      dense_list.append(dense)
      sparse_list.append(sparse)
    packed_dense = array_ops.stack(dense_list)
    packed_sparse = array_ops.stack(sparse_list)
    self.assertAllClose(packed_dense.eval(), packed_sparse.eval())
def crop_to_bounding_box(image, offset_height, offset_width, target_height,
                         target_width, dynamic_shape=False):
  """Crops an image to a specified bounding box.

  This op cuts a rectangular part out of `image`. The top-left corner of the
  returned image is at `offset_height, offset_width` in `image`, and its
  lower-right corner is at
  `offset_height + target_height, offset_width + target_width`.

  Args:
    image: 3-D tensor with shape `[height, width, channels]`
    offset_height: Vertical coordinate of the top-left corner of the result in
      the input.
    offset_width: Horizontal coordinate of the top-left corner of the result in
      the input.
    target_height: Height of the result.
    target_width: Width of the result.
    dynamic_shape: Whether the input image has undetermined shape. If set to
      `True`, shape information will be retrieved at run time. Default to
      `False`.

  Returns:
    3-D tensor of image with shape `[target_height, target_width, channels]`

  Raises:
    ValueError: If the shape of `image` is incompatible with the `offset_*` or
      `target_*` arguments, and `dynamic_shape` is set to `False`.
  """
  image = tf.convert_to_tensor(image, name='image')
  static_shape_required = not dynamic_shape
  _Check3DImage(image, require_static=static_shape_required)
  height, width, _ = _ImageDimensions(image, dynamic_shape=dynamic_shape)

  if static_shape_required:
    # Argument validation is only performed when the shape is static.
    if offset_width < 0:
      raise ValueError('offset_width must be >= 0.')
    if offset_height < 0:
      raise ValueError('offset_height must be >= 0.')

    if width < (target_width + offset_width):
      raise ValueError('width must be >= target + offset.')
    if height < (target_height + offset_height):
      raise ValueError('height must be >= target + offset.')

  slice_begin = array_ops.stack([offset_height, offset_width, 0])
  # -1 keeps every remaining element along the channel axis.
  slice_size = array_ops.stack([target_height, target_width, -1])
  return array_ops.slice(image, slice_begin, slice_size)
def grow_tree(self, stats_summaries_list, feature_ids_list,
              last_layer_nodes_range):
  """Accumulates stats summaries and grows a tree layer once enough arrived.

  Args:
    stats_summaries_list: One list of stats-summary tensors per entry of
      `feature_ids_list`; each is stacked along axis 0 before accumulation.
    feature_ids_list: List of feature-id collections; the length of each one
      sets the first dimension of the matching accumulator's shape.
    last_layer_nodes_range: Forwarded to
      `self._grow_tree_from_stats_summaries`.

  Returns:
    A no-op for non-chief workers; otherwise a conditional op that grows the
    tree when at least `self._n_batches_per_layer` batches have been
    accumulated in every accumulator and is a no-op until then.
  """
  # For not in memory situation, we need to accumulate enough of batches first
  # before proceeding with building a tree layer.
  max_splits = _get_max_splits(self._tree_hparams)

  # Prepare accumulators.
  accumulators = []
  dependencies = []
  for i, feature_ids in enumerate(feature_ids_list):
    stats_summaries = stats_summaries_list[i]
    accumulator = data_flow_ops.ConditionalAccumulator(
        dtype=dtypes.float32,
        # The stats consist of grads and hessians (the last dimension).
        shape=[len(feature_ids), max_splits, self._bucket_size_list[i], 2],
        shared_name='numeric_stats_summary_accumulator_' + str(i))
    accumulators.append(accumulator)

    apply_grad = accumulator.apply_grad(
        array_ops.stack(stats_summaries, axis=0), self._stamp_token)
    dependencies.append(apply_grad)

  # Grow the tree if enough batches is accumulated.
  with ops.control_dependencies(dependencies):
    if not self._is_chief:
      return control_flow_ops.no_op()

    min_accumulated = math_ops.reduce_min(
        array_ops.stack([acc.num_accumulated() for acc in accumulators]))

    def grow_tree_from_accumulated_summaries_fn():
      """Updates tree with the best layer from accumulated summaries."""
      # Take out the accumulated summaries from the accumulator and grow.
      # A distinct local name avoids shadowing the enclosing parameter.
      accumulated_summaries_list = [
          array_ops.unstack(accumulator.take_grad(1), axis=0)
          for accumulator in accumulators
      ]
      grow_op = self._grow_tree_from_stats_summaries(
          accumulated_summaries_list, feature_ids_list,
          last_layer_nodes_range)
      return grow_op

    grow_model = control_flow_ops.cond(
        math_ops.greater_equal(min_accumulated, self._n_batches_per_layer),
        grow_tree_from_accumulated_summaries_fn,
        control_flow_ops.no_op,
        name='wait_until_n_batches_accumulated')
    return grow_model
def _testFeedSerializeDeserializeBatchHelper(self,
                                             serialize_fn,
                                             deserialize_fn,
                                             out_type=dtypes.string):
  """Feeds two sparse values through serialize/deserialize as one batch.

  Args:
    serialize_fn: Callable invoked as `serialize_fn(sp_input, out_type=...)`.
    deserialize_fn: Callable invoked as
      `deserialize_fn(serialized, dtype=dtypes.int32)`.
    out_type: Forwarded to `serialize_fn` as `out_type`.
  """
  with self.cached_session(use_gpu=False) as sess:
    sp_input0 = self._SparseTensorPlaceholder()
    sp_input1 = self._SparseTensorPlaceholder()
    input0_val = self._SparseTensorValue_5x6(np.arange(6))
    input1_val = self._SparseTensorValue_3x4(np.arange(6))

    serialized_inputs = [
        serialize_fn(sp_input0, out_type=out_type),
        serialize_fn(sp_input1, out_type=out_type),
    ]
    serialized_concat = array_ops.stack(serialized_inputs)

    sp_deserialized = deserialize_fn(serialized_concat, dtype=dtypes.int32)

    feed = {sp_input0: input0_val, sp_input1: input1_val}
    combined_indices, combined_values, combined_shape = sess.run(
        sp_deserialized, feed)

    first_rows = slice(None, 6)   # minibatch 0
    second_rows = slice(6, None)  # minibatch 1

    self.assertAllEqual(combined_indices[first_rows, 0], [0] * 6)
    self.assertAllEqual(combined_indices[first_rows, 1:], input0_val[0])
    self.assertAllEqual(combined_indices[second_rows, 0], [1] * 6)
    self.assertAllEqual(combined_indices[second_rows, 1:], input1_val[0])
    self.assertAllEqual(combined_values[first_rows], input0_val[1])
    self.assertAllEqual(combined_values[second_rows], input1_val[1])
    self.assertAllEqual(combined_shape, [2, 5, 6])
def _broadcast_uniform_partitioned_dimension(self, axis, lengths):
  """Broadcasts the partitioned dimension `axis` to match `lengths`.

  Args:
    axis: Index of the partitioned dimension to broadcast.
    lengths: Target size for that dimension; handled differently depending on
      whether `lengths.shape.ndims` is 0.

  Returns:
    A `RaggedTensorDynamicShape` built from the updated partitioned sizes and
    the unchanged inner dimension sizes.
  """
  axis_dim_size = self.dimension_size(axis)
  new_partitioned_sizes = list(self._partitioned_dim_sizes[:axis])

  if lengths.shape.ndims != 0:
    splits = math_ops.range(
        array_ops.size(lengths, out_type=self.dim_size_dtype) + 1)
    repeats = lengths
  else:
    # Use `lengths` only where the current size is 1; otherwise keep the
    # current size.
    lengths = array_ops.where(
        math_ops.equal(axis_dim_size, 1), lengths, axis_dim_size)
    repeats = array_ops.where(math_ops.equal(axis_dim_size, 1), lengths, 1)
    splits = array_ops.stack([0, self.num_slices_in_dimension(axis)])

  new_partitioned_sizes.append(lengths)

  # Update every partitioned dimension after `axis`, tracking `splits` as
  # each dimension is consumed.
  for dim_size in self._partitioned_dim_sizes[axis + 1:]:
    if dim_size.shape.ndims != 0:
      new_partitioned_sizes.append(
          ragged_util.repeat_ranges(dim_size, splits, repeats))
      splits = array_ops.gather(
          ragged_util.lengths_to_splits(dim_size), splits)
    else:
      new_partitioned_sizes.append(dim_size)
      splits = splits * dim_size

  return RaggedTensorDynamicShape(new_partitioned_sizes,
                                  self._inner_dim_sizes)
def testNonSequenceNestedStructure(self):
  """Checks output types and shapes of a dataset with a single component.

  Types and shapes are checked after each of `from_tensors`, `filter`, `map`
  and `flat_map`, and finally on the tensor from a one-shot iterator.
  """
  components = np.array([1, 2, 3], dtype=np.int64)

  dataset = dataset_ops.Dataset.from_tensors(components)
  self.assertEqual(dtypes.int64, dataset.output_types)
  self.assertEqual([3], dataset.output_shapes)

  dataset = dataset.filter(
      lambda x: math_ops.reduce_all(math_ops.equal(x, components)))
  self.assertEqual(dtypes.int64, dataset.output_types)
  self.assertEqual([3], dataset.output_shapes)

  # Stacking the element with itself adds a leading dimension of size 2.
  dataset = dataset.map(lambda x: array_ops.stack([x, x]))
  self.assertEqual(dtypes.int64, dataset.output_types)
  self.assertEqual([2, 3], dataset.output_shapes)

  # Slicing along the first dimension restores the per-element shape.
  dataset = dataset.flat_map(
      lambda x: dataset_ops.Dataset.from_tensor_slices(x))
  self.assertEqual(dtypes.int64, dataset.output_types)
  self.assertEqual([3], dataset.output_shapes)

  iterator = dataset.make_one_shot_iterator()
  get_next = iterator.get_next()
  self.assertEqual(dtypes.int64, get_next.dtype)
  self.assertEqual([3], get_next.shape)
def testAggregate(self):
  """Checks that stack-aggregated hint inputs/outputs collapse to one stub."""
  a = array_ops.constant([3., 4.])
  b = array_ops.constant([5., 6.])
  hint = op_hint.OpHint("agg")
  a0, a1 = array_ops.unstack(a)
  b0, b1 = array_ops.unstack(b)

  stack_mode = op_hint.OpHint.AGGREGATE_STACK

  # Inputs are registered in the order a0, b0, a1, b1.
  a0 = hint.add_input(a0, tag="c", aggregate=stack_mode)
  b0 = hint.add_input(b0, tag="n", aggregate=stack_mode)
  a1 = hint.add_input(a1, tag="c", aggregate=stack_mode)
  b1 = hint.add_input(b1, tag="n", aggregate=stack_mode)

  left_sum = math_ops.add(a0, b0, name="addleft")
  right_sum = math_ops.add(a1, b1, name="addright")
  left_sum = hint.add_output(left_sum, tag="out", aggregate=stack_mode)
  right_sum = hint.add_output(right_sum, tag="out", aggregate=stack_mode)

  stacked = array_ops.stack([left_sum, right_sum])
  output = array_ops.identity(stacked, name="FINAL_OUTPUT")
  with self.cached_session() as sess:
    stubbed_graphdef = op_hint.convert_op_hints_to_stubs(
        graph_def=sess.graph_def)
    actual_op_types = self._getGraphOpTypes(
        stubbed_graphdef,
        output_nodes=[op_hint._tensor_name_base(output.name)])
    self.assertEqual(actual_op_types, {"agg", "Const", "Identity"})
def feature_importances(self):
  """Returns per-feature usage counts summed over trees and normalized.

  Returns:
    The feature usage counts summed over all trees, divided by the sum of
    those totals.
  """
  per_tree_counts = []
  for tree_index in range(self.params.num_trees):
    per_tree_counts.append(self.trees[tree_index].feature_usage_counts())

  # Sum over the tree axis, then divide by the grand total.
  summed_counts = math_ops.reduce_sum(array_ops.stack(per_tree_counts, 0), 0)
  grand_total = math_ops.reduce_sum(summed_counts)
  return summed_counts / grand_total
def _TopKGrad(op, grad, _):
  """Return the gradients for TopK.

  Args:
    op: The TopKOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the TopKOp.
    _: Unused third argument.

  Returns:
    A list of two tensors, the first being the gradient w.r.t to
    the input and TopK, and the second being the gradient w.r.t. to
    the indices (all zero).
  """
  in_shape = array_ops.shape(op.inputs[0])
  ind_shape = array_ops.shape(op.outputs[1])

  ind_lastdim = array_ops.gather(ind_shape, array_ops.size(ind_shape) - 1)
  # Flatten indices to 2D.
  flat_shape = array_ops.stack([-1, ind_lastdim])
  ind_2d = array_ops.reshape(op.outputs[1], flat_shape)

  in_lastdim = array_ops.gather(in_shape, array_ops.size(in_shape) - 1)
  outerdim = array_ops.shape(ind_2d)[0]
  # Compute linear indices (flattened to 1D): each row is offset by its
  # starting position in the flattened input.
  row_offsets = array_ops.expand_dims(
      math_ops.range(0, outerdim * in_lastdim, in_lastdim), -1)
  ind = array_ops.reshape(ind_2d + row_offsets, [-1])

  # Substitute grad to appropriate locations and fill the rest with zeros,
  # finally reshaping it to the original input shape.
  flat_size = array_ops.reshape(math_ops.reduce_prod(in_shape), [1])
  flat_grad = array_ops.reshape(grad, [-1])
  dense_grad = sparse_ops.sparse_to_dense(
      ind, flat_size, flat_grad, validate_indices=False)
  input_grad = array_ops.reshape(dense_grad, in_shape)
  indices_grad = array_ops.zeros([], dtype=dtypes.int32)
  return [input_grad, indices_grad]
def _shape_dynamic(self):
  """Builds the shape tensor `batch_shape + [num_rows, num_rows]`.

  When `self._batch_shape_arg` is None only the square matrix shape is
  returned.
  """
  square_shape = array_ops.stack((self._num_rows, self._num_rows), axis=0)
  if self._batch_shape_arg is not None:
    return array_ops.concat((self._batch_shape_arg, square_shape), 0)
  return square_shape
def _embedding_lookup_with_distributed_aggregation(params, ids, partition_strategy="mod", name=None, max_norm=None, weights=None, idx=None, segment_ids=None): """Lookup helper for embedding_lookup_sparse_with_distributed_aggregation.""" if params is None or params == []: # pylint: disable=g-explicit-bool-comparison raise ValueError("Need at least one param") if isinstance(params, variables.PartitionedVariable): params = list(params) # Iterate to get the underlying Variables. if not isinstance(params, list): params = [params] def maybe_normalize(x): if max_norm is not None: if x.get_shape().ndims is not None: ndims = x.get_shape().ndims else: ndims = array_ops.size(array_ops.shape(x)) return clip_ops.clip_by_norm(x, max_norm, axes=list(range(1, ndims))) return x with ops.name_scope(name, "embedding_lookup_with_distributed_aggregation", params + [ids]) as name: np = len(params) # Number of partitions # Preserve the resource variable status to avoid accidental dense reads. if not any( isinstance(p, resource_variable_ops.ResourceVariable) for p in params): params = ops.convert_n_to_tensor_or_indexed_slices(params, name="params") if np == 1: with ops.colocate_with(params[0]): ret = maybe_normalize(_do_gather(params[0], ids)) ignore_weights = weights is None if not ignore_weights: if weights.dtype != ret.dtype: weights = math_ops.cast(weights, ret.dtype) # Reshape to allow broadcast ones = array_ops.fill( array_ops.expand_dims(array_ops.rank(ret) - 1, 0), 1) bcast_weights_shape = array_ops.concat( [array_ops.shape(weights), ones], 0) orig_weights_shape = weights.get_shape() weights = array_ops.reshape(weights, bcast_weights_shape) # Set weights shape after reshape if ret.get_shape().ndims is not None: weights.set_shape( orig_weights_shape.concatenate( [1 for _ in range(ret.get_shape().ndims - 1)])) ret *= weights return math_ops.segment_sum(ret, segment_ids, name=name) else: return math_ops.sparse_segment_sum(ret, idx, segment_ids, name=name) else: ids = 
ops.convert_to_tensor(ids, name="ids") flat_ids = array_ops.reshape(ids, [-1]) original_indices = math_ops.range(array_ops.size(flat_ids)) # Create p_assignments and set new_ids depending on the strategy. if partition_strategy == "mod": p_assignments = flat_ids % np new_ids = flat_ids // np elif partition_strategy == "div": # Compute num_total_ids as the sum of dim-0 of params, then assign to # partitions based on a constant number of ids per partition. Optimize # if we already know the full shape statically. dim_0_size = params[0].get_shape()[0] for p in xrange(1, np): dim_0_size += params[p].get_shape()[0] if dim_0_size.value: num_total_ids = constant_op.constant( dim_0_size.value, flat_ids.dtype) else: dim_0_sizes = [] for p in xrange(np): if params[p].get_shape()[0].value is not None: dim_0_sizes.append(params[p].get_shape()[0].value) else: with ops.colocate_with(params[p]): dim_0_sizes.append( array_ops.shape(params[p])[0]) num_total_ids = math_ops.reduce_sum( math_ops.cast(array_ops.stack(dim_0_sizes), flat_ids.dtype)) ids_per_partition = num_total_ids // np extras = num_total_ids % np p_assignments = math_ops.maximum( flat_ids // (ids_per_partition + 1), (flat_ids - extras) // ids_per_partition) # Emulate a conditional using a boolean indicator tensor is_in_first_extras_partitions = math_ops.cast( p_assignments < extras, flat_ids.dtype) new_ids = (is_in_first_extras_partitions * (flat_ids % (ids_per_partition + 1)) + (1 - is_in_first_extras_partitions) * ((flat_ids - extras) % ids_per_partition)) else: raise ValueError("Unrecognized partition strategy: " + partition_strategy) # Cast partition assignments to int32 for use in dynamic_partition. # There really should not be more than 2^32 partitions. p_assignments = math_ops.cast(p_assignments, dtypes.int32) # Partition list of ids based on assignments into np separate lists gather_ids = data_flow_ops.dynamic_partition( new_ids, p_assignments, np) # Similarly, partition the original indices. 
pindices = data_flow_ops.dynamic_partition(original_indices, p_assignments, np) # Do np separate lookups, finding embeddings for plist[p] in params[p] partitioned_result = [] for p in xrange(np): with ops.colocate_with(params[p]): partitioned_result.append( _do_gather(params[p], gather_ids[p])) ignore_weights = weights is None if not ignore_weights: # Partition weights according to pindices. partitioned_weight = [] for p in xrange(np): partitioned_weight.append( array_ops.gather(weights, pindices[p])) # Reshape each partition result. element_shape = params[0].get_shape()[1:] for p in params[1:]: element_shape = element_shape.merge_with(p.get_shape()[1:]) if element_shape.is_fully_defined(): for p in xrange(np): with ops.colocate_with(params[p]): partitioned_result[p] = array_ops.reshape( partitioned_result[p], array_ops.concat( [array_ops.shape(pindices[p]), element_shape], 0)) else: with ops.colocate_with(params[0]): params_shape = array_ops.shape(params[0]) for p in xrange(np): with ops.colocate_with(params[p]): partitioned_result[p] = array_ops.reshape( partitioned_result[p], array_ops.concat([ array_ops.shape(pindices[p]), array_ops.slice(params_shape, [1], [-1]) ], 0)) # Normalize each partition result. for p in xrange(np): with ops.colocate_with(params[p]): partitioned_result[p] = maybe_normalize( partitioned_result[p]) if not ignore_weights: # Multiply each partition result with partition weights. for p in xrange(np): with ops.colocate_with(params[p]): if partitioned_weight[p].dtype != partitioned_result[ p].dtype: partitioned_weight[p] = math_ops.cast( partitioned_weight[p], partitioned_result[p].dtype) # Reshape partition weights. 
ones = array_ops.fill( array_ops.expand_dims( array_ops.rank(partitioned_result[p]) - 1, 0), 1) bcast_weights_shape = array_ops.concat( [array_ops.shape(partitioned_weight[p]), ones], 0) orig_weights_shape = partitioned_weight[p].get_shape() partitioned_weight[p] = array_ops.reshape( partitioned_weight[p], bcast_weights_shape) if partitioned_result[p].get_shape().ndims is not None: partitioned_weight[p].set_shape( orig_weights_shape.concatenate([ 1 for _ in range(partitioned_result[p]. get_shape().ndims - 1) ])) partitioned_result[p] *= partitioned_weight[p] partitioned_segment_ids = [] for p in xrange(np): if not ignore_weights: # Partition segment_ids according to pindices. p_segment_ids = array_ops.gather(segment_ids, pindices[p]) # Number the p_segment_ids to meet segment_sum's requirements. Note # that unique_p_segment_ids contains unique segment ids of this # partition and these ids' order is unchanged. unique_p_segment_ids, unique_p_segment_idx = array_ops.unique( p_segment_ids) partitioned_segment_ids.append(unique_p_segment_ids) # segment_sum this partition's result. with ops.colocate_with(params[p]): partitioned_result[p] = math_ops.segment_sum( partitioned_result[p], unique_p_segment_idx) else: # When ignore weights, we need to get indexs of elements in idx and # segment_ids. _, exclude_idx = array_ops.setdiff1d(idx, pindices[p]) all_idx = math_ops.range(array_ops.shape(idx)[0]) _, include_idx = array_ops.setdiff1d(all_idx, exclude_idx) # Gather segment_ids and idx according to indexs. p_segment_ids = array_ops.gather(segment_ids, include_idx) p_idx = array_ops.gather(idx, include_idx) # Number the p_segment_ids, same as ignore_weights case above. 
unique_p_segment_ids, unique_p_segment_idx = array_ops.unique( p_segment_ids) _, unique_p_idx_idx = array_ops.unique(p_idx) partitioned_segment_ids.append(unique_p_segment_ids) with ops.colocate_with(params[p]): partitioned_result[p] = math_ops.sparse_segment_sum( partitioned_result[p], unique_p_idx_idx, unique_p_segment_idx) # Concat each partition's segment_ids and result for final segment_sum. concat_segment_ids = array_ops.concat(partitioned_segment_ids, 0) concat_partitioned_result = array_ops.concat(partitioned_result, 0) return math_ops.unsorted_segment_sum( concat_partitioned_result, concat_segment_ids, math_ops.reduce_max(concat_segment_ids) + 1, name=name)
def beam_attention_decoder(decoder_inputs,
                           initial_state,
                           attention_states,
                           cell,
                           embedding,
                           output_size=None,
                           num_heads=1,
                           loop_function=None,
                           dtype=None,
                           scope=None,
                           initial_state_attention=False,
                           output_projection=None,
                           beam_size=10):
  """Attention decoder that runs with a batch of `beam_size` hypotheses.

  Args:
    decoder_inputs: Non-empty sequence of decoder inputs. Its length sets the
      number of decoding steps. The first element is always replaced by the
      embedding of symbol id 1, and later elements are replaced by
      `loop_function`'s result whenever `loop_function` is given.
    initial_state: Iterable of per-layer states, each exposing `.c` and `.h`;
      every layer is tiled `beam_size` times along axis 0.
    attention_states: Tensor whose dimensions 1 and 2 must be statically
      known (attention length and attention size).
    cell: Callable `cell(x, state) -> (cell_output, state)` exposing
      `output_size`.
    embedding: Parameters passed to `tf.nn.embedding_lookup`.
    output_size: Size of the per-step output; defaults to `cell.output_size`.
    num_heads: Number of attention heads; must be at least 1.
    loop_function: Optional callable invoked as
      `loop_function(prev, i, log_beam_probs, beam_path, beam_symbols)` to
      produce the next input from the previous output.
    dtype: Dtype for the variable scope.
    scope: Variable scope; defaults to "attention_decoder".
    initial_state_attention: If True, the initial attention vectors are
      computed from `initial_state` instead of being zeros.
    output_projection: Required pair `(weights, biases)` applied with
      `nn_ops.xw_plus_b` before the per-step argmax.
    beam_size: Number of hypotheses decoded in parallel.

  Returns:
    A tuple `(outputs, state, beam_path, beam_symbols)`: the list of per-step
    argmax tensors, the final cell state, and `beam_path` / `beam_symbols`
    concatenated and reshaped to `[-1, beam_size]`. The two beam lists are
    only handed to `loop_function`; this function never appends to them.

  Raises:
    ValueError: If `decoder_inputs` is empty, `num_heads < 1`,
      `output_projection` is None, dimension 1 or 2 of `attention_states` is
      unknown, or the size of a step input cannot be inferred.
  """
  if not decoder_inputs:
    raise ValueError("Must provide at least 1 input to attention decoder.")
  if num_heads < 1:
    raise ValueError("With less than 1 heads, use a non-attention decoder.")
  if output_projection is None:
    # Every decoding step indexes output_projection, so None could never
    # work; fail early with a clear message instead of a late TypeError.
    raise ValueError(
        "output_projection must be provided as a (weights, biases) pair.")
  # Both the attention length (dim 1) and attention size (dim 2) are used as
  # static sizes below, so check both, as the message states.
  if not attention_states.get_shape()[1:3].is_fully_defined():
    raise ValueError("Shape[1] and [2] of attention_states must be known: %s" %
                     attention_states.get_shape())
  if output_size is None:
    output_size = cell.output_size

  with variable_scope.variable_scope(
      scope or "attention_decoder", dtype=dtype) as scope:
    dtype = scope.dtype

    attn_length = attention_states.get_shape()[1].value
    attn_size = attention_states.get_shape()[2].value

    # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before.
    hidden = array_ops.reshape(attention_states,
                               [-1, attn_length, 1, attn_size])
    hidden_features = []
    v = []
    attention_vec_size = attn_size  # Size of query vectors for attention.
    for head in range(num_heads):
      k = variable_scope.get_variable(
          "AttnW_%d" % head, [1, 1, attn_size, attention_vec_size])
      hidden_features.append(nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME"))
      v.append(
          variable_scope.get_variable("AttnV_%d" % head,
                                      [attention_vec_size]))

    # Tile the encoder's final state beam_size times, because the decoder
    # runs with batch_size == beam_size. initial_state holds one entry per
    # RNN layer, each carrying the c and h states, so c and h are tiled
    # separately and re-wrapped in an LSTMStateTuple.
    tiled_layers = []
    for layer_state in initial_state:
      tiled_c = tf.concat([layer_state.c] * beam_size, 0)
      tiled_h = tf.concat([layer_state.h] * beam_size, 0)
      tiled_layers.append(rnn_cell_impl.LSTMStateTuple(tiled_c, tiled_h))
    state = tuple(tiled_layers)

    def attention(query):
      """Returns one attention-weighted read of `hidden` per head."""
      ds = []  # Results of attention reads will be stored here.
      if nest.is_sequence(query):  # If the query is a tuple, flatten it.
        query_list = nest.flatten(query)
        for q in query_list:  # Check that ndims == 2 if specified.
          ndims = q.get_shape().ndims
          if ndims:
            assert ndims == 2
        query = array_ops.concat(query_list, 1)
      for head in range(num_heads):
        with variable_scope.variable_scope("Attention_%d" % head):
          y = Linear(query, attention_vec_size, True)
          y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
          # Attention mask is a softmax of v^T * tanh(...).
          s = math_ops.reduce_sum(
              v[head] * math_ops.tanh(hidden_features[head] + y), [2, 3])
          attn_weights = nn_ops.softmax(s)
          # Now calculate the attention-weighted vector d.
          d = math_ops.reduce_sum(
              array_ops.reshape(attn_weights, [-1, attn_length, 1, 1]) *
              hidden, [1, 2])
          ds.append(array_ops.reshape(d, [-1, attn_size]))
      return ds

    outputs = []
    prev = None
    # The attention vectors must also have beam_size rows.
    batch_attn_size = array_ops.stack([beam_size, attn_size])
    attns = [
        array_ops.zeros(batch_attn_size, dtype=dtype)
        for _ in range(num_heads)
    ]
    for attn in attns:  # Ensure the second shape of attention vectors is set.
      attn.set_shape([None, attn_size])
    if initial_state_attention:
      # NOTE(review): this uses the untiled `initial_state`, not the tiled
      # `state` built above -- confirm that is intended.
      attns = attention(initial_state)

    log_beam_probs, beam_path, beam_symbols = [], [], []
    for i, inp in enumerate(decoder_inputs):
      if i > 0:
        variable_scope.get_variable_scope().reuse_variables()
      if i == 0:
        # At the first step the input is a batch of beam_size identical
        # symbols (id 1; per the original author's note, the <GO> marker).
        inp = tf.nn.embedding_lookup(
            embedding, tf.constant(1, dtype=tf.int32, shape=[beam_size]))
      # If loop_function is set, we use it instead of decoder_inputs.
      if loop_function is not None and prev is not None:
        with variable_scope.variable_scope("loop_function", reuse=True):
          inp = loop_function(prev, i, log_beam_probs, beam_path,
                              beam_symbols)
      # Merge input and previous attentions into one vector of the right size.
      input_size = inp.get_shape().with_rank(2)[1]
      if input_size.value is None:
        raise ValueError("Could not infer input size from input: %s" %
                         inp.name)
      inputs = [inp] + attns
      x = Linear(inputs, input_size, True)
      # Run the RNN.
      cell_output, state = cell(x, state)
      # Run the attention mechanism.
      if i == 0 and initial_state_attention:
        with variable_scope.variable_scope(
            variable_scope.get_variable_scope(), reuse=True):
          attns = attention(state)
      else:
        attns = attention(state)

      with variable_scope.variable_scope("AttnOutputProjection"):
        inputs = [cell_output] + attns
        output = Linear(inputs, output_size, True)
      if loop_function is not None:
        prev = output
      outputs.append(
          tf.argmax(
              nn_ops.xw_plus_b(output, output_projection[0],
                               output_projection[1]),
              axis=1))

  return (outputs, state,
          tf.reshape(tf.concat(beam_path, 0), [-1, beam_size]),
          tf.reshape(tf.concat(beam_symbols, 0), [-1, beam_size]))
def call(self, inputs):
  """Applies `self._merge_function` to `inputs`, aligning ranks if required.

  When `self._reshape_required` is false the inputs are merged as they are.
  Otherwise, if every input has a known rank, lower-rank inputs get size-1
  axes inserted at axis 1 until all ranks match. If some rank is unknown,
  inputs are rearranged so the batch axis comes last, merged, and the result
  is rearranged back.

  Args:
    inputs: A list or tuple of input tensors.

  Returns:
    The output of `self._merge_function`.

  Raises:
    ValueError: If `inputs` is not a list or tuple.
  """
  if not isinstance(inputs, (list, tuple)):
    raise ValueError('A merge layer should be called on a list of inputs.')
  if not self._reshape_required:
    return self._merge_function(inputs)

  aligned_inputs = []
  known_ranks = [backend.ndim(tensor) for tensor in inputs]
  if None not in known_ranks:
    # If ranks of all inputs are available, we simply expand each of them at
    # axis=1 until all of them have the same rank.
    target_rank = max(known_ranks)
    for tensor in inputs:
      missing_axes = target_rank - backend.ndim(tensor)
      for _ in range(missing_axes):
        tensor = array_ops.expand_dims(tensor, axis=1)
      aligned_inputs.append(tensor)
    return self._merge_function(aligned_inputs)

  # Transpose all inputs so that batch size is the last dimension.
  # (batch_size, dim1, dim2, ... ) -> (dim1, dim2, ... , batch_size)
  any_transposed = False
  for tensor in inputs:
    rank = backend.ndim(tensor)
    if rank is None:
      dynamic_shape = array_ops.shape(tensor)
      batch_size = dynamic_shape[0]
      batch_last_shape = backend.concatenate(
          [dynamic_shape[1:], array_ops.expand_dims(batch_size, axis=-1)])
      flat_shape = array_ops.stack(
          [batch_size, math_ops.reduce_prod(dynamic_shape[1:])], axis=0)
      batch_last = array_ops.reshape(tensor, flat_shape)
      batch_last = array_ops.transpose(batch_last, perm=(1, 0))
      batch_last = array_ops.reshape(batch_last, batch_last_shape)
      aligned_inputs.append(batch_last)
      any_transposed = True
    elif rank > 1:
      batch_last_perm = list(range(1, rank)) + [0]
      aligned_inputs.append(array_ops.transpose(tensor, perm=batch_last_perm))
      any_transposed = True
    else:
      # We don't transpose inputs if they are 1D vectors or scalars.
      aligned_inputs.append(tensor)

  merged = self._merge_function(aligned_inputs)
  merged_rank = backend.ndim(merged)
  if not any_transposed:
    return merged

  # If inputs have been transposed, we have to transpose the output too.
  if merged_rank is None:
    merged_shape = array_ops.shape(merged)
    merged_rank = array_ops.shape(merged_shape)[0]
    batch_size = merged_shape[merged_rank - 1]
    batch_first_shape = backend.concatenate([
        array_ops.expand_dims(batch_size, axis=-1),
        merged_shape[:merged_rank - 1]
    ])
    merged = array_ops.reshape(merged, (-1, batch_size))
    merged = array_ops.transpose(merged, perm=(1, 0))
    merged = array_ops.reshape(merged, batch_first_shape)
  elif merged_rank > 1:
    batch_first_perm = [merged_rank - 1] + list(range(merged_rank - 1))
    merged = array_ops.transpose(merged, perm=batch_first_perm)
  return merged
def rnn(step_fn,
        inputs,
        initial_states,
        go_backwards=False,
        unroll=False,
        input_length=None,
        name='rnn_block'):
  """Iterates `step_fn` over axis 1 of `inputs`.

  Axes 0 and 1 of `inputs` are swapped first, so the loop runs over what was
  originally axis 1; the stacked outputs are swapped back before returning.

  Args:
    step_fn: Callable `step_fn(step_input, states) -> (output, new_states)`.
    inputs: Tensor with at least 3 dimensions.
    initial_states: States passed to the first `step_fn` call.
    go_backwards: If True, iterate over the steps in reverse order.
    unroll: If True, build one `step_fn` call per step in Python (needs a
      statically known number of steps); otherwise use a `while_loop` backed
      by `TensorArray`s.
    input_length: Passed as `maximum_iterations` to the `while_loop`; not
      read when `unroll` is True.
    name: Name scope for the created ops.

  Returns:
    A tuple `(last_output, outputs, new_states)`: the output of the last
    executed step, all step outputs with axes 0 and 1 swapped back, and the
    states after the last step.

  Raises:
    ValueError: If `inputs` has fewer than 3 dimensions.
    AssertionError: If `unroll` is True and the number of steps is unknown.
  """
  with ops.name_scope(name):
    dim = ndim(inputs)
    if dim < 3:
      raise ValueError("Input should be at least 3D")
    perm = [1, 0] + list(range(2, dim))
    inputs = array_ops.transpose(inputs, perm=perm, name='to_time_major')

    if unroll:
      assert int_shape(inputs)[0] is not None,\
          "Unrolling requires a fixed number of time steps"
      states = initial_states
      successive_states = []
      successive_outputs = []
      input_list = array_ops.unstack(inputs)
      if go_backwards:
        input_list.reverse()
      for x in input_list:
        outputs, states = step_fn(x, states)
        successive_outputs.append(outputs)
        successive_states.append(states)
      last_output = successive_outputs[-1]
      new_states = successive_states[-1]
      outputs = array_ops.stack(successive_outputs)
    else:
      if go_backwards:
        # `array_ops.reverse` needs a 1-D list of axes; a bare scalar axis
        # is rejected by the op's shape check.
        inputs = array_ops.reverse(inputs, axis=[0])
      states = tuple(initial_states)
      time_steps = array_ops.shape(inputs)[0]
      # Run one step up front only to learn the output dtype.
      outputs, _ = step_fn(inputs[0], initial_states)
      output_ta = tensor_array_ops.TensorArray(
          dtype=outputs.dtype, size=time_steps, tensor_array_name='output_ta')
      input_ta = tensor_array_ops.TensorArray(
          dtype=inputs.dtype, size=time_steps, tensor_array_name='input_ta')
      # unstack inputs and write into input array
      input_ta = input_ta.unstack(inputs)
      time = array_ops.constant(0, dtype='int32', name='time')

      def _step(_time, _output_ta, *_states):
        """Runs one step: reads the input, calls step_fn, stores the output."""
        current_input = input_ta.read(_time)
        output, _new_states = step_fn(current_input, tuple(_states))
        # Give each new state the static shape of the state it replaces.
        for state, new_state in zip(_states, _new_states):
          new_state.set_shape(state.get_shape())
        _output_ta = _output_ta.write(_time, output)
        return (_time + 1, _output_ta) + tuple(_new_states)

      final_outputs = control_flow_ops.while_loop(
          cond=lambda _time, *_: _time < time_steps,
          body=_step,
          loop_vars=(time, output_ta) + states,
          parallel_iterations=32,
          swap_memory=True,
          maximum_iterations=input_length)
      last_time = final_outputs[0]
      output_ta = final_outputs[1]
      new_states = final_outputs[2:]
      outputs = output_ta.stack()
      last_output = output_ta.read(last_time - 1)

    # Swap axes 0 and 1 back.
    perm = [1, 0] + list(range(2, ndim(outputs)))
    outputs = array_ops.transpose(outputs, perm=perm)
  return last_output, outputs, new_states
def calibration_layer(uncalibrated_tensor,
                      num_keypoints,
                      keypoints_initializers=None,
                      keypoints_initializer_fns=None,
                      bound=False,
                      monotonic=None,
                      missing_input_values=None,
                      missing_output_values=None,
                      name=None,
                      **regularizer_amounts):
  """Creates a calibration layer for uncalibrated values.

  The input is flattened to `[batch_size, n]`, every one of the `n` signals
  is calibrated independently with `one_dimensional_calibration_layer`, and
  the results are stacked and reshaped back to the input's shape.

  Besides the calibrated tensor, the function returns the projection ops that
  must be applied at each step (or every so many steps) to project the model
  to a feasible space -- used for bounding the outputs or for imposing
  monotonicity -- and the accumulated regularization.

  Args:
    uncalibrated_tensor: Tensor of shape [batch_size, ...] with uncalibrated
      values.
    num_keypoints: Number of keypoints to use. Either a scalar value that
      will be used for every uncalibrated signal, or a list of n values, one
      per uncalibrated signal -- uncalibrated is first flattened (see
      tf.contrib.layers.flatten) to [batch_size, n], and there should be one
      value in the list per n. If a value of the list is 0 or None the
      corresponding signal won't be calibrated.
    keypoints_initializers: For evaluation or inference (or when resuming
      training from a checkpoint) the values will be loaded from disk, so
      they don't need to be given (leave it as None). Otherwise provide
      either a tuple of two tensors of shape [num_keypoints], or a list of n
      pairs of tensors, each of shape [num_keypoints]. In this list there
      should be one pair per uncalibrated signal, just like num_keypoints
      above. Notice that num_keypoints can be different per signal.
    keypoints_initializer_fns: Like keypoints_initializers but using lambda
      initializers. They should be compatible with tf.get_variable. If this
      is set, then keypoints_initializers must be None.
    bound: Boolean, whether the output of calibration must be bound.
      Alternatively a list of n booleans, one per uncalibrated value, like
      num_keypoints above.
    monotonic: Whether calibration is monotonic: None or 0 means no
      monotonicity. Positive or negative values mean increasing or decreasing
      monotonicity respectively. Alternatively a list of n monotonic values,
      one per uncalibrated value, like num_keypoints above.
    missing_input_values: If set, and if the input has this value, it is
      assumed to be missing and the output will either be calibrated to some
      value between `[calibration_output_min, calibration_output_max]` or set
      to a fixed value given by missing_output_values. Limitation: it only
      works for scalars. Either one value for all inputs, or a list with one
      value per uncalibrated value.
    missing_output_values: Requires missing_input_values also to be set. If
      set, missing inputs are converted to this value. Either one value for
      all outputs, or a list with one value per uncalibrated value.
    name: Name scope for operations.
    **regularizer_amounts: Keyword args of regularization amounts passed to
      regularizers.calibrator_regularization(). Keyword names should be among
      supported regularizers.CALIBRATOR_REGULARIZERS and values should be
      either float or list of floats. If float, then the same value is
      applied to all input signals.

  Returns:
    A tuple of:
    * calibrated tensor of shape [batch_size, ...], the same shape as
      uncalibrated.
    * list of projection ops, that must be applied at each step (or every so
      many steps) to project the model to a feasible space: used for bounding
      the outputs or for imposing monotonicity. Empty if none are requested.
    * None or tensor with regularization loss.

  Raises:
    ValueError: If dimensions don't match.
  """
  with ops.name_scope(name or 'calibration_layer'):
    # Flattening uncalibrated tensor [batch_size, k1, k2, ..., kn] to
    # [batch_size, k1 * k2 * ... * kn].
    static_shape = uncalibrated_tensor.get_shape().as_list()
    num_signals = 1
    for non_batch_dim in static_shape[1:]:
      num_signals *= non_batch_dim
    flat_uncalibrated = array_ops.reshape(
        uncalibrated_tensor, shape=[-1, num_signals],
        name='flat_uncalibrated')

    # Expand every per-signal option to a list with one entry per signal.
    num_keypoints = tools.cast_to_list(
        num_keypoints, num_signals, 'num_keypoints')
    keypoints_initializers = tools.cast_to_list(
        keypoints_initializers, num_signals, 'keypoints_initializers')
    keypoints_initializer_fns = tools.cast_to_list(
        keypoints_initializer_fns, num_signals, 'keypoints_initializer_fns')
    bound = tools.cast_to_list(bound, num_signals, 'bound')
    monotonic = tools.cast_to_list(monotonic, num_signals, 'monotonic')
    missing_input_values = tools.cast_to_list(
        missing_input_values, num_signals, 'missing_input_values')
    missing_output_values = tools.cast_to_list(
        missing_output_values, num_signals, 'missing_output_values')
    amounts_per_signal = {}
    for regularizer_name in regularizer_amounts:
      amounts_per_signal[regularizer_name] = tools.cast_to_list(
          regularizer_amounts[regularizer_name], num_signals,
          regularizer_name)
    regularizer_amounts = amounts_per_signal

    uncalibrated_splits = array_ops.unstack(flat_uncalibrated, axis=1)
    calibrated_splits = []
    projection_ops = []
    total_regularization = None
    for ii in range(num_signals):
      if not num_keypoints[ii]:
        # No calibration for this signal.
        calibrated_splits.append(uncalibrated_splits[ii])
        continue

      signal_regularizer_amounts = {}
      for regularizer_name in regularizer_amounts:
        signal_regularizer_amounts[regularizer_name] = (
            regularizer_amounts[regularizer_name][ii])
      calibrated, projection, reg = one_dimensional_calibration_layer(
          uncalibrated_splits[ii],
          num_keypoints[ii],
          signal_name='signal_%d' % ii,
          keypoints_initializers=keypoints_initializers[ii],
          keypoints_initializer_fns=keypoints_initializer_fns[ii],
          bound=bound[ii],
          monotonic=monotonic[ii],
          missing_input_value=missing_input_values[ii],
          missing_output_value=missing_output_values[ii],
          **signal_regularizer_amounts)
      calibrated_splits.append(calibrated)
      if projection is not None:
        projection_ops.append(projection)
      total_regularization = tools.add_if_not_none(total_regularization, reg)

    flat_calibrated = array_ops.stack(
        calibrated_splits, axis=1, name='stack_calibrated')
    reshaped_calibrated = array_ops.reshape(
        flat_calibrated,
        shape=array_ops.shape(uncalibrated_tensor),
        name='reshape_calibrated')
    return reshaped_calibrated, projection_ops, total_regularization
def expand_dims(input, axis, name=None):  # pylint: disable=redefined-builtin
  """Inserts a dimension with shape 1 into a potentially ragged tensor's shape.

  Given a potentially ragged tensor `input`, this operation inserts a
  dimension with size 1 at the dimension `axis` of `input`'s shape.

  * If `input` is a `Tensor`, then this is equivalent to `tf.expand_dims`.
  * If `input` is ragged, and `axis=0`, then the new dimension will be
    uniform; but the previously outermost dimension will become ragged.
  * If `input` is ragged, and `0 < axis < input.ragged_rank`, then the
    new dimension will be ragged.
  * If `input` is ragged, and `axis >= input.ragged_rank`, then the new
    dimension will be uniform.

  The following table gives some examples showing how `ragged.expand_dims`
  impacts the shapes of different input tensors.  Ragged dimensions are
  indicated by enclosing them in parentheses.

  input.shape             | axis | result.shape
  ----------------------- | ---- | -----------------------------
  `[D1, D2]`              |  `0` | `[1, D1, D2]`
  `[D1, D2]`              |  `1` | `[D1, 1, D2]`
  `[D1, D2]`              |  `2` | `[D1, D2, 1]`
  `[D1, (D2), (D3), D4]`  |  `0` | `[1, (D1), (D2), (D3), D4]`
  `[D1, (D2), (D3), D4]`  |  `1` | `[D1, (1), (D2), (D3), D4]`
  `[D1, (D2), (D3), D4]`  |  `2` | `[D1, (D2), (1), (D3), D4]`
  `[D1, (D2), (D3), D4]`  |  `3` | `[D1, (D2), (D3), 1, D4]`
  `[D1, (D2), (D3), D4]`  |  `4` | `[D1, (D2), (D3), D4, 1]`

  Args:
    input: The potentially ragged tensor that should be expanded with a new
      dimension.
    axis: An integer constant indicating where the new dimension should be
      inserted.
    name: A name for the operation (optional).

  Returns:
    A tensor with the same values as `input`, with an added dimension of
    size 1 at `axis`.

  #### Examples:

  >>> rt = tf.ragged.constant([[1, 2], [3]])
  >>> print(rt.shape)
  (2, None)

  >>> expanded = tf.expand_dims(rt, axis=0)
  >>> print(expanded.shape, expanded)
  (1, None, None) <tf.RaggedTensor [[[1, 2], [3]]]>

  >>> expanded = tf.expand_dims(rt, axis=1)
  >>> print(expanded.shape, expanded)
  (2, None, None) <tf.RaggedTensor [[[1, 2]], [[3]]]>

  >>> expanded = tf.expand_dims(rt, axis=2)
  >>> print(expanded.shape, expanded)
  (2, None, 1) <tf.RaggedTensor [[[1], [2]], [[3]]]>
  """
  with ops.name_scope(name, 'RaggedExpandDims', [input]):
    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        input, name='input')

    # Dense tensors are handled by the regular op.
    if not ragged_tensor.is_ragged(input):
      return array_ops.expand_dims(input, axis)

    # The result has one more dimension than the input, when that is known.
    input_ndims = input.shape.ndims
    result_ndims = input_ndims + 1 if input_ndims is not None else None
    axis = ragged_util.get_positive_axis(axis, result_ndims)

    if axis == 0:
      # A single outer row that spans every row of the input.
      return ragged_tensor.RaggedTensor.from_row_splits(
          input, array_ops.stack([0, input.nrows()]), validate=False)
    if axis == 1:
      # One new row per input row, each holding exactly that input row.
      return ragged_tensor.RaggedTensor.from_row_splits(
          input, math_ops.range(input.nrows() + 1), validate=False)
    # Deeper axes: recurse into the values and keep the outer row splits.
    return ragged_tensor.RaggedTensor.from_row_splits(
        expand_dims(input.values, axis - 1), input.row_splits, validate=False)
def _compute_random_ri_sampled_logits(ri_tensors,
                                      k_dim,
                                      s_active,
                                      weights,
                                      labels,
                                      inputs,
                                      num_sampled,
                                      num_true=1,
                                      subtract_log_q=True,
                                      partition_strategy="mod",
                                      name=None,
                                      seed=None):
  """Random Index sampled logits with negative sampling.

  https://arxiv.org/pdf/1410.8251.pdf

  Computes the sampled logits from the space of all possible random indexes.
  Since any random index is possible, we sample, not from the existing random
  indexes but from the space of possible random indexes, so that the model
  learns which combinations of bases are NOT the ones used to predict a given
  feature.

  Args:
    ri_tensors: Sparse tensor handed to `tx.gather_sparse`; its rows are
      selected by the flattened labels (presumably one random index per
      class -- confirm against the caller).
    k_dim: Forwarded to `sample_ri` (presumably the random index dimension --
      confirm).
    s_active: Forwarded to `sample_ri` (presumably the number of active
      entries per random index -- confirm).
    weights: A variable, a `PartitionedVariable`, or a list of variables used
      as the embedding parameters.
    labels: Label ids; cast to int64 if needed and flattened.
    inputs: Tensor multiplied with the looked-up weights; the original
      comments give its shape as [batch_size, dim].
    num_sampled: Forwarded to `sample_ri`.
    num_true: Number of true labels per row of `inputs`.
    subtract_log_q: If True, the log of the expected counts returned by
      `sample_ri` is subtracted from the true and sampled logits.
    partition_strategy: Forwarded to `embedding_lookup_sparse`.
    name: Optional name scope.
    seed: Currently unused by this function.

  Returns:
    A tuple `(out_logits, out_labels)`: the true logits followed by the
    sampled logits along axis 1, and a same-shaped tensor holding
    `1 / num_true` in the true columns and zeros in the sampled columns.
  """
  if isinstance(weights, variables.PartitionedVariable):
    weights = list(weights)
  if not isinstance(weights, list):
    weights = [weights]

  with ops.name_scope(name, "random_ri_sampled_logits",
                      weights + [inputs, labels]):
    if labels.dtype != dtypes.int64:
      labels = math_ops.cast(labels, dtypes.int64)
    flat_labels = array_ops.reshape(labels, [-1])

    true_ris = tx.gather_sparse(sp_tensor=ri_tensors, ids=flat_labels)
    sampled_ris, expected_true_ris, expected_sampled_ris = sample_ri(
        k_dim, s_active, num_sampled, true_ris)

    # True rows come first, sampled rows after them.
    all_ris = sparse_ops.sparse_concat(
        axis=0, sp_inputs=[true_ris, sampled_ris])
    all_ri_indices = tx.sparse_indices(all_ris)

    # Retrieve the weights
    # weights shape is [num_classes, dim]
    all_w = embedding_lookup_sparse(
        weights,
        all_ri_indices,
        all_ris,
        combiner="sum",
        partition_strategy=partition_strategy)

    # true_w shape is [batch_size * num_true, dim]
    true_w_size = array_ops.stack([array_ops.shape(flat_labels)[0], -1])
    true_w = array_ops.slice(all_w, [0, 0], true_w_size)
    sampled_w_begin = array_ops.stack([array_ops.shape(flat_labels)[0], 0])
    sampled_w = array_ops.slice(all_w, sampled_w_begin, [-1, -1])

    # inputs has shape [batch_size, dim]
    # sampled_w has shape [num_sampled, dim]
    # Apply X*W', which yields [batch_size, num_sampled]
    sampled_logits = math_ops.matmul(inputs, sampled_w, transpose_b=True)

    dim = array_ops.shape(true_w)[1:2]
    new_true_w_shape = array_ops.concat([[-1, num_true], dim], 0)
    expanded_inputs = array_ops.expand_dims(inputs, 1)
    reshaped_true_w = array_ops.reshape(true_w, new_true_w_shape)
    row_wise_dots = math_ops.multiply(expanded_inputs, reshaped_true_w)

    # We want the row-wise dot, which yields a [batch_size, num_true] tensor
    # of true_logits.
    flat_dots_shape = array_ops.concat([[-1], dim], 0)
    dots_as_matrix = array_ops.reshape(row_wise_dots, flat_dots_shape)
    true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])

    if subtract_log_q:
      # Subtract log of Q(l), prior probability that label appears in sampled.
      true_logits = true_logits - math_ops.log(expected_true_ris)
      sampled_logits = sampled_logits - math_ops.log(expected_sampled_ris)

    # Construct output logits and labels. The true labels/logits start at
    # col 0.
    out_logits = array_ops.concat([true_logits, sampled_logits], 1)
    # true_logits is a float tensor, ones_like(true_logits) is a float tensor
    # of ones. We then divide by num_true to ensure the per-example labels
    # sum to 1.0, i.e. form a proper probability distribution.
    true_label_block = array_ops.ones_like(true_logits) / num_true
    sampled_label_block = array_ops.zeros_like(sampled_logits)
    out_labels = array_ops.concat([true_label_block, sampled_label_block], 1)

    return out_logits, out_labels
def _block_lstm(seq_len_max,
                x,
                w,
                b,
                cs_prev=None,
                h_prev=None,
                wci=None,
                wcf=None,
                wco=None,
                forget_bias=None,
                cell_clip=None,
                use_peephole=None,
                name=None):
  r"""Runs the block LSTM op over a list of per-step inputs.

  The inputs in `x` are stacked into one tensor, passed to
  `_lstm_ops_so.block_lstm`, and every result is unstacked back into a list.
  Missing previous states and peephole weights are replaced by float32 zeros.

  Args:
    seq_len_max: A `Tensor` of type `int64`.
    x: A list of at least 1 `Tensor` objects of the same type in: `float32`.
      The first element must have rank 2.
    w: A `Tensor`. Must have the same type as `x`.
    b: A `Tensor`. Must have the same type as `x`. Must have rank 1 and a
      statically known size that is a multiple of 4.
    cs_prev: A `Tensor`. Must have the same type as `x`. Zeros if None.
    h_prev: A `Tensor`. Must have the same type as `x`. Zeros if None.
    wci: A `Tensor`. Must have the same type as `x`. If None, `wci`, `wcf`
      and `wco` are all replaced by zeros, whatever was passed for `wcf` and
      `wco`.
    wcf: A `Tensor`. Must have the same type as `x`.
    wco: A `Tensor`. Must have the same type as `x`.
    forget_bias: An optional `float`, forwarded to the op unchanged.
    cell_clip: An optional `float`, forwarded to the op unchanged.
    use_peephole: An optional `bool`, forwarded to the op unchanged.
    name: A name for the operation (optional).

  Returns:
    A tuple of lists `(i, cs, f, o, ci, co, h)`, each obtained by unstacking
    the corresponding output of the block LSTM op.

  Raises:
    ValueError: If `b` does not have a valid shape.
  """
  batch_size = x[0].get_shape().with_rank(2)[0].value
  cell_size4 = b.get_shape().with_rank(1)[0].value
  if cell_size4 is None:
    raise ValueError("`b` shape must not be None.")
  if cell_size4 % 4 != 0:
    raise ValueError(
        "`b` must have a size divisible by 4, got %d." % cell_size4)
  # Integer division keeps the cell size an int; true division would turn it
  # into a float that is then used as a shape dimension.
  cell_size = cell_size4 // 4
  zero_state = None
  if cs_prev is None or h_prev is None:
    zero_state = array_ops.constant(
        0, dtype=dtypes.float32, shape=[batch_size, cell_size])
  if cs_prev is None:
    cs_prev = zero_state
  if h_prev is None:
    h_prev = zero_state
  if wci is None:
    wci = array_ops.constant(0, dtype=dtypes.float32, shape=[cell_size])
    wco = wci
    wcf = wci

  # pylint: disable=protected-access
  i, cs, f, o, ci, co, h = _lstm_ops_so.block_lstm(
      seq_len_max=seq_len_max,
      x=array_ops.stack(x),
      cs_prev=cs_prev,
      h_prev=h_prev,
      w=w,
      wci=wci,
      wco=wco,
      wcf=wcf,
      b=b,
      forget_bias=forget_bias,
      cell_clip=cell_clip,
      name=name,
      use_peephole=use_peephole)

  return array_ops.unstack(i), array_ops.unstack(cs), array_ops.unstack(
      f), array_ops.unstack(o), array_ops.unstack(ci), array_ops.unstack(
          co), array_ops.unstack(h)
def __call__(self,
             inputs,
             initial_state=None,
             dtype=None,
             sequence_length=None,
             scope=None):
    """Runs this LSTM over `inputs`, starting from `initial_state`.

    Args:
      inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`
        or a list of `time_len` tensors of shape `[batch_size, input_size]`.
      initial_state: a tuple `(initial_cell_state, initial_output)` with
        tensors of shape `[batch_size, self._num_units]`. When omitted, a
        zero state of type `dtype` is created.
      dtype: The data type for the initial state and expected output.
        Required if `initial_state` is not provided; otherwise it defaults
        to the dtype of `initial_state[0]`.
      sequence_length: Optional length of each sequence in `inputs`, a
        vector of size `[batch_size]`. When omitted, the final state is
        simply the state after the last time step.
      scope: `VariableScope` for the created subgraph; defaults to
        "lstm_block_wrapper".

    Returns:
      A pair containing:

      - Output: A `3-D` tensor of shape `[time_len, batch_size, output_size]`
        or a list of `time_len` tensors of shape `[batch_size, output_size]`,
        matching the form in which `inputs` was given.
      - Final state: a tuple `(cell_state, output)` matching `initial_state`.

    Raises:
      ValueError: if the last dimension of `inputs` is not statically known,
        if neither `initial_state` nor `dtype` is given, or if
        `initial_state` does not have exactly two elements.
    """
    with vs.variable_scope(scope or "lstm_block_wrapper"):
        inputs_were_list = isinstance(inputs, list)
        if inputs_were_list:
            inputs = array_ops.stack(inputs)

        static_shape = inputs.get_shape().with_rank(3)
        if not static_shape[2]:
            raise ValueError(
                "Expecting inputs_shape[2] to be set: %s" % static_shape)

        # Prefer the static dimensions; fall back to dynamic shape tensors.
        batch_size = static_shape[1].value
        if batch_size is None:
            batch_size = array_ops.shape(inputs)[1]
        time_len = static_shape[0].value
        if time_len is None:
            time_len = array_ops.shape(inputs)[0]

        # Fill in whichever of initial_state / dtype was left out.
        if initial_state is not None:
            if len(initial_state) != 2:
                raise ValueError(
                    "Expecting initial_state to be a tuple with length 2 or None")
            if dtype is None:
                dtype = initial_state[0].dtype
        else:
            if dtype is None:
                raise ValueError(
                    "Either initial_state or dtype needs to be specified")
            zeros = array_ops.zeros(
                array_ops.stack([batch_size, self.num_units]), dtype=dtype)
            initial_state = zeros, zeros

        # Build the actual cell computation.
        if sequence_length is not None:
            sequence_length = ops.convert_to_tensor(sequence_length)
        initial_cell_state, initial_output = initial_state  # pylint: disable=unpacking-non-sequence
        cell_states, outputs = self._call_cell(
            inputs, initial_cell_state, initial_output, dtype, sequence_length)

        if sequence_length is None:
            # Without sequence lengths the final state is just the last step.
            final_cell_state = cell_states[-1]
            final_output = outputs[-1]
        else:
            # Zero out every output that lies beyond its sequence's length.
            step_mask = array_ops.sequence_mask(
                sequence_length, time_len, dtype=dtype)
            mask = array_ops.transpose(step_mask, [1, 0])
            mask = array_ops.tile(
                array_ops.expand_dims(mask, [-1]), [1, 1, self.num_units])
            outputs *= mask

            # The last valid state lives at index sequence_length - 1, which
            # may be -1 (meaning the initial state). Prepending the initial
            # states makes that lookup valid.
            mod_cell_states = array_ops.concat_v2(
                [array_ops.expand_dims(initial_cell_state, [0]), cell_states],
                0)
            mod_outputs = array_ops.concat_v2(
                [array_ops.expand_dims(initial_output, [0]), outputs], 0)
            final_cell_state = self._gather_states(
                mod_cell_states, sequence_length, batch_size)
            final_output = self._gather_states(
                mod_outputs, sequence_length, batch_size)

        if inputs_were_list:
            # Hand back a list because a list was passed in.
            outputs = array_ops.unstack(outputs)

        return outputs, (final_cell_state, final_output)
def encode(self, x):
    """Encodes `x` with a (possibly bidirectional) LSTM.

    Args:
      x: tensor of size [num_children, num_groups, embedding_size]

    Returns:
      last_c, last_h: tensors of size [num_children, hidden_size], the final
        LSTM states
      attn_mem: tensor of size [num_children, num_groups, hidden_size], the
        attention memory, i.e. all hidden states stacked together and
        multiplied by the attention matrix attn_w_1
    """
    hparams = self.hparams
    bidirectional = hparams.bi_lstm
    num_children = hparams.num_children

    # Fetch the already-created encoder variables.
    with variable_scope.variable_scope(hparams.name, reuse=True):
        if bidirectional:
            w_lstm_forward = variable_scope.get_variable(
                "encoder_lstm_forward")
            w_lstm_backward = variable_scope.get_variable(
                "encoder_lstm_backward")
        else:
            w_lstm = variable_scope.get_variable("encoder_lstm")
        forget_bias = variable_scope.get_variable("encoder_forget_bias")
        attn_w_1 = variable_scope.get_variable("attn_w_1")

    # One [num_children, embedding_size] slice per group.
    embedding_size = array_ops.shape(x)[2]
    signals = [
        array_ops.reshape(piece, [num_children, embedding_size])
        for piece in array_ops.split(x, self.num_groups, axis=1)
    ]

    hidden_per_step = []
    if bidirectional:
        # Each direction carries half of the hidden units.
        half_state_shape = [num_children, hparams.hidden_size / 2]
        fwd_c = array_ops.zeros(half_state_shape, dtype=dtypes.float32)
        fwd_h = array_ops.zeros(half_state_shape, dtype=dtypes.float32)
        bwd_c = array_ops.zeros(half_state_shape, dtype=dtypes.float32)
        bwd_h = array_ops.zeros(half_state_shape, dtype=dtypes.float32)

        for step in range(0, self.num_groups):
            fwd_c, fwd_h = lstm(signals[step], fwd_c, fwd_h, w_lstm_forward,
                                forget_bias)
            # The backward direction walks the groups from the end.
            mirrored = signals[self.num_groups - 1 - step]
            bwd_c, bwd_h = lstm(mirrored, bwd_c, bwd_h, w_lstm_backward,
                                forget_bias)
            hidden_per_step.append(
                array_ops.concat([fwd_h, bwd_h], axis=1))

        last_c = array_ops.concat([fwd_c, bwd_c], axis=1)
        last_h = array_ops.concat([fwd_h, bwd_h], axis=1)
    else:
        state_shape = [num_children, hparams.hidden_size]
        last_c = array_ops.zeros(state_shape, dtype=dtypes.float32)
        last_h = array_ops.zeros(state_shape, dtype=dtypes.float32)

        for step in range(0, self.num_groups):
            last_c, last_h = lstm(signals[step], last_c, last_h, w_lstm,
                                  forget_bias)
            hidden_per_step.append(last_h)

    attn_mem = array_ops.stack(hidden_per_step)

    # Flatten to 2-D for the matmul with attn_w_1, then restore the 3-D shape.
    attn_mem = array_ops.transpose(attn_mem, [1, 0, 2])
    attn_mem = array_ops.reshape(
        attn_mem, [num_children * self.num_groups, hparams.hidden_size])
    attn_mem = math_ops.matmul(attn_mem, attn_w_1)
    attn_mem = array_ops.reshape(
        attn_mem, [num_children, self.num_groups, hparams.hidden_size])

    return last_c, last_h, attn_mem
def testDimOutOfRange(self):
    """Stacking two rank-1 tensors along axis 2 must raise a ValueError."""
    rows = ([1, 2, 3], [4, 5, 6])
    tensors = [constant_op.constant(row) for row in rows]
    expected_message = r"axis = 2 not in \[-2, 2\)"
    with self.assertRaisesRegexp(ValueError, expected_message):
        array_ops.stack(tensors, axis=2)
def _UnpackGrad(op, *grads):
    """Gradient for unpack op: stacks `grads` along the op's "axis" attr."""
    unpack_axis = op.get_attr("axis")
    return array_ops.stack(grads, axis=unpack_axis)
def shard(computation,
          inputs=None,
          num_shards=1,
          input_shard_axes=None,
          outputs_from_all_shards=True,
          output_shard_axes=None,
          infeed_queue=None,
          device_assignment=None,
          name=None):
    """Shards `computation` for parallel execution.

    `inputs` must be a list of Tensors or None (equivalent to an empty list),
    each of which has a corresponding split axis (from `input_shard_axes`).
    Each input is split into `num_shards` pieces along the corresponding axis,
    and `computation` is applied to each shard in parallel.

    Tensors are broadcast to all shards if they are lexically captured by
    `computation`. e.g.,

      x = tf.constant(7)
      def computation():
        return x + 3
      ... = shard(computation, ...)

    TODO(phawkins): consider adding support for broadcasting Tensors passed
    as inputs.

    If `outputs_from_all_shards` is true, the outputs from all shards of
    `computation` are concatenated back together along their
    `output_shard_axes`. Otherwise, each output is taken from an arbitrary
    shard.

    Inputs and outputs of the computation must be at least rank-1 Tensors.

    Args:
      computation: A Python function that builds a computation to apply to
        each shard of the input.
      inputs: A list of input tensors or None (equivalent to an empty list).
        Each input tensor has a corresponding shard axis, given by
        `input_shard_axes`, which must have size divisible by `num_shards`.
      num_shards: The number of shards.
      input_shard_axes: A list of dimensions along which to shard `inputs`,
        or `None`. `None` means "shard all inputs along dimension 0". If not
        `None`, there must be one dimension per input.
      outputs_from_all_shards: Boolean or list of boolean. For each output,
        if `True`, outputs from all shards are concatenated along the
        corresponding `output_shard_axes` entry. Otherwise, each output is
        taken from an arbitrary shard. If the argument is a boolean, the
        argument's value is used for each output.
      output_shard_axes: A list of dimensions along which to concatenate the
        outputs of `computation`, or `None`. `None` means "concatenate all
        outputs along dimension 0". If not `None`, there must be one
        dimension per output. Ignored if `outputs_from_all_shards` is False.
      infeed_queue: If not `None`, the `InfeedQueue` to use to augment the
        inputs of `computation`.
      device_assignment: If not `None`, a `DeviceAssignment` describing the
        mapping between logical cores in the computation with physical cores
        in the TPU topology. Uses a default device assignment if `None`. The
        `DeviceAssignment` may be omitted if each shard of the computation
        uses only one core, and there is either only one shard, or the number
        of shards is equal to the number of cores in the TPU system.
      name: (Deprecated) Does nothing.

    Returns:
      A list of output tensors.

    Raises:
      ValueError: If num_shards <= 0
      ValueError: If len(input_shard_axes) != len(inputs)
      ValueError: If len(output_shard_axes) != len(outputs from `computation`)
      ValueError: If `outputs_from_all_shards` is a list whose length differs
        from the number of outputs.
    """
    if num_shards <= 0:
        raise ValueError("num_shards must be a positive integer.")

    # Converts inputs to Tensors.
    if inputs is None:
        inputs = []
    else:
        inputs = [ops.convert_to_tensor(tensor) for tensor in inputs]

    if input_shard_axes is None:
        input_shard_axes = [0] * len(inputs)
    if len(inputs) != len(input_shard_axes):
        raise ValueError(
            "Length of input_shard_axes must be equal to the number "
            "of inputs.")

    if not inputs:
        per_shard_inputs = [[]] * num_shards
    else:
        # Split every input along its axis: layout [input][shard].
        pieces_per_input = [
            array_ops.split(tensor, num_shards, axis=axis)
            for (axis, tensor) in zip(input_shard_axes, inputs)
        ]
        # Transpose to layout [shard][input].
        per_shard_inputs = [list(group) for group in zip(*pieces_per_input)]

    outputs = replicate(
        computation,
        per_shard_inputs,
        infeed_queue=infeed_queue,
        device_assignment=device_assignment,
        name=name)

    # There must be at least one shard since num_shards > 0.
    # TODO(b/36647078) remove disable when pylint bug is fixed.
    # pylint: disable=indexing-exception
    first_shard_outputs = outputs[0]
    # pylint: enable=indexing-exception

    if isinstance(first_shard_outputs, ops.Operation):
        # There were no outputs from the computation and replicate returned a
        # list of NoOps with control dependencies on the computation. Return
        # the first one so it can be used as a control dependency or fetch
        # node.
        return [first_shard_outputs]

    num_outputs = len(first_shard_outputs)

    if output_shard_axes is None:
        output_shard_axes = [0] * num_outputs
    if num_outputs != len(output_shard_axes):
        raise ValueError(
            "Length of output_shard_axes must be equal to the number "
            "of outputs.")

    if isinstance(outputs_from_all_shards, bool):
        outputs_from_all_shards = [outputs_from_all_shards] * num_outputs
    if num_outputs != len(outputs_from_all_shards):
        raise ValueError(
            "Length of outputs_from_all_shards must be equal to the "
            "number of outputs.")

    results = []
    per_output_values = zip(*outputs)
    for (axis, gather_all, shard_values) in zip(
            output_shard_axes, outputs_from_all_shards, per_output_values):
        if not gather_all:
            # TODO(phawkins): use a smarter policy, e.g., round-robin across
            # shards.
            results.append(shard_values[0])
            continue

        # Concatenate all of the outputs together (use stack for scalars).
        shape = shard_values[0].shape
        is_scalar = shape is not None and (shape.ndims == 0)
        if is_scalar:
            results.append(array_ops.stack(list(shard_values)))
        else:
            results.append(array_ops.concat(list(shard_values), axis=axis))
    return results
def _compute_ri_sampled_logits(ri_tensors,
                               weights,
                               labels,
                               inputs,
                               num_sampled,
                               num_classes,
                               sampled_values,
                               num_true=1,
                               subtract_log_q=True,
                               remove_accidental_hits=False,
                               partition_strategy="mod",
                               name=None,
                               seed=None):
    """Builds logits and labels for a sampled loss over sparse `ri_tensors`.

    The true and sampled class ids are gathered from `ri_tensors`, turned
    into weight rows via a sparse embedding lookup with "sum" combiner, and
    multiplied with `inputs` to produce logits.

    Args:
      ri_tensors: sparse tensor passed to `tx.gather_sparse` as `sp_tensor`.
      weights: a tensor, a list of tensors, or a `PartitionedVariable`;
        normalised to a list before use.
      labels: the true class ids; cast to `int64` if necessary.
      inputs: tensor of shape [batch_size, dim].
      num_sampled: number of classes to sample.
      num_classes: used as `range_max` for the default sampler.
      sampled_values: a triple `(sampled, true_expected_count,
        sampled_expected_count)`. If `None`, a uniform candidate sampler with
        `unique=True` is used.
      num_true: number of true classes per example.
      subtract_log_q: whether to subtract the log expected counts from the
        logits.
      remove_accidental_hits: whether to add the weights returned by
        `compute_accidental_hits` to the sampled logits.
      partition_strategy: forwarded to `embedding_lookup_sparse`.
      name: name scope for the created ops; defaults to "ri_sampled_logits".
      seed: seed for the default sampler.

    Returns:
      out_logits: true logits followed by sampled logits, concatenated along
        axis 1.
      out_labels: tensor with the same layout as `out_logits`, holding
        `1 / num_true` in the true columns and 0 in the sampled columns.
    """
    if isinstance(weights, variables.PartitionedVariable):
        weights = list(weights)
    if not isinstance(weights, list):
        weights = [weights]

    with ops.name_scope(name, "ri_sampled_logits",
                        weights + [inputs, labels]):
        if labels.dtype != dtypes.int64:
            labels = math_ops.cast(labels, dtypes.int64)
        labels_flat = array_ops.reshape(labels, [-1])

        # Sample the negative labels.
        #   sampled shape: [num_sampled] tensor
        #   true_expected_count shape = [batch_size, 1] tensor
        #   sampled_expected_count shape = [num_sampled] tensor
        if sampled_values is None:
            sampled_values = candidate_sampling_ops.uniform_candidate_sampler(
                true_classes=labels,
                num_true=num_true,
                num_sampled=num_sampled,
                unique=True,
                range_max=num_classes,
                seed=seed)

        # NOTE: pylint cannot tell that 'sampled_values' is a sequence
        # pylint: disable=unpacking-non-sequence
        sampled, true_expected_count, sampled_expected_count = [
            array_ops.stop_gradient(value) for value in sampled_values
        ]
        # pylint: enable=unpacking-non-sequence
        sampled = math_ops.cast(sampled, dtypes.int64)

        # True ids first, sampled ids after, so both can be looked up at once.
        all_ids = array_ops.concat([labels_flat, sampled], 0)
        sp_values = tx.gather_sparse(sp_tensor=ri_tensors, ids=all_ids)
        sp_indices = tx.sparse_indices(sp_values)

        # Retrieve the true weights and the logits of the sampled weights.
        # weights shape is [num_classes, dim]
        all_w = embedding_lookup_sparse(
            weights,
            sp_indices,
            sp_values,
            combiner="sum",
            partition_strategy=partition_strategy)

        # true_w shape is [batch_size * num_true, dim]
        true_w_size = array_ops.stack([array_ops.shape(labels_flat)[0], -1])
        true_w = array_ops.slice(all_w, [0, 0], true_w_size)
        sampled_w_begin = array_ops.stack(
            [array_ops.shape(labels_flat)[0], 0])
        sampled_w = array_ops.slice(all_w, sampled_w_begin, [-1, -1])

        # inputs has shape [batch_size, dim]
        # sampled_w has shape [num_sampled, dim]
        # Apply X*W', which yields [batch_size, num_sampled].
        # For energy based models the inputs are the predicted feature
        # vectors.
        sampled_logits = math_ops.matmul(inputs, sampled_w, transpose_b=True)

        # inputs shape is [batch_size, dim]
        # true_w shape is [batch_size * num_true, dim]
        # row_wise_dots is [batch_size, num_true, dim]
        dim = array_ops.shape(true_w)[1:2]
        new_true_w_shape = array_ops.concat([[-1, num_true], dim], 0)
        row_wise_dots = math_ops.multiply(
            array_ops.expand_dims(inputs, 1),
            array_ops.reshape(true_w, new_true_w_shape))

        # Sum each row to get the [batch_size, num_true] tensor of true
        # logits.
        flat_dots_shape = array_ops.concat([[-1], dim], 0)
        dots_as_matrix = array_ops.reshape(row_wise_dots, flat_dots_shape)
        true_logits = array_ops.reshape(
            _sum_rows(dots_as_matrix), [-1, num_true])

        if remove_accidental_hits:
            acc_indices, acc_ids, acc_weights = (
                candidate_sampling_ops.compute_accidental_hits(
                    labels, sampled, num_true=num_true))

            # This is how SparseToDense expects the indices.
            acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1])
            acc_ids_2d_int32 = array_ops.reshape(
                math_ops.cast(acc_ids, dtypes.int32), [-1, 1])
            sparse_indices = array_ops.concat(
                [acc_indices_2d, acc_ids_2d_int32], 1, "sparse_indices")

            # Create sampled_logits_shape = [batch_size, num_sampled]
            sampled_logits_shape = array_ops.concat([
                array_ops.shape(labels)[:1],
                array_ops.expand_dims(num_sampled, 0)
            ], 0)
            if sampled_logits.dtype != acc_weights.dtype:
                acc_weights = math_ops.cast(acc_weights, sampled_logits.dtype)
            sampled_logits += sparse_ops.sparse_to_dense(
                sparse_indices,
                sampled_logits_shape,
                acc_weights,
                default_value=0.0,
                validate_indices=False)

        if subtract_log_q:
            # Subtract log of Q(l), prior probability that label appears in
            # sampled.
            true_logits -= math_ops.log(true_expected_count)
            sampled_logits -= math_ops.log(sampled_expected_count)

        # Construct output logits and labels. The true labels/logits start at
        # col 0.
        out_logits = array_ops.concat([true_logits, sampled_logits], 1)

        # ones_like(true_logits) is a float tensor of ones. Dividing by
        # num_true makes the per-example labels sum to 1.0, i.e. form a
        # proper probability distribution.
        true_labels = array_ops.ones_like(true_logits) / num_true
        sampled_labels = array_ops.zeros_like(sampled_logits)
        out_labels = array_ops.concat([true_labels, sampled_labels], 1)

    return out_logits, out_labels
def input_calibration_layer(columns_to_tensors,
                            num_keypoints,
                            feature_columns=None,
                            keypoints_initializers=None,
                            keypoints_initializer_fns=None,
                            bound=False,
                            monotonic=None,
                            missing_input_values=None,
                            missing_output_values=None,
                            dtype=dtypes.float32,
                            **regularizer_amounts):
    """Creates a calibration layer for the given input and feature_columns.

    Returns a tensor with the calibrated values of the given features, a list
    of the feature names in the order they appear in that tensor, and a list
    of projection ops that must be applied at each step (or every so many
    steps) to project the model to a feasible space: used for bounding the
    outputs or for imposing monotonicity -- the list will be empty if bound
    and monotonic are not set.

    Args:
      columns_to_tensors: A mapping from feature name to tensors. 'string'
        key means a base feature (not-transformed). If feature_columns is not
        set these are the features calibrated. Otherwise the transformed
        feature_columns are the ones calibrated.
      num_keypoints: Number of keypoints to use. Either a single int, or a
        dict mapping feature names to num_keypoints. If a value of the dict
        is 0 or None the corresponding feature won't be calibrated.
      feature_columns: Optional. If set to a set of FeatureColumns, these
        will be the features used and calibrated.
      keypoints_initializers: For evaluation or inference (or when resuming
        training from a checkpoint) the values will be loaded from disk, so
        they don't need to be given (leave it as None). Either a tuple of two
        tensors of shape [num_keypoints], or a dict mapping feature names to
        pair of tensors of shape [num_keypoints[feature_name]]. See
        load_keypoints_from_quantiles or uniform_keypoints_for_signal on how
        to generate these (module keypoints_initialization).
      keypoints_initializer_fns: Like keypoints_initializers but using lambda
        initializers. They should be compatible with tf.get_variable. If this
        is set, then keypoints_initializers must be None.
      bound: boolean whether output of calibration must be bound.
        Alternatively a dict mapping feature name to boundness.
      monotonic: whether calibration has to be kept monotonic: None or 0
        means no monotonic. Positive or negative values mean increasing or
        decreasing monotonic respectively. Alternatively a dict mapping
        feature name to monotonic.
      missing_input_values: If set, and if the input has this value it is
        assumed to be missing and the output will either be calibrated to
        some value between `[calibration_output_min,
        calibration_output_max]` or set to a fixed value set by
        missing_output_value. Limitation: it only works for scalars. Either
        one value for all inputs, or a dict mapping feature name to
        missing_input_value for the respective feature.
      missing_output_values: Requires missing_input_value also to be set. If
        set it will convert missing input to this value. Either one value for
        all inputs, or a dict mapping feature name to missing_output_value
        for the respective feature.
      dtype: If any of the scalars are not given as tensors, they are
        converted to tensors with this dtype.
      **regularizer_amounts: Keyword args of regularization amounts passed to
        regularizers.calibrator_regularization(). Keyword names should be
        among supported regularizers.CALIBRATOR_REGULARIZERS and values
        should be either float or {feature_name: float}. If float, then same
        value is applied to all features.

    Returns:
      A tuple of:
      * calibrated tensor of shape [batch_size, sum(features dimensions)].
      * list of the feature names in the order they appear in the calibrated
        tensor. A name may appear more than once if the feature is
        multi-dimension (for instance a multi-dimension embedding)
      * list of projection ops, that must be applied at each step (or every
        so many steps) to project the model to a feasible space: used for
        bounding the outputs or for imposing monotonicity. Empty if none are
        requested.
      * None or tensor with regularization loss.

    Raises:
      ValueError: if dtypes are incompatible, if a feature has a rank other
        than 1 or 2, or if missing values are configured for a feature that
        is not calibrated.
    """
    with ops.name_scope('input_calibration_layer'):
        feature_names = tools.get_sorted_feature_names(columns_to_tensors,
                                                       feature_columns)

        # Expand every per-feature setting into a {feature_name: value} dict.
        num_keypoints = tools.cast_to_dict(num_keypoints, feature_names,
                                           'num_keypoints')
        bound = tools.cast_to_dict(bound, feature_names, 'bound')
        monotonic = tools.cast_to_dict(monotonic, feature_names, 'monotonic')
        keypoints_initializers = tools.cast_to_dict(
            keypoints_initializers, feature_names, 'keypoints_initializers')
        keypoints_initializer_fns = tools.cast_to_dict(
            keypoints_initializer_fns, feature_names,
            'keypoints_initializer_fns')
        missing_input_values = tools.cast_to_dict(
            missing_input_values, feature_names, 'missing_input_values')
        missing_output_values = tools.cast_to_dict(
            missing_output_values, feature_names, 'missing_output_values')
        regularizer_amounts = {
            regularizer_name: tools.cast_to_dict(amount, feature_names,
                                                 regularizer_name)
            for regularizer_name, amount in regularizer_amounts.items()
        }

        # Get uncalibrated tensors, either from columns_to_tensors, or using
        # feature_columns.
        if feature_columns is not None:
            transformed_columns_to_tensors = columns_to_tensors.copy()
            columns_by_name = {column.name: column for column in feature_columns}
            uncalibrated_features = [
                tools.input_from_feature_column(
                    transformed_columns_to_tensors, columns_by_name[name],
                    dtype) for name in feature_names
            ]
        else:
            uncalibrated_features = [
                columns_to_tensors[name] for name in feature_names
            ]

        per_dimension_feature_names = []
        projection_ops = []
        calibrated_splits = []
        total_regularization = None

        for name, uncalibrated_feature in zip(feature_names,
                                              uncalibrated_features):
            rank = uncalibrated_feature.shape.ndims
            if rank == 1:
                feature_dim = 1
                uncalibrated_splits = [uncalibrated_feature]
            elif rank == 2:
                feature_dim = uncalibrated_feature.shape.dims[1].value
                uncalibrated_splits = array_ops.unstack(
                    uncalibrated_feature, axis=1)
            else:
                raise ValueError(
                    'feature {}: it has rank {}, but only ranks 1 or 2 are '
                    'supported; feature shape={}'.format(
                        name, uncalibrated_feature.shape.ndims,
                        uncalibrated_feature.shape))

            missing_input_value = missing_input_values[name]
            missing_output_value = missing_output_values[name]
            feature_regularizer_amounts = {
                regularizer_name: per_feature[name]
                for regularizer_name, per_feature in
                regularizer_amounts.items()
            }

            # FutureWork: make the interpolation ops handle multi-dimension
            # values, so this step is not needed.
            for dim_idx in range(feature_dim):
                per_dimension_feature_names.append(name)
                if feature_dim > 1:
                    split_name = '{}_dim_{}'.format(name, dim_idx)
                else:
                    split_name = name
                uncalibrated = uncalibrated_splits[dim_idx]

                if num_keypoints[name]:
                    calibrated, projection, reg = (
                        one_dimensional_calibration_layer(
                            uncalibrated,
                            num_keypoints[name],
                            signal_name=split_name,
                            keypoints_initializers=keypoints_initializers[name],
                            keypoints_initializer_fns=(
                                keypoints_initializer_fns[name]),
                            bound=bound[name],
                            monotonic=monotonic[name],
                            missing_input_value=missing_input_value,
                            missing_output_value=missing_output_value,
                            **feature_regularizer_amounts))
                    calibrated_splits.append(calibrated)
                    if projection is not None:
                        projection_ops.append(projection)
                    total_regularization = tools.add_if_not_none(
                        total_regularization, reg)
                else:
                    # No calibration for this feature: pass it through.
                    calibrated_splits.append(uncalibrated)
                    has_missing_config = (missing_input_value is not None or
                                          missing_output_value is not None)
                    if has_missing_config:
                        raise ValueError(
                            'feature %s: cannot handle missing values if feature is not '
                            'calibrated, missing_input_value=%s, missing_output_value=%s' %
                            (name, missing_input_value, missing_output_value))

        all_calibrated = array_ops.stack(
            calibrated_splits, axis=1, name='stack_calibrated')
        return (all_calibrated, per_dimension_feature_names, projection_ops,
                total_regularization)
def attn_decoder(decoder_inputs,
                 attention_states,
                 encoder_state,
                 cells,
                 model_size,
                 lstm_size,
                 batch_size,
                 embedding_size,
                 num_symbols,
                 loop_function=None,
                 num_heads=1,
                 initial_state_attention=False,
                 output_size=None,
                 attention=True,
                 scope=None):
    """Two-level (sentence / word) RNN decoder with attention.

    For each of `model_size['decoder']['h2']` sentences the word-level cell
    `cells["decoder_h1"]` is run for `model_size['decoder']['h1']` steps with
    attention over `attention_states`; the last word output of the sentence
    is then fed to the sentence-level cell `cells["decoder_h2"]`.

    Args:
      decoder_inputs: indexable sequence of input tensors, consumed one per
        word step. The first dimension of `decoder_inputs[0]` is used as the
        batch size.
      attention_states: tensor whose dimensions 1 and 2 are statically known
        (attention length and attention size).
      encoder_state: initial state of the sentence-level decoder; also the
        state from which the initial attention is computed when
        `initial_state_attention` is True.
      cells: dict with the RNN cells under keys "decoder_h1" (word level) and
        "decoder_h2" (sentence level).
      model_size: nested dict providing `['encoder']['h1']`,
        `['encoder']['h2']`, `['decoder']['h1']` and `['decoder']['h2']`.
      lstm_size: unused.
      batch_size: ignored; the batch size is always re-derived from
        `decoder_inputs[0]`.
      embedding_size: unused.
      num_symbols: unused.
      loop_function: if not None, called as `loop_function(prev, i)` with the
        previous word-cell output and the current sentence index to produce
        the next input; only the very first step then reads
        `decoder_inputs`.
      num_heads: number of attention heads.
      initial_state_attention: if True, the attention vectors are initialised
        from `encoder_state` instead of zeros.
      output_size: size of the projected outputs; defaults to
        `cells["decoder_h1"].output_size`.
      attention: unused.
      scope: variable scope name; defaults to "attention_decoder".

    Returns:
      A tuple `(outputs, sen_state, attn_outputs)`: the list of projected
      outputs (one per word step), the final sentence-level state, and the
      list of per-step attention weights (one list of per-head tensors per
      word step).
    """
    # encoder size
    num_encoder_word = model_size['encoder']['h1']
    num_encoder_sen = model_size['encoder']['h2']
    # decoder size
    num_decoder_word = model_size['decoder']['h1']
    num_decoder_sen = model_size['decoder']['h2']

    outputs, attn_outputs = [], []
    if output_size is None:
        output_size = cells["decoder_h1"].output_size

    with variable_scope.variable_scope(scope or "attention_decoder"):
        batch_size = array_ops.shape(
            decoder_inputs[0])[0]  # Needed for reshaping.
        attn_length = attention_states.get_shape()[1].value
        attn_size = attention_states.get_shape()[2].value
        word_attn_size = num_encoder_word * num_encoder_sen

        # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape
        # before.
        hidden = array_ops.reshape(attention_states,
                                   [-1, attn_length, 1, attn_size])
        hidden_features = []
        v = []
        attention_vec_size = attn_size  # Size of query vectors for attention.
        for head in range(num_heads):
            k = variable_scope.get_variable(
                "AttnW_%d" % head, [1, 1, attn_size, attention_vec_size])
            hidden_features.append(
                nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME"))
            v.append(
                variable_scope.get_variable("AttnV_%d" % head,
                                            [attention_vec_size]))

        # Named `_attend` so it does not shadow the `attention` parameter.
        def _attend(state):
            """Put attention masks on hidden using hidden_features and query."""
            # NOTE(review): presumably distinguishes a multi-layer state (a
            # sequence of (c, h) pairs) from a single (c, h) pair -- confirm.
            if np.array(state).ndim > 1:
                concat_layers = [tf.concat([c, h], 1) for c, h in state]
                query = tf.concat(concat_layers, 1)
            else:
                query = tf.concat([state[0], state[1]], 1)

            ds, ass = [], []  # Results of attention reads will be stored here.
            for head in range(num_heads):
                with variable_scope.variable_scope("Attention_%d" % head):
                    y = linear(query, attention_vec_size, True)
                    y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
                    # Attention mask is a softmax of v^T * tanh(...).
                    s = math_ops.reduce_sum(
                        v[head] * math_ops.tanh(hidden_features[head] + y),
                        [2, 3])
                    weights = nn_ops.softmax(s)
                    ass.append(weights)
                    # Now calculate the attention-weighted vector d.
                    d = math_ops.reduce_sum(
                        array_ops.reshape(weights, [-1, attn_length, 1, 1]) *
                        hidden, [1, 2])
                    ds.append(array_ops.reshape(d, [-1, attn_size]))
            return ds, ass

        batch_attn_size = array_ops.stack([batch_size, attn_size])
        # NOTE(review): batch_word_attn_size is never read; `word_attns`
        # below is built from batch_attn_size. Kept as in the original.
        batch_word_attn_size = array_ops.stack([batch_size, word_attn_size])
        attns = [
            array_ops.zeros(batch_attn_size, dtype=dtypes.float32)
            for _ in range(num_heads)
        ]
        word_attns = [
            array_ops.zeros(batch_attn_size, dtype=dtypes.float32)
            for _ in range(num_heads)
        ]
        for attn in attns:  # Ensure the second shape of attention vectors is set.
            attn.set_shape([None, attn_size])
        if initial_state_attention:
            # The decoder's initial state is `encoder_state`; the previous
            # code referenced an undefined name `initial_state` here.
            attns, word_attns = _attend(encoder_state)

        prev = None
        sen_state = encoder_state
        decoder_word_idx = 0
        for i in range(num_decoder_sen):
            if i > 0:
                variable_scope.get_variable_scope().reuse_variables()

            with tf.variable_scope(scope or "decode_words"):
                word_input, word_output = None, None
                word_state = cells["decoder_h1"].zero_state(
                    batch_size, tf.float32)
                for t in range(num_decoder_word):
                    if t > 0:
                        variable_scope.get_variable_scope().reuse_variables()
                    # The first word of a sentence starts from the
                    # sentence-level state.
                    word_state = word_state if t else sen_state

                    # If loop_function is set, we use it instead of
                    # decoder_inputs.
                    if loop_function is not None and prev is not None:
                        with variable_scope.variable_scope(
                                "loop_function", reuse=True):
                            word_input = loop_function(prev, i)
                    else:
                        word_input = decoder_inputs[decoder_word_idx]
                    decoder_word_idx += 1

                    x = linear([word_input] + attns, output_size, True)
                    word_output, word_state = cells["decoder_h1"](x,
                                                                  word_state)

                    if not i and initial_state_attention:
                        with variable_scope.variable_scope(
                                variable_scope.get_variable_scope(),
                                reuse=True):
                            attns, word_attns = _attend(word_state)
                    else:
                        attns, word_attns = _attend(word_state)

                    with variable_scope.variable_scope(
                            "AttnOutputProjection"):
                        output = linear([word_output] + attns, output_size,
                                        True)
                    outputs.append(output)
                    attn_outputs.append(word_attns)

                    if loop_function is not None:
                        prev = word_output

            # Advance the sentence-level state with the sentence's last word
            # output.
            _, sen_state = cells["decoder_h2"](word_output, sen_state)

    return outputs, sen_state, attn_outputs
def update_confusion_matrix_variables(variables_to_update,
                                      y_true,
                                      y_pred,
                                      thresholds,
                                      top_k=None,
                                      class_id=None,
                                      sample_weight=None,
                                      multi_label=False,
                                      label_weights=None):
  """Returns op to update the given confusion matrix variables.

  For every pair of values in y_true and y_pred:

  true_positive: y_true == True and y_pred > thresholds
  false_negatives: y_true == True and y_pred <= thresholds
  true_negatives: y_true == False and y_pred <= thresholds
  false_positive: y_true == False and y_pred > thresholds

  The results will be weighted and added together. When multiple thresholds are
  provided, we will repeat the same for every threshold.

  For estimation of these metrics over a stream of data, the function creates an
  `update_op` operation that updates the given variables.

  If `sample_weight` is `None`, weights default to 1.
  Use weights of 0 to mask values.

  Args:
    variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys
      and corresponding variables to update as values.
    y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`.
    y_pred: A floating point `Tensor` of arbitrary shape and whose values are in
      the range `[0, 1]`.
    thresholds: A float value, float tensor, python list, or tuple of float
      thresholds in `[0, 1]`, or NEG_INF (used when top_k is set).
    top_k: Optional int, indicates that the positive labels should be limited to
      the top k predictions.
    class_id: Optional int, limits the prediction and labels to the class
      specified by this argument.
    sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as
      `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `y_true` dimension).
    multi_label: Optional boolean indicating whether multidimensional
      prediction/labels should be treated as multilabel responses, or flattened
      into a single label. When True, the values of `variables_to_update` must
      have a second dimension equal to the number of labels in y_true and
      y_pred, and those tensors must not be RaggedTensors.
    label_weights: (optional) tensor of non-negative weights for multilabel
      data. The weights are applied when calculating TP, FP, FN, and TN without
      explicit multilabel handling (i.e. when the data is to be flattened).

  Returns:
    Update op (a group of the individual `assign_add` ops), or `None` when
    `variables_to_update` is `None`.

  Raises:
    ValueError: If `y_pred` and `y_true` have mismatched shapes, or if
      `sample_weight` is not `None` and its shape doesn't match `y_pred`, or if
      `variables_to_update` contains invalid keys, or if `label_weights` is
      given together with `multi_label=True`.
  """
  if multi_label and label_weights is not None:
    raise ValueError('`label_weights` for multilabel data should be handled '
                     'outside of `update_confusion_matrix_variables` when '
                     '`multi_label` is True.')
  # Nothing to update: return `None` instead of an op.
  if variables_to_update is None:
    return
  # Require at least one recognised key before doing any work.
  if not any(
      key for key in variables_to_update if key in list(ConfusionMatrix)):
    raise ValueError(
        'Please provide at least one valid confusion matrix '
        'variable to update. Valid variable key options are: "{}". '
        'Received: "{}"'.format(
            list(ConfusionMatrix), variables_to_update.keys()))

  # All inputs are cast to the dtype of the first variable in the dictionary.
  variable_dtype = list(variables_to_update.values())[0].dtype

  y_true = math_ops.cast(y_true, dtype=variable_dtype)
  y_pred = math_ops.cast(y_pred, dtype=variable_dtype)
  thresholds = ops.convert_to_tensor_v2(thresholds, dtype=variable_dtype)
  num_thresholds = thresholds.shape[0]
  if multi_label:
    # True when `thresholds` is rank 1, i.e. one set of thresholds shared by
    # every label; it then has to be tiled across the label dimension below.
    one_thresh = math_ops.equal(
        math_ops.cast(1, dtype=dtypes.int32),
        array_ops.rank(thresholds),
        name='one_set_of_thresholds_cond')
  else:
    [y_pred,
     y_true], _ = ragged_assert_compatible_and_get_flat_values([y_pred, y_true],
                                                               sample_weight)
    one_thresh = math_ops.cast(True, dtype=dtypes.bool)

  # Any unrecognised key is an error, even if valid keys are also present.
  invalid_keys = [
      key for key in variables_to_update if key not in list(ConfusionMatrix)
  ]
  if invalid_keys:
    raise ValueError(
        'Invalid keys: {}. Valid variable key options are: "{}"'.format(
            invalid_keys, list(ConfusionMatrix)))

  # The range assertions on `y_pred` are attached as control dependencies of
  # the squeeze/expand ops created inside this block.
  with ops.control_dependencies([
      check_ops.assert_greater_equal(
          y_pred,
          math_ops.cast(0.0, dtype=y_pred.dtype),
          message='predictions must be >= 0'),
      check_ops.assert_less_equal(
          y_pred,
          math_ops.cast(1.0, dtype=y_pred.dtype),
          message='predictions must be <= 1')
  ]):
    if sample_weight is None:
      y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(
          y_pred, y_true)
    else:
      sample_weight = math_ops.cast(sample_weight, dtype=variable_dtype)
      y_pred, y_true, sample_weight = (
          losses_utils.squeeze_or_expand_dimensions(
              y_pred, y_true, sample_weight=sample_weight))
  y_pred.shape.assert_is_compatible_with(y_true.shape)

  if top_k is not None:
    y_pred = _filter_top_k(y_pred, top_k)
  if class_id is not None:
    # Keep only the requested class along the last dimension.
    y_true = y_true[..., class_id]
    y_pred = y_pred[..., class_id]

  pred_shape = array_ops.shape(y_pred)
  num_predictions = pred_shape[0]
  if y_pred.shape.ndims == 1:
    num_labels = 1
  else:
    # Number of labels is the product of all non-leading dimensions.
    num_labels = gen_math_ops.Prod(input=pred_shape[1:], axis=0)
  thresh_label_tile = control_flow_ops.cond(
      one_thresh, lambda: num_labels,
      lambda: math_ops.cast(1, dtype=dtypes.int32))

  # Reshape predictions and labels, adding a dim for thresholding.
  if multi_label:
    predictions_extra_dim = array_ops.expand_dims(y_pred, 0)
    labels_extra_dim = array_ops.expand_dims(
        math_ops.cast(y_true, dtype=dtypes.bool), 0)
  else:
    # Flatten predictions and labels when not multilabel.
    predictions_extra_dim = array_ops.reshape(y_pred, [1, -1])
    labels_extra_dim = array_ops.reshape(
        math_ops.cast(y_true, dtype=dtypes.bool), [1, -1])

  # Tile the thresholds for every prediction.
  if multi_label:
    thresh_pretile_shape = [num_thresholds, 1, -1]
    thresh_tiles = [1, num_predictions, thresh_label_tile]
    data_tiles = [num_thresholds, 1, 1]
  else:
    thresh_pretile_shape = [num_thresholds, -1]
    thresh_tiles = [1, num_predictions * num_labels]
    data_tiles = [num_thresholds, 1]

  thresh_tiled = array_ops.tile(
      array_ops.reshape(thresholds, thresh_pretile_shape),
      array_ops.stack(thresh_tiles))

  # Tile the predictions for every threshold.
  preds_tiled = array_ops.tile(predictions_extra_dim, data_tiles)

  # Compare predictions and threshold.
  pred_is_pos = math_ops.greater(preds_tiled, thresh_tiled)

  # Tile labels by number of thresholds.
  label_is_pos = array_ops.tile(labels_extra_dim, data_tiles)

  if sample_weight is not None:
    # Broadcast the weights to `y_pred`, then lay them out like the tiled data.
    sample_weight = weights_broadcast_ops.broadcast_weights(
        math_ops.cast(sample_weight, dtype=variable_dtype), y_pred)
    weights_tiled = array_ops.tile(
        array_ops.reshape(sample_weight, thresh_tiles), data_tiles)
  else:
    weights_tiled = None

  if label_weights is not None and not multi_label:
    label_weights = array_ops.expand_dims(label_weights, 0)
    label_weights = weights_broadcast_ops.broadcast_weights(label_weights,
                                                            y_pred)
    label_weights_tiled = array_ops.tile(
        array_ops.reshape(label_weights, thresh_tiles), data_tiles)
    # Label weights combine multiplicatively with any sample weights.
    if weights_tiled is None:
      weights_tiled = label_weights_tiled
    else:
      weights_tiled = math_ops.multiply(weights_tiled, label_weights_tiled)

  update_ops = []

  def weighted_assign_add(label, pred, weights, var):
    # Adds the (optionally weighted) count of positions where both `label` and
    # `pred` are true, summed over axis 1, to `var`.
    label_and_pred = math_ops.cast(
        math_ops.logical_and(label, pred), dtype=var.dtype)
    if weights is not None:
      label_and_pred *= math_ops.cast(weights, dtype=var.dtype)
    return var.assign_add(math_ops.reduce_sum(label_and_pred, 1))

  # Maps each confusion-matrix entry to the (label mask, prediction mask) pair
  # whose conjunction defines it. Negated masks are only built when needed.
  loop_vars = {
      ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos),
  }
  update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
  update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update
  update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update

  if update_fn or update_tn:
    pred_is_neg = math_ops.logical_not(pred_is_pos)
    loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos, pred_is_neg)

  if update_fp or update_tn:
    label_is_neg = math_ops.logical_not(label_is_pos)
    loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg, pred_is_pos)
    if update_tn:
      loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = (label_is_neg, pred_is_neg)

  # Only entries the caller actually supplied a variable for are updated.
  for matrix_cond, (label, pred) in loop_vars.items():

    if matrix_cond in variables_to_update:
      update_ops.append(
          weighted_assign_add(label, pred, weights_tiled,
                              variables_to_update[matrix_cond]))

  return control_flow_ops.group(update_ops)
def Bar(x):
  """Stacks the one-element list `[x]` with `array_ops.stack`."""
  singleton = [x]
  return array_ops.stack(singleton)
def _get_pixel(image, _y, _x):
  """Gathers entries of `image` at the coordinates given by `_y` and `_x`.

  The static shape of `image` must have four entries; all but the last are
  read as `(b, _h, _w)`. A batch-index tensor of shape
  `(b, _h - 1, _w - 1)` is built and stacked with `_y` and `_x` along axis 3
  to form the indices passed to `gather_nd`.

  Args:
    image: Tensor with a statically known shape of four dimensions.
    _y: Index tensor stacked as the second index component (presumably shaped
      like the batch-index tensor -- confirm against the caller).
    _x: Index tensor stacked as the third index component (same assumption).

  Returns:
    The result of `array_ops.gather_nd(image, indices)`.
  """
  static_dims = image.get_shape().as_list()
  b, _h, _w = static_dims[:-1]
  # One batch index per gathered location.
  batch_idx = array_ops.tile(
      array_ops.reshape(math_ops.range(b), shape=(b, 1, 1)),
      multiples=(1, _h - 1, _w - 1))
  gather_indices = array_ops.stack([batch_idx, _y, _x], axis=3)
  return array_ops.gather_nd(image, gather_indices)
def _broadcast_to_ragged_shape(rt_input, dst_shape, broadcast_inner_dimensions):
  """Broadcasts rt_input to the ragged shape `dst_shape`.

  Args:
    rt_input: The value to broadcast. May be a `RaggedTensor`; otherwise it is
      converted to one with `RaggedTensor.from_tensor` (presumably a dense
      `Tensor` -- confirm against callers).
    dst_shape: The destination shape. Must expose `rank`, `dim_size_dtype`,
      `num_partitioned_dimensions`, `num_inner_dimensions`, `inner_dim_sizes`,
      `is_ragged(axis)` and `dimension_size(axis)` (presumably a
      `RaggedTensorDynamicShape`).
    broadcast_inner_dimensions: If true, `rt_input.flat_values` is also
      broadcast against `dst_shape.inner_dim_sizes`.

  Returns:
    `rt_input` after broadcasting, as a `RaggedTensor`.

  Raises:
    ValueError: If the row-splits dtypes differ and automatic casting is
      disabled, if either rank is unknown, if `rt_input` has a higher rank
      than `dst_shape`, or if `rt_input.ragged_rank` is not strictly less than
      `dst_shape.num_partitioned_dimensions`.
  """
  # Check that rt_input and dst_shape have the same row_splits dtype.
  if (isinstance(rt_input, ragged_tensor.RaggedTensor) and
      rt_input.row_splits.dtype != dst_shape.dim_size_dtype):
    if not ragged_config.auto_cast_partition_dtype():
      raise ValueError('rt_input and dst_shape have different row_split '
                       'dtypes; use RaggedTensor.with_row_splits_dtype() or '
                       'RaggedTensorDynamicShape.with_dim_size_dtype() to '
                       'convert to a compatible dtype.')
    # With auto-casting enabled, both sides are promoted to int64.
    rt_input = rt_input.with_row_splits_dtype(dtypes.int64)
    dst_shape = dst_shape.with_dim_size_dtype(dtypes.int64)

  # dst_shape's rank must be greater than or equal to rt_input's.
  # NOTE(review): the ragged-rank check below is strict -- it raises when
  # rt_input.ragged_rank >= dst_shape.num_partitioned_dimensions.
  if rt_input.shape.ndims is None or dst_shape.rank is None:
    raise ValueError('Unable to broadcast: unknown rank')
  if rt_input.shape.ndims > dst_shape.rank:
    raise ValueError('Incompatible with shape: rank mismatch')
  if (isinstance(rt_input, ragged_tensor.RaggedTensor) and
      rt_input.ragged_rank >= dst_shape.num_partitioned_dimensions):
    raise ValueError('Incompatible with shape: ragged rank mismatch')

  # Source shape, padded to the destination rank; used below to decide which
  # axes need tiling.
  src_shape = RaggedTensorDynamicShape.from_tensor(rt_input)
  src_shape = src_shape.broadcast_to_rank(dst_shape.rank)

  # Add dimensions to rt_input so its rank and ragged_rank matches dst_shape.
  if dst_shape.rank > rt_input.shape.ndims:
    if rt_input.shape.ndims < dst_shape.num_inner_dimensions + 1:
      rt_input = array_ops.reshape(
          rt_input, array_ops.concat([[-1], dst_shape.inner_dim_sizes], axis=0))
    # Each iteration wraps rt_input in a new outer dimension holding one row.
    for _ in range(dst_shape.rank - rt_input.shape.ndims):
      if ragged_tensor.is_ragged(rt_input):
        nrows = rt_input.nrows()
      else:
        nrows = array_ops.shape(rt_input,
                                out_type=dst_shape.dim_size_dtype)[0]
      rt_input = ragged_tensor.RaggedTensor.from_row_lengths(rt_input, [nrows],
                                                             validate=False)

  # Add ragged dimensions to match dst_shape.
  if ragged_tensor.is_ragged(rt_input):
    inner_rank_diff = (
        rt_input.flat_values.shape.ndims - 1 - dst_shape.num_inner_dimensions)
    if inner_rank_diff > 0:
      rt_input = rt_input.with_flat_values(
          ragged_tensor.RaggedTensor.from_tensor(
              rt_input.flat_values, ragged_rank=inner_rank_diff,
              row_splits_dtype=dst_shape.dim_size_dtype))
  else:
    rt_input = ragged_tensor.RaggedTensor.from_tensor(
        rt_input, ragged_rank=dst_shape.num_partitioned_dimensions - 1,
        row_splits_dtype=dst_shape.dim_size_dtype)

  # Do broadcasting for any dimensions that will remain uniform. We can do
  # these all at once, since they're independent of one another.
  multiples = [1] * dst_shape.rank
  for axis in range(dst_shape.num_partitioned_dimensions):
    if not src_shape.is_ragged(axis) and not dst_shape.is_ragged(axis):
      src_size = src_shape.dimension_size(axis)
      dst_size = dst_shape.dimension_size(axis)
      # Tile only if the source size is (or may be) 1 and the destination
      # size is not statically known to be 1.
      if ((tensor_util.constant_value(src_size) in (1, None)) and
          (tensor_util.constant_value(dst_size) != 1)):
        multiples[axis] = array_ops.where(
            math_ops.equal(src_size, 1), dst_size, 1)
  # Skip the tile op entirely when every multiple is the Python int 1.
  if not all(isinstance(v, int) and v == 1 for v in multiples):
    multiples = array_ops.stack(multiples, axis=0)
    rt_input = ragged_array_ops.tile(rt_input, multiples)

  if broadcast_inner_dimensions:
    new_shape = array_ops.broadcast_dynamic_shape(
        array_ops.shape(
            rt_input.flat_values, out_type=dst_shape.dim_size_dtype),
        array_ops.concat([[1], dst_shape.inner_dim_sizes], axis=0))
    rt_input = rt_input.with_flat_values(
        array_ops.broadcast_to(rt_input.flat_values, new_shape))

  # Do broadcasting for dimensions that become ragged. We must do these from
  # outermost to innermost.
  for axis in range(dst_shape.num_partitioned_dimensions):
    if not src_shape.is_ragged(axis) and dst_shape.is_ragged(axis):
      dst_size = dst_shape.dimension_size(axis)
      rt_input = _ragged_tile_axis(rt_input, axis, dst_size,
                                   dst_shape.dim_size_dtype)

  return rt_input
def loop_fn(i):
  """Stacks slice `i` of `x` with `y` along the last axis.

  `x` and `y` are not parameters; they are read from the enclosing scope.
  """
  pair = [array_ops.gather(x, i), y]
  return array_ops.stack(pair, axis=-1)
def stack(self, name=None):
  """See TensorArray.

  Calls `_maybe_zero` on every index of the backing list (when it is
  non-empty) and then stacks the list into a single tensor.
  """
  slot_count = len(self._tensor_array) if self._tensor_array else 0
  for slot in range(slot_count):
    self._maybe_zero(slot)
  return array_ops.stack(self._tensor_array, name=name)
def hessians(ys,
             xs,
             name="hessians",
             colocate_gradients_with_ops=False,
             gate_gradients=False,
             aggregation_method=None):
  """Builds the Hessian of `sum(ys)` with respect to each `x` in `xs`.

  Ops are added to the graph that first differentiate `ys` with respect to
  `x`, then differentiate every element of that gradient with respect to `x`
  again, and finally stack the results into a matrix. Only one-dimensional
  `x` tensors are supported. See
  https://en.wikipedia.org/wiki/Hessian_matrix for background on the Hessian.

  Args:
    ys: A `Tensor` or list of tensors to be differentiated.
    xs: A `Tensor` or list of tensors to be used for differentiation.
    name: Optional name to use for grouping all the gradient ops together.
      Defaults to 'hessians'.
    colocate_gradients_with_ops: See `gradients()` documentation for details.
    gate_gradients: See `gradients()` documentation for details.
    aggregation_method: See `gradients()` documentation for details.

  Returns:
    A list with one Hessian matrix of `sum(ys)` per `x` in `xs`.

  Raises:
    LookupError: if one of the operations between `xs` and `ys` does not
      have a registered gradient function.
    ValueError: if the rank of an `x` is unknown or is not 1.
  """
  xs = _AsList(xs)
  grad_kwargs = dict(
      colocate_gradients_with_ops=colocate_gradients_with_ops,
      gate_gradients=gate_gradients,
      aggregation_method=aggregation_method)

  hessian_matrices = []
  for index, x in enumerate(xs):
    # Only rank-1 inputs are supported; reject everything else up front.
    rank = x.get_shape().ndims
    if rank is None:
      raise ValueError('Cannot compute Hessian because the dimensionality of '
                       'element number %d of `xs` cannot be determined' % index)
    if rank != 1:
      raise ValueError('Computing hessians is currently only supported for '
                       'one-dimensional tensors. Element number %d of `xs` has '
                       '%d dimensions.' % (index, rank))

    with ops.name_scope(name + '_first_derivative'):
      # First derivatives w.r.t. all elements of `x`, split into a list so
      # that each element can be differentiated again on its own.
      first_order = array_ops.unstack(gradients(ys, x, **grad_kwargs)[0])

    with ops.name_scope(name + '_second_derivative'):
      # One Hessian row per first-derivative element.
      rows = []
      for partial in first_order:
        rows.append(gradients(partial, x, **grad_kwargs)[0])
      hessian_matrices.append(array_ops.stack(rows, name=name))
  return hessian_matrices
def gather(self, indices, name=None):
  """See TensorArray.

  `indices` must provide a `.numpy()` method; each resulting index is passed
  to `_maybe_zero` and the returned values are stacked.
  """
  del name  # not meaningful in Eager mode
  selected = []
  for index in indices.numpy():
    selected.append(self._maybe_zero(index))
  return array_ops.stack(selected)
def testLSTMFusedSequenceLengths(self):
  """Verify proper support for sequence lengths in LSTMBlockFusedCell.

  The fused cell is run once over the whole stacked sequence with
  `sequence_length` set, and then again one time step at a time with the
  state carried forward. Outputs, final state, input gradients and weight
  gradients of the two runs must agree.
  """
  with self.session(use_gpu=True) as sess:
    batch_size = 3
    input_size = 4
    cell_size = 5
    max_sequence_length = 6

    inputs = []
    for _ in range(max_sequence_length):
      inp = ops.convert_to_tensor(
          np.random.randn(batch_size, input_size), dtype=dtypes.float32)
      inputs.append(inp)
    seq_lengths = constant_op.constant([3, 4, 5])
    cell_inputs = array_ops.stack(inputs)

    initializer = init_ops.random_uniform_initializer(
        -0.01, 0.01, seed=19890213)

    with variable_scope.variable_scope("lstm_cell", initializer=initializer):
      # magic naming so that the cells pick up these variables and reuse them
      variable_scope.get_variable(
          "kernel",
          shape=[input_size + cell_size, cell_size * 4],
          dtype=dtypes.float32)

      variable_scope.get_variable(
          "bias",
          shape=[cell_size * 4],
          dtype=dtypes.float32,
          initializer=init_ops.zeros_initializer())

    cell = lstm_ops.LSTMBlockFusedCell(
        cell_size, cell_clip=0, use_peephole=False, reuse=True,
        name="lstm_cell")

    fused_outputs_op, fused_state_op = cell(
        cell_inputs, dtype=dtypes.float32, sequence_length=seq_lengths)

    cell_vars = [
        v for v in variables.trainable_variables()
        if v.name.endswith("kernel") or v.name.endswith("bias")
    ]

    # Verify that state propagation works if we turn our sequence into
    # tiny (single-time) subsequences, i.e. unfuse the cell
    unfused_outputs_op = []
    state = None
    # The sequence lengths are constant, so evaluate them once instead of
    # re-running the session on every time step.
    seq_length_values = seq_lengths.eval()
    with variable_scope.variable_scope(
        variable_scope.get_variable_scope(), reuse=True):
      for i, inp in enumerate(inputs):
        # A batch entry is active (length 1) at step `i` only while `i` is
        # still inside its sequence; otherwise its length is 0.
        lengths = [int(i < length) for length in seq_length_values]
        output, state = cell(
            array_ops.expand_dims(inp, 0),
            initial_state=state,
            dtype=dtypes.float32,
            sequence_length=lengths)
        unfused_outputs_op.append(output[0])
    unfused_outputs_op = array_ops.stack(unfused_outputs_op)

    sess.run([variables.global_variables_initializer()])
    unfused_outputs, unfused_state = sess.run([unfused_outputs_op, state[0]])
    unfused_grads = sess.run(
        gradients_impl.gradients(unfused_outputs_op, inputs))
    unfused_wgrads = sess.run(
        gradients_impl.gradients(unfused_outputs_op, cell_vars))

    fused_outputs, fused_state = sess.run(
        [fused_outputs_op, fused_state_op[0]])
    fused_grads = sess.run(gradients_impl.gradients(fused_outputs_op, inputs))
    fused_wgrads = sess.run(
        gradients_impl.gradients(fused_outputs_op, cell_vars))

    self.assertAllClose(fused_outputs, unfused_outputs)
    self.assertAllClose(fused_state, unfused_state)
    self.assertAllClose(fused_grads, unfused_grads)
    for fused, unfused in zip(fused_wgrads, unfused_wgrads):
      self.assertAllClose(fused, unfused, rtol=1e-6, atol=1e-6)
def stack_dynamic_partitions(data, partitions, num_partitions, name=None):
  """Stacks dynamic partitions of a Tensor or RaggedTensor.

  Returns a RaggedTensor `output` with `num_partitions` rows, where the row
  `output[i]` is formed by stacking all slices `data[j1...jN]` such that
  `partitions[j1...jN] = i`. Slices of `data` are stacked in row-major
  order.

  If `num_partitions` is an `int` (not a `Tensor`), then this is equivalent to
  `tf.ragged.stack(tf.dynamic_partition(data, partitions, num_partitions))`.

  #### Example:

  >>> data = ['a', 'b', 'c', 'd', 'e']
  >>> partitions = [ 3, 0, 2, 2, 3]
  >>> num_partitions = 5
  >>> tf.ragged.stack_dynamic_partitions(data, partitions, num_partitions)
  <tf.RaggedTensor [[b'b'], [], [b'c', b'd'], [b'a', b'e'], []]>

  Args:
    data: A `Tensor` or `RaggedTensor` containing the values to stack.
    partitions: An `int32` or `int64` `Tensor` or `RaggedTensor` specifying the
      partition that each slice of `data` should be added to.
      `partitions.shape` must be a prefix of `data.shape`. Values must be
      greater than or equal to zero, and less than `num_partitions`.
      `partitions` is not required to be sorted.
    num_partitions: An `int32` or `int64` scalar specifying the number of
      partitions to output. This determines the number of rows in `output`.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor` containing the stacked partitions. The returned tensor
    has the same dtype as `data`, and its shape is
    `[num_partitions, (D)] + data.shape[partitions.rank:]`, where `(D)` is a
    ragged dimension whose length is the number of data slices stacked for
    each `partition`.

  Raises:
    ValueError: If the rank of `partitions` is not statically known.
  """
  with ops.name_scope(name, 'SegmentStack', [data, partitions, num_partitions]):
    # Convert inputs to tensors.
    data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data')
    # When `data` is ragged, its row_splits dtype is the preferred dtype for
    # `partitions`, and `partitions` is cast to it below.
    row_splits_dtype = (
        data.row_splits.dtype
        if isinstance(data, ragged_tensor.RaggedTensor) else None)
    partitions = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        partitions, name='partitions', preferred_dtype=row_splits_dtype)
    num_partitions = ops.convert_to_tensor(
        num_partitions, name='num_partitions', preferred_dtype=partitions.dtype)
    if row_splits_dtype is not None:
      partitions = math_ops.cast(partitions, row_splits_dtype)
    num_partitions = math_ops.cast(num_partitions, partitions.dtype)

    # Sanity-checks for shapes.
    partitions_rank = partitions.shape.ndims
    if partitions_rank is None:
      raise ValueError('partitions must have known rank.')
    num_partitions.shape.assert_has_rank(0)
    partitions.shape.assert_is_compatible_with(data.shape[:partitions_rank])

    if partitions_rank == 0:
      # If partitions is a scalar, then just create a RaggedTensor containing
      # the complete `data` value as the single entry of the specified row.
      return ragged_tensor.RaggedTensor.from_value_rowids(
          values=array_ops.stack([data]),
          value_rowids=array_ops.stack([partitions]),
          nrows=num_partitions,
          validate=False)

    elif partitions_rank == 1:
      # If partitions is a vector (the typical case): we can just use data and
      # partitions as the `values` and `value_rowids` for `from_value_rowids`,
      # as long as we sort them first.
      permutation = sort_ops.argsort(partitions, stable=True)
      value_rowids = array_ops.gather(partitions, permutation)
      values = array_ops.gather(data, permutation)
      # After sorting, the last row id is the largest, so checking it alone
      # bounds all of them (the slice is empty when there are no partitions).
      check = check_ops.assert_less(
          value_rowids[-1:],
          num_partitions,
          message='partitions must be less than num_partitions')
      with ops.control_dependencies([check]):
        return ragged_tensor.RaggedTensor.from_value_rowids(
            values, value_rowids, nrows=num_partitions, validate=False)

    else:
      # Handle higher-dimensional partitions via recursion.
      if not isinstance(data, ragged_tensor.RaggedTensor):
        data = ragged_tensor.RaggedTensor.from_tensor(
            data, row_splits_dtype=partitions.dtype, ragged_rank=1)
      if not isinstance(partitions, ragged_tensor.RaggedTensor):
        partitions = ragged_tensor.RaggedTensor.from_tensor(
            partitions,
            row_splits_dtype=partitions.dtype,
            ragged_rank=max(data.ragged_rank, partitions_rank - 1))
      # Both must share the same outer row partitioning before their values
      # can be processed together.
      check = check_ops.assert_equal(
          data.row_splits,
          partitions.row_splits,
          message='data and partitions have incompatible ragged shapes')
      with ops.control_dependencies([check]):
        # NOTE(review): `name` is not forwarded to the recursive call.
        return stack_dynamic_partitions(data.values, partitions.values,
                                        num_partitions)
def blocks_match(sess, use_peephole, dtype=dtypes.float32, cell_clip=None):
  """Evaluates three LSTM implementations on shared weights and inputs.

  The same random inputs and the same variables are fed to an `LSTMCell` run
  through `static_rnn` ("basic"), to `block_lstm` ("block") and to an
  `LSTMBlockFusedCell` ("fused"). Each one's outputs, input gradients and
  weight gradients are evaluated in `sess`.

  Args:
    sess: Session used to initialize variables and evaluate all ops.
    use_peephole: Whether peephole variables are created and used.
    dtype: Dtype of the inputs and variables.
    cell_clip: Cell clipping value forwarded to all three implementations.

  Returns:
    A tuple `(basic_state, fused_state, basic_outputs, block_outputs,
    fused_outputs, basic_grads, block_grads, fused_grads, basic_wgrads,
    block_wgrads, fused_wgrads)`.
  """
  batch_size, input_size, cell_size, sequence_length = 2, 3, 4, 4

  inputs = [
      ops.convert_to_tensor(
          np.random.randn(batch_size, input_size), dtype=dtype)
      for _ in range(sequence_length)
  ]
  stacked_inputs = array_ops.stack(inputs)

  init_bound = 1e-1 if dtype == dtypes.float16 else 1e-2
  initializer = _get_initializer(init_bound, dtype=dtype, seed=19890212)

  with variable_scope.variable_scope("test", initializer=initializer):
    # magic naming so that the cells pick up these variables and reuse them
    peephole_vars = []
    if use_peephole:
      wci = variable_scope.get_variable(
          "rnn/lstm_cell/w_i_diag", shape=[cell_size], dtype=dtype)
      wcf = variable_scope.get_variable(
          "rnn/lstm_cell/w_f_diag", shape=[cell_size], dtype=dtype)
      wco = variable_scope.get_variable(
          "rnn/lstm_cell/w_o_diag", shape=[cell_size], dtype=dtype)
      peephole_vars = [wci, wcf, wco]

    w = variable_scope.get_variable(
        "rnn/lstm_cell/kernel",
        shape=[input_size + cell_size, cell_size * 4],
        dtype=dtype)
    b = variable_scope.get_variable(
        "rnn/lstm_cell/bias",
        shape=[cell_size * 4],
        dtype=dtype,
        initializer=init_ops.zeros_initializer())

    # Variables with respect to which the weight gradients are taken.
    xs = [w, b] + peephole_vars

    basic_cell = rnn_cell.LSTMCell(
        cell_size,
        use_peepholes=use_peephole,
        cell_clip=cell_clip,
        dtype=dtype,
        state_is_tuple=True,
        reuse=True)
    basic_outputs_op, basic_state_op = rnn.static_rnn(
        basic_cell, inputs, dtype=dtype)

    # The peephole weights are only passed to block_lstm when they exist.
    block_kwargs = {"cell_clip": cell_clip}
    if use_peephole:
      block_kwargs.update(wci=wci, wcf=wcf, wco=wco, use_peephole=True)
    _, _, _, _, _, _, block_outputs_op = block_lstm(
        ops.convert_to_tensor(sequence_length, dtype=dtypes.int64),
        inputs,
        w,
        b,
        **block_kwargs)

    fused_cell = lstm_ops.LSTMBlockFusedCell(
        cell_size,
        cell_clip=cell_clip,
        use_peephole=use_peephole,
        reuse=True,
        name="rnn/lstm_cell")
    fused_outputs_op, fused_state_op = fused_cell(stacked_inputs, dtype=dtype)

    sess.run([variables.global_variables_initializer()])

    # Basic (LSTMCell + static_rnn) results.
    basic_outputs, basic_state = sess.run([basic_outputs_op, basic_state_op[0]])
    basic_grads = sess.run(gradients_impl.gradients(basic_outputs_op, inputs))
    basic_wgrads = sess.run(gradients_impl.gradients(basic_outputs_op, xs))

    # block_lstm results.
    block_outputs = sess.run(block_outputs_op)
    block_grads = sess.run(gradients_impl.gradients(block_outputs_op, inputs))
    block_wgrads = sess.run(gradients_impl.gradients(block_outputs_op, xs))

    # Fused cell results.
    fused_outputs, fused_state = sess.run([fused_outputs_op, fused_state_op[0]])
    fused_grads = sess.run(gradients_impl.gradients(fused_outputs_op, inputs))
    fused_wgrads = sess.run(gradients_impl.gradients(fused_outputs_op, xs))

    return (basic_state, fused_state, basic_outputs, block_outputs,
            fused_outputs, basic_grads, block_grads, fused_grads, basic_wgrads,
            block_wgrads, fused_wgrads)
def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index):
  """Gradient for concat op.

  Args:
    op: An operation.
    grad: `Tensor` or `IndexedSlices` representing the gradients with respect
      to each output of the op.
    start_value_index: An integer index of the first value in the op.inputs.
    end_value_index: An integer index of the last value in the op.inputs.
    dim_index: An integer index of concat_dim or axis parameter in op.inputs.

  Returns:
    A list with the partial gradients with respect to each value input of the
    op, plus a `None` entry in the position of the concat_dim/axis input (last
    when `end_value_index <= dim_index`, first otherwise).

  Raises:
    ValueError: if `grad` is an `IndexedSlices` and concat_dim/axis is not
      statically known (or is negative while the rank of the first value is
      not statically known).
    TypeError: if `grad` is neither a `Tensor` nor an `IndexedSlices`.
  """

  def _CreateDenseMaskAndBegin(sizes, concat_dim):
    """Create variables for iteratively slicing a dense gradients tensor."""
    # Since shape is 1-D, shape_of_shape = [rank-of-inputs]
    shape_of_shape = array_ops.shape(sizes[0])
    # Make a vector of length equal to the input's dimensions,
    # with 0's everywhere and 1 in the concat dim position.
    # Note: Can't use sparse_to_dense since it isn't GPU-capable (for now)
    mask = array_ops.concat([
        array_ops.fill(array_ops.expand_dims(concat_dim, 0), 0), [1],
        array_ops.fill(shape_of_shape - concat_dim - 1, 0)
    ], 0)
    begin = array_ops.fill(shape_of_shape, 0)
    return mask, begin

  def _ExtractInputShapes(inputs):
    """Extract the shapes of a set of input tensors."""
    sizes = []
    fully_known = True
    for x in inputs:
      input_shape = array_ops.shape(x)
      # Per-input shapes are only used when every one of them is a constant.
      if not isinstance(input_shape,
                        ops.Tensor) or input_shape.op.type != "Const":
        fully_known = False
        break
      else:
        sizes.append(input_shape)

    if fully_known:
      return sizes
    else:
      return array_ops.shape_n(inputs)

  # Degenerate concatenation, just return grad.
  if len(op.inputs) == 2:
    # `grad` is a single gradient object, so it has to be wrapped in a list
    # before the `None` placeholder for the concat_dim/axis input is attached
    # (`grad + [None]` would not be a list concatenation). A list or tuple of
    # gradients is used as-is.
    grads = list(grad) if isinstance(grad, (list, tuple)) else [grad]
    return grads + [None] if end_value_index <= dim_index else [None] + grads

  concat_dim = op.inputs[dim_index]
  input_values = op.inputs[start_value_index:end_value_index]
  # Using mod here for convenience since concat_dim is already verified
  # in concat implementation to be within the allowed [-rank, rank) range.
  non_neg_concat_dim = concat_dim % array_ops.rank(input_values[0])

  out_grads = []
  if isinstance(grad, ops.Tensor):
    # Get the inputs' tensor shapes
    sizes = _ExtractInputShapes(input_values)
    # The magic number of 16 was found through benchmarking a range of sizes
    # on CPUs and a Maxwell TitanX. A speedup was seen in a large majority of
    # cases when switching implementations at N=16, but it is possible that
    # there will be a small number of performance regressions.
    # pylint: disable=protected-access
    if len(sizes) > 16:
      # extract the size of each input along the concat dimension
      sizes = array_ops.squeeze(
          array_ops.slice(
              array_ops.stack(sizes, axis=1), [non_neg_concat_dim, 0],
              [1, -1]))
      out_grads = array_ops.split(grad, sizes, non_neg_concat_dim)
    else:
      offset = gen_array_ops._concat_offset(non_neg_concat_dim, sizes)
      for (begin, size) in zip(offset, sizes):
        out_grads.append(array_ops.slice(grad, begin, size))
    # pylint: enable=protected-access
  elif isinstance(grad, ops.IndexedSlices):
    concat_dim_static = tensor_util.constant_value(concat_dim)
    if concat_dim_static is None:
      raise ValueError("Can only compute IndexedSlices gradient with "
                       "statically-known concat_dim")
    if concat_dim_static < 0:
      rank = tensor_util.constant_value(array_ops.rank(input_values[0]))
      if rank is None:
        raise ValueError("Can only compute IndexedSlices gradient with "
                         "negative concat_dim when first value rank is "
                         "statically-known.")
      concat_dim_static %= rank
    # Get the inputs' tensor shapes
    sizes = [array_ops.shape(x) for x in input_values]
    if concat_dim_static > 0:
      # IndexedSlices, non_neg_concat_dim > 0. Each input gets IndexedSlices
      # gradients with all the indices, but with grad.values sliced accordingly.
      # This is like the Tensor case, except shape(grad.values)[0] is not equal
      # to shape(sizes[i])[0], since only a subset of the dim-0 values are
      # stored.
      mask, begin = _CreateDenseMaskAndBegin(sizes, non_neg_concat_dim)
      for size in sizes:
        new_values = array_ops.slice(
            grad.values, begin,
            array_ops.concat([[-1], array_ops.slice(size, [1], [-1])], 0))
        out_grads.append(ops.IndexedSlices(new_values, grad.indices, size))
        # Lint complains begin = begin + ...
        begin = math_ops.add(begin, size * mask)
    else:
      # IndexedSlices, concat_dim == 0. Each input gets IndexedSlices gradients
      # only for the relevant indices.
      start = constant_op.constant(0, dtype=grad.indices.dtype)
      for size in sizes:
        size_concat_dim = array_ops.gather(size, non_neg_concat_dim)
        if size_concat_dim.dtype != grad.indices.dtype:
          size_concat_dim = math_ops.cast(
              size_concat_dim, dtype=grad.indices.dtype)
        end = start + size_concat_dim
        # Compute the 1-D Tensor of indices relevant for this input.
        indices_to_select = array_ops.squeeze(
            array_ops.where(
                math_ops.logical_and(grad.indices >= start,
                                     grad.indices < end)),
            squeeze_dims=[1])
        # Re-base the selected indices so they are relative to this input.
        new_indices = array_ops.gather(grad.indices, indices_to_select) - start
        new_values = array_ops.gather(grad.values, indices_to_select)
        out_grads.append(ops.IndexedSlices(new_values, new_indices, size))
        start = end
  else:
    raise TypeError("Expected Tensor or IndexedSlices, got %s" % type(grad))

  return (out_grads + [None]
          if end_value_index <= dim_index else [None] + out_grads)
def frechet_classifier_distance(real_images,
                                generated_images,
                                classifier_fn,
                                num_batches=1):
  """Frechet distance between classifier activations of two image sets.

  This generalizes the Frechet Inception distance to an arbitrary classifier;
  the technique is described in https://arxiv.org/abs/1706.08500.

  For two Gaussian distributions with means m and m_w and covariance matrices
  C and C_w, the distance is

    |m - m_w|^2 + Tr(C + C_w - 2(C * C_w)^(1/2))

  and measures how different the distributions of real and generated images
  (more precisely, of their features) are. Unlike the Inception score, this is
  a true distance and uses information about real world images.

  When computed from sample means and sample covariances the Frechet distance
  is biased, more so for small sample sizes (e.g. even for identical
  distributions the expected distance is large for a small sample). Use the
  same sample size when comparing two generative models.

  NOTE: This function consumes images, computes their activations, and then
  computes the distance. To work from precomputed activations, call
  `frechet_classifier_distance_from_activations()`, which this function also
  uses.

  Args:
    real_images: Real images to use to compute Frechet Inception distance.
    generated_images: Generated images to use to compute Frechet Inception
      distance.
    classifier_fn: A function that takes images and produces activations based
      on a classifier.
    num_batches: Number of batches to split images in to in order to
      efficiently run them through the classifier network.

  Returns:
    The Frechet Inception distance. A floating-point scalar of the same type
    as the output of `classifier_fn`.
  """
  # Split each image set into `num_batches` pieces and stack the pieces so
  # that `map_fn` can iterate over them one batch at a time.
  split_real = array_ops.split(real_images, num_or_size_splits=num_batches)
  split_generated = array_ops.split(
      generated_images, num_or_size_splits=num_batches)
  batched_real = array_ops.stack(split_real)
  batched_generated = array_ops.stack(split_generated)

  def _run_classifier(batches):
    # Compute the activations using the memory-efficient `map_fn`.
    return functional_ops.map_fn(
        fn=classifier_fn,
        elems=batches,
        parallel_iterations=1,
        back_prop=False,
        swap_memory=True,
        name='RunClassifier')

  real_a = _run_classifier(batched_real)
  gen_a = _run_classifier(batched_generated)

  # Ensure the activations have the right shapes: merge the per-batch results
  # back into a single leading dimension.
  real_a = array_ops.concat(array_ops.unstack(real_a), 0)
  gen_a = array_ops.concat(array_ops.unstack(gen_a), 0)

  return frechet_classifier_distance_from_activations(real_a, gen_a)
def safe_embedding_lookup_sparse(embedding_weights,
                                 sparse_ids,
                                 sparse_weights=None,
                                 combiner=None,
                                 default_id=None,
                                 name=None,
                                 partition_strategy="div",
                                 max_norm=None):
  """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding in `embedding_weights` must all be the same shape
  except for the first dimension. The first dimension is allowed to vary as the
  vocabulary size is not necessarily a multiple of `P`.  `embedding_weights`
  may be a `PartitionedVariable` as returned by using `tf.get_variable()` with a
  partitioner.

  Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding vector
  for `default_id` is returned, or the 0-vector if `default_id` is not supplied.

  The ids and weights may be multi-dimensional. Embeddings are always aggregated
  along the last dimension.

  Args:
    embedding_weights:  A list of `P` float tensors or values representing
        partitioned embedding tensors.  Alternatively, a `PartitionedVariable`,
        created by partitioning along dimension 0.  The total unpartitioned
        shape should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the
        vocab size and `e_1, ..., e_m` are the embedding dimensions.
    sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
        ids. `d_0` is typically batch size.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
        float weights corresponding to `sparse_ids`, or `None` if all weights
        are assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
        entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
        the default. Passing `None` selects "mean" and logs a warning.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
        Currently `"div"` and `"mod"` are supported. Default is `"div"`.
    max_norm: If not None, all embeddings are l2-normalized to max_norm before
        combining.

  Returns:
    Dense tensor of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`.

  Raises:
    ValueError: if `embedding_weights` is `None` or empty.
  """
  if combiner is None:
    logging.warn("The default value of combiner will change from \"mean\" "
                 "to \"sqrtn\" after 2016/11/01.")
    combiner = "mean"
  if embedding_weights is None:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)
  if isinstance(embedding_weights, variables.PartitionedVariable):
    embedding_weights = list(embedding_weights)  # get underlying Variables.
  if not isinstance(embedding_weights, list):
    embedding_weights = [embedding_weights]
  if not embedding_weights:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)

  # `embedding_weights` is guaranteed to be a plain list at this point, so no
  # further `PartitionedVariable` unwrapping is needed before conversion.
  dtype = sparse_weights.dtype if sparse_weights is not None else None
  embedding_weights = [
      ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
  ]

  contrib_tensor_util.assert_same_float_dtype(embedding_weights +
                                              [sparse_weights])

  with ops.name_scope(name, "embedding_lookup",
                      embedding_weights + [sparse_ids,
                                           sparse_weights]) as scope:
    # Reshape higher-rank sparse ids and weights to linear segment ids.
    original_shape = sparse_ids.dense_shape
    original_rank_dim = sparse_ids.dense_shape.get_shape()[0]
    # Prefer the statically known rank; fall back to a dynamic size op.
    original_rank = (
        array_ops.size(original_shape)
        if original_rank_dim.value is None
        else original_rank_dim.value)
    # Collapse all leading dimensions into one, keeping the last dimension.
    sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [
        math_ops.reduce_prod(
            array_ops.slice(original_shape, [0], [original_rank - 1])),
        array_ops.gather(original_shape, original_rank - 1)
    ])
    if sparse_weights is not None:
      # Rebuild the weights on the reshaped ids' indices and dense shape.
      sparse_weights = sparse_tensor.SparseTensor(
          sparse_ids.indices,
          sparse_weights.values, sparse_ids.dense_shape)

    # Prune invalid ids and weights.
    sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights)

    # Fill in dummy values for empty features, if necessary.
    sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
        sparse_ids, default_id or 0)
    if sparse_weights is not None:
      sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(
          sparse_weights, 1.0)

    # When `default_id` is None the scope name is reserved for the final
    # `where` op below, so the lookup itself gets an auto-generated name.
    result = embedding_ops.embedding_lookup_sparse(
        embedding_weights,
        sparse_ids,
        sparse_weights,
        combiner=combiner,
        partition_strategy=partition_strategy,
        name=None if default_id is None else scope,
        max_norm=max_norm)

    if default_id is None:
      # Broadcast is_row_empty to the same shape as embedding_lookup_result,
      # for use in Select.
      is_row_empty = array_ops.tile(
          array_ops.reshape(is_row_empty, [-1, 1]),
          array_ops.stack([1, array_ops.shape(result)[1]]))

      # Rows that had no features get the 0-vector instead of the dummy lookup.
      result = array_ops.where(is_row_empty,
                               array_ops.zeros_like(result),
                               result,
                               name=scope)

    # Reshape back from linear ids back into higher-dimensional dense result.
    final_result = array_ops.reshape(
        result,
        array_ops.concat([
            array_ops.slice(
                math_ops.cast(original_shape, dtypes.int32), [0],
                [original_rank - 1]),
            array_ops.slice(array_ops.shape(result), [1], [-1])
        ], 0))
    # Attach static shape info: unknown leading dims of rank (n - 1) followed
    # by the statically known embedding dimensions of `result`.
    final_result.set_shape(
        tensor_shape.unknown_shape(
            (original_rank_dim - 1).value).concatenate(
                result.get_shape()[1:]))
    return final_result