def frames(signal, frame_length, frame_step, name=None):
  """Frame a signal into overlapping frames.

  May be used in front of spectral functions.

  For example:

  ```python
  pcm = tf.placeholder(tf.float32, [None, 9152])
  frames = tf.contrib.signal.frames(pcm, 512, 180)
  magspec = tf.abs(tf.spectral.rfft(frames, [512]))
  image = tf.expand_dims(magspec, 3)
  ```

  Args:
    signal: A `Tensor` of shape `[batch_size, signal_length]`.
    frame_length: An `int32` or `int64` `Tensor`. The length of each frame.
    frame_step: An `int32` or `int64` `Tensor`. The step between frames.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of frames with shape `[batch_size, num_frames, frame_length]`.

  Raises:
    ValueError: if signal does not have rank 2.
  """
  with ops.name_scope(name, "frames", [signal, frame_length, frame_step]):
    signal = ops.convert_to_tensor(signal, name="signal")
    frame_length = ops.convert_to_tensor(frame_length, name="frame_length")
    frame_step = ops.convert_to_tensor(frame_step, name="frame_step")

    signal_rank = signal.shape.ndims

    if signal_rank != 2:
      raise ValueError("expected signal to have rank 2 but was " +
                       str(signal_rank))

    signal_length = array_ops.shape(signal)[1]

    num_frames = math_ops.ceil((signal_length - frame_length) / frame_step)
    num_frames = 1 + math_ops.cast(num_frames, dtypes.int32)

    pad_length = (num_frames - 1) * frame_step + frame_length
    pad_signal = array_ops.pad(signal,
                               [[0, 0], [0, pad_length - signal_length]])

    indices_frame = array_ops.expand_dims(math_ops.range(frame_length), 0)
    indices_frames = array_ops.tile(indices_frame, [num_frames, 1])

    indices_step = array_ops.expand_dims(
        math_ops.range(num_frames) * frame_step, 1)
    indices_steps = array_ops.tile(indices_step, [1, frame_length])

    indices = indices_frames + indices_steps

    # TODO(androbin): remove `transpose` when `gather` gets `axis` support
    pad_signal = array_ops.transpose(pad_signal)
    signal_frames = array_ops.gather(pad_signal, indices)
    signal_frames = array_ops.transpose(signal_frames, perm=[2, 0, 1])

    return signal_frames
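# Illustrative sketch (not part of the original source): the same overlapping-frame
# index construction as `frames` above, replayed with NumPy analogues of the TF ops.
# The variable names and example values below are hypothetical.
import numpy as np

signal = np.arange(10, dtype=np.float32)[None, :]   # [batch_size=1, signal_length=10]
frame_length, frame_step = 4, 3

signal_length = signal.shape[1]
num_frames = 1 + int(np.ceil((signal_length - frame_length) / frame_step))
pad_length = (num_frames - 1) * frame_step + frame_length
padded = np.pad(signal, [(0, 0), (0, pad_length - signal_length)])

# Each row of `indices_frames` is 0..frame_length-1; each row of `indices_steps`
# holds the start offset of one frame. Their sum gathers overlapping windows.
indices_frames = np.tile(np.arange(frame_length)[None, :], [num_frames, 1])
indices_steps = np.tile((np.arange(num_frames) * frame_step)[:, None],
                        [1, frame_length])
indices = indices_frames + indices_steps            # [num_frames, frame_length]

frames_np = padded[:, indices]                      # [1, num_frames, frame_length]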
def _mask_probs(probs, eos_token, finished):
  """Masks log probabilities.

  The result is that finished beams allocate all probability mass to eos and
  unfinished beams remain unchanged.

  Args:
    probs: Log probabilities of shape `[batch_size, beam_width, vocab_size]`
    eos_token: An int32 id corresponding to the EOS token to allocate
      probability to.
    finished: A boolean tensor of shape `[batch_size, beam_width]` that
      specifies which elements in the beam are finished already.

  Returns:
    A tensor of shape `[batch_size, beam_width, vocab_size]`, where unfinished
    beams stay unchanged and finished beams are replaced with a tensor with all
    probability on the EOS token.
  """
  vocab_size = array_ops.shape(probs)[2]
  # All finished examples are replaced with a vector that has all
  # probability on EOS
  finished_row = array_ops.one_hot(
      eos_token,
      vocab_size,
      dtype=probs.dtype,
      on_value=0.,
      off_value=probs.dtype.min)
  finished_probs = array_ops.tile(
      array_ops.reshape(finished_row, [1, 1, -1]),
      array_ops.concat([array_ops.shape(finished), [1]], 0))
  finished_mask = array_ops.tile(
      array_ops.expand_dims(finished, 2), [1, 1, vocab_size])

  return array_ops.where(finished_mask, finished_probs, probs)
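# Illustrative sketch (not part of the original source): the masking pattern used by
# `_mask_probs`, written with NumPy analogues. Shapes, token ids and values below
# are hypothetical.
import numpy as np

batch_size, beam_width, vocab_size, eos_token = 1, 2, 4, 3
probs = np.log(np.full((batch_size, beam_width, vocab_size), 0.25))
finished = np.array([[True, False]])                # beam 0 finished, beam 1 running

finished_row = np.full(vocab_size, np.finfo(probs.dtype).min)
finished_row[eos_token] = 0.0                       # all mass on EOS for finished beams
finished_probs = np.tile(finished_row[None, None, :], (batch_size, beam_width, 1))
finished_mask = np.tile(finished[:, :, None], (1, 1, vocab_size))

masked = np.where(finished_mask, finished_probs, probs)
# masked[0, 0] keeps only the EOS entry; masked[0, 1] equals the original log probs.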
def _SumGrad(op, grad):
  """Gradient for Sum."""
  # Fast path for when reducing to a scalar and ndims is known: adds only
  # Reshape and Tile ops (and possibly a Shape).
  input_0_shape = op.inputs[0]._shape_tuple()  # pylint: disable=protected-access
  if input_0_shape is not None:
    axes = tensor_util.constant_value(op.inputs[1])
    if axes is not None:
      rank = len(input_0_shape)
      if np.array_equal(axes, np.arange(rank)):  # Reduce all dims.
        grad = array_ops.reshape(grad, [1] * rank)
        # If shape is not fully defined (but rank is), we use Shape.
        if None not in input_0_shape:
          input_shape = input_0_shape
        else:
          input_shape = array_ops.shape(op.inputs[0])
        return [array_ops.tile(grad, input_shape), None]

  input_shape = array_ops.shape(op.inputs[0])
  # TODO(apassos) remove this once device placement for eager ops makes more
  # sense.
  with ops.colocate_with(input_shape):
    output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
    tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims)
  grad = array_ops.reshape(grad, output_shape_kept_dims)
  return [array_ops.tile(grad, tile_scaling), None]
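# Illustrative sketch (not part of the original source): why reshape + tile recovers
# the Sum gradient. For y = reduce_sum(x, axis=1), dL/dx simply repeats dL/dy along
# the reduced axis. NumPy analogue with hypothetical shapes.
import numpy as np

x = np.arange(6, dtype=np.float64).reshape(2, 3)
y = x.sum(axis=1)                       # forward pass: reduce over axis 1
grad_y = np.array([10., 20.])           # upstream gradient dL/dy, shape [2]

output_shape_kept_dims = (2, 1)         # reduced_shape(input_shape, axes=[1])
tile_scaling = (1, 3)                   # input_shape // output_shape_kept_dims
grad_x = np.tile(grad_y.reshape(output_shape_kept_dims), tile_scaling)
# grad_x == [[10., 10., 10.], [20., 20., 20.]] -- the gradient of reduce_sum.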
def to_weighted_sum(self, input_tensor, num_outputs=1, weight_collections=None, trainable=True): """Returns a Tensor as linear predictions and a list of created Variable.""" dimension = self.source_column.dimension batch_size = array_ops.shape(input_tensor)[0] if dimension > 1: i1 = array_ops.reshape(array_ops.tile(array_ops.expand_dims( math_ops.range(0, batch_size), 1), [1, dimension]), [-1]) i2 = array_ops.tile(math_ops.range(0, dimension), [batch_size]) # Flatten the bucket indices and unique them across dimensions # E.g. 2nd dimension indices will range from k to 2*k-1 with k buckets # TODO(chapelle): move that logic to insert_transformed_feature to ensure # unique buckets across dimensions after crossing. bucket_indices = array_ops.reshape(input_tensor, [-1]) + self.length * i2 else: # Simpler indices when dimension=1 i1 = math_ops.range(0, batch_size) i2 = array_ops.zeros([batch_size], dtype=dtypes.int32) bucket_indices = array_ops.reshape(input_tensor, [-1]) indices = math_ops.to_int64(array_ops.transpose(array_ops.pack((i1, i2)))) shape = math_ops.to_int64(array_ops.pack([batch_size, 1])) sparse_id_values = ops.SparseTensor(indices, bucket_indices, shape) vocab_size = self.length * self.source_column.dimension return _create_embedding_lookup( sparse_id_values, vocab_size, num_outputs, _add_variable_collection(weight_collections), 0., "sum", trainable, self.name + "_weights")
def testShapeFunctionEdgeCases(self): # Unknown multiples shape. inp = constant_op.constant(0.0, shape=[4, 4, 4, 4]) tiled = array_ops.tile(inp, array_ops.placeholder(dtypes.int32)) self.assertEqual([None, None, None, None], tiled.get_shape().as_list()) # Unknown input shape. inp = array_ops.placeholder(dtypes.float32) tiled = array_ops.tile(inp, [2, 2, 2, 2]) self.assertEqual([None, None, None, None], tiled.get_shape().as_list()) # Unknown input and multiples shape. inp = array_ops.placeholder(dtypes.float32) tiled = array_ops.tile(inp, array_ops.placeholder(dtypes.int32)) self.assertIs(None, tiled.get_shape().ndims) # Known input and partially known multiples. inp = constant_op.constant(0.0, shape=[1, 1]) tiled = array_ops.tile(inp, [array_ops.placeholder(dtypes.int32), 7]) self.assertEqual([None, 7], tiled.get_shape().as_list()) # Mismatched input rank and multiples length. inp = array_ops.placeholder(dtypes.float32, shape=[None, None]) with self.assertRaises(ValueError): tiled = array_ops.tile( inp, array_ops.placeholder( dtypes.int32, shape=[3]))
def gather_tree_from_array(t, parent_ids, sequence_length): """Calculates the full beams for `TensorArray`s. Args: t: A stacked `TensorArray` of size `max_time` that contains `Tensor`s of shape `[batch_size, beam_width, s]` or `[batch_size * beam_width, s]` where `s` is the depth shape. parent_ids: The parent ids of shape `[max_time, batch_size, beam_width]`. sequence_length: The sequence length of shape `[batch_size, beam_width]`. Returns: A `Tensor` which is a stacked `TensorArray` of the same size and type as `t` and where beams are sorted in each `Tensor` according to `parent_ids`. """ max_time = parent_ids.shape[0].value or array_ops.shape(parent_ids)[0] batch_size = parent_ids.shape[1].value or array_ops.shape(parent_ids)[1] beam_width = parent_ids.shape[2].value or array_ops.shape(parent_ids)[2] # Generate beam ids that will be reordered by gather_tree. beam_ids = array_ops.expand_dims( array_ops.expand_dims(math_ops.range(beam_width), 0), 0) beam_ids = array_ops.tile(beam_ids, [max_time, batch_size, 1]) mask = array_ops.sequence_mask( sequence_length, maxlen=max_time, dtype=dtypes.int32) mask = array_ops.transpose(mask, perm=[2, 0, 1]) # Use beam_width + 1 to mark the end of beam. masked_beam_ids = (beam_ids * mask) + (1 - mask) * (beam_width + 1) max_sequence_lengths = math_ops.to_int32( math_ops.reduce_max(sequence_length, axis=1)) sorted_beam_ids = beam_search_ops.gather_tree( step_ids=masked_beam_ids, parent_ids=parent_ids, max_sequence_lengths=max_sequence_lengths, end_token=beam_width + 1) # For out of range steps, simply copy the same beam. sorted_beam_ids = array_ops.where( math_ops.cast(mask, dtypes.bool), x=sorted_beam_ids, y=beam_ids) # Generate indices for gather_nd. time_ind = array_ops.tile(array_ops.reshape( math_ops.range(max_time), [-1, 1, 1]), [1, batch_size, beam_width]) batch_ind = array_ops.tile(array_ops.reshape( math_ops.range(batch_size), [-1, 1, 1]), [1, max_time, beam_width]) batch_ind = array_ops.transpose(batch_ind, perm=[1, 0, 2]) indices = array_ops.stack([time_ind, batch_ind, sorted_beam_ids], -1) # Gather from a tensor with collapsed additional dimensions. gather_from = t final_shape = array_ops.shape(gather_from) gather_from = array_ops.reshape( gather_from, [max_time, batch_size, beam_width, -1]) ordered = array_ops.gather_nd(gather_from, indices) ordered = array_ops.reshape(ordered, final_shape) return ordered
def testInvalidDim(self):
  with self.test_session():
    inp = np.random.rand(4, 1).astype("f")
    a = constant_op.constant(
        [float(x) for x in inp.ravel(order="C")],
        shape=[4, 1],
        dtype=dtypes.float32)
    # Wrong length of multiples.
    with self.assertRaises(ValueError):
      array_ops.tile(a, [1, 4, 2])
    # Wrong rank for multiples.
    with self.assertRaises(ValueError):
      array_ops.tile(a, [[2, 3], [3, 4]]).eval()
def make_tril_ids(n):
  """Internal helper to create vector of linear indices into y."""
  cols = array_ops.reshape(array_ops.tile(math_ops.range(n), [n]), [n, n])
  rows = array_ops.tile(
      array_ops.expand_dims(math_ops.range(n), -1), [1, n])
  pred = math_ops.greater(cols, rows)
  tril_ids = array_ops.tile(array_ops.reshape(
      math_ops.cumsum(math_ops.range(n)), [n, 1]), [1, n]) + cols
  tril_ids = math_ops.select(pred,
                             array_ops.zeros([n, n], dtype=dtypes.int32),
                             tril_ids + 1)
  tril_ids = array_ops.reshape(tril_ids, [-1])
  return tril_ids
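# Illustrative sketch (not part of the original source): `make_tril_ids` evaluated
# with NumPy for n = 3; np.where stands in for the older math_ops.select.
import numpy as np

n = 3
cols = np.tile(np.arange(n), n).reshape(n, n)          # [[0, 1, 2]] repeated row-wise
rows = np.tile(np.arange(n)[:, None], (1, n))          # [[0, 0, 0], [1, 1, 1], [2, 2, 2]]
pred = cols > rows                                     # strict upper triangle
tril_ids = np.tile(np.cumsum(np.arange(n)).reshape(n, 1), (1, n)) + cols
tril_ids = np.where(pred, np.zeros_like(tril_ids), tril_ids + 1)
print(tril_ids.reshape(-1))                            # [1 0 0 2 3 0 4 5 6]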
def _initialize_variables(self, data, initial_means=None): """Initializes variables. Args: data: a list of Tensors with data, each row is a new example. initial_means: a Tensor with a matrix of means. """ first_shard = data[0] # Initialize means: num_classes X 1 X dimensions. if initial_means is not None: means = array_ops.expand_dims(initial_means, 1) else: # Sample data randomly means = array_ops.expand_dims( _init_clusters_random(data, self._num_classes, self._random_seed), 1) # Initialize covariances. if self._covariance_type == FULL_COVARIANCE: cov = _covariance(first_shard, False) + self._min_var # A matrix per class, num_classes X dimensions X dimensions covs = array_ops.tile( array_ops.expand_dims(cov, 0), [self._num_classes, 1, 1]) elif self._covariance_type == DIAG_COVARIANCE: cov = _covariance(first_shard, True) + self._min_var # A diagonal per row, num_classes X dimensions. covs = array_ops.tile( array_ops.expand_dims(array_ops.diag_part(cov), 0), [self._num_classes, 1]) with ops.colocate_with(self._cluster_centers_initialized): initialized = control_flow_ops.with_dependencies( [means, covs], array_ops.identity(self._cluster_centers_initialized)) self._init_ops = [] with ops.colocate_with(self._means): init_means = state_ops.assign(self._means, means, validate_shape=False) init_means = control_flow_ops.with_dependencies( [init_means], state_ops.assign(self._cluster_centers_initialized, True)) self._init_ops.append(control_flow_ops.cond(initialized, control_flow_ops.no_op, lambda: init_means).op) with ops.colocate_with(self._covs): init_covs = state_ops.assign(self._covs, covs, validate_shape=False) init_covs = control_flow_ops.with_dependencies( [init_covs], state_ops.assign(self._cluster_centers_initialized, True)) self._init_ops.append(control_flow_ops.cond(initialized, control_flow_ops.no_op, lambda: init_covs).op)
def runFiniteDifferences(self, shapes, dtypes=(dtypes_lib.float32, dtypes_lib.float64), scalarTest=False): with self.test_session(use_gpu=False): for shape in shapes: for batch in False, True: for dtype in dtypes: if not scalarTest: x = constant_op.constant(np.random.randn(shape[0], shape[1]), dtype) tensor = math_ops.matmul(x, array_ops.transpose(x)) / shape[0] else: # This is designed to be a faster test for larger matrices. x = constant_op.constant(np.random.randn(), dtype) R = constant_op.constant(np.random.randn(shape[0], shape[1]), dtype) e = math_ops.mul(R, x) tensor = math_ops.matmul(e, array_ops.transpose(e)) / shape[0] # Inner-most matrices in tensor are positive definite. if batch: tensor = array_ops.tile(array_ops.expand_dims(tensor, 0), [4, 1, 1]) y = linalg_ops.cholesky(tensor) if scalarTest: y = math_ops.reduce_mean(y) error = gradient_checker.compute_gradient_error(x, x._shape_as_list(), y, y._shape_as_list()) tf_logging.info("error = %f", error) if dtype == dtypes_lib.float64: self.assertLess(error, 1e-5) else: self.assertLess(error, 3e-3)
def _entropy(self): if (not self.distribution.is_continuous or not self.bijector.is_constant_jacobian): raise NotImplementedError("entropy is not implemented") # Suppose Y = g(X) where g is a diffeomorphism and X is a continuous rv. It # can be shown that: # H[Y] = H[X] + E_X[(log o abs o det o J o g)(X)]. # If is_constant_jacobian then: # E_X[(log o abs o det o J o g)(X)] = (log o abs o det o J o g)(c) # where c can by anything. entropy = self.distribution.entropy() if self._is_maybe_event_override: # H[X] = sum_i H[X_i] if X_i are mutually independent. # This means that a reduce_sum is a simple rescaling. entropy *= math_ops.cast(math_ops.reduce_prod(self._override_event_shape), dtype=entropy.dtype.base_dtype) if self._is_maybe_batch_override: new_shape = array_ops.concat([ _ones_like(self._override_batch_shape), self.distribution.batch_shape_tensor() ], 0) entropy = array_ops.reshape(entropy, new_shape) multiples = array_ops.concat([ self._override_batch_shape, _ones_like(self.distribution.batch_shape_tensor()) ], 0) entropy = array_ops.tile(entropy, multiples) dummy = array_ops.zeros([], self.dtype) entropy -= self.bijector.inverse_log_det_jacobian(dummy) entropy.set_shape(self.batch_shape) return entropy
def _align_matrices(x, y):
  """Aligns x and y tensors to allow computations over pairs of their rows."""
  x_matrix = _to_matrix(x)
  y_matrix = _to_matrix(y)
  x_shape = x_matrix.shape
  y_shape = y_matrix.shape
  if y_shape[1] != x_shape[1]:  # dimensions do not match.
    raise ValueError(
        'The outermost dimensions of the input tensors should match. Given: {} '
        'vs {}.'.format(y_shape[1], x_shape[1]))

  x_tile = array_ops.tile(
      array_ops.expand_dims(x_matrix, 1), [1, y_shape[0], 1])
  y_tile = array_ops.tile(
      array_ops.expand_dims(y_matrix, 0), [x_shape[0], 1, 1])
  return x_tile, y_tile
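# Illustrative sketch (not part of the original source): the expand_dims + tile
# pattern from `_align_matrices`, which pairs every row of x with every row of y.
# NumPy analogue with hypothetical shapes.
import numpy as np

x = np.arange(6.).reshape(3, 2)     # 3 rows, feature dim 2
y = np.arange(4.).reshape(2, 2)     # 2 rows, feature dim 2

x_tile = np.tile(x[:, None, :], (1, y.shape[0], 1))   # [3, 2, 2]
y_tile = np.tile(y[None, :, :], (x.shape[0], 1, 1))   # [3, 2, 2]

# x_tile[i, j] == x[i] and y_tile[i, j] == y[j], so any row-pairwise computation
# (e.g. squared distances) reduces to a single elementwise reduction:
pairwise_sq_dist = ((x_tile - y_tile) ** 2).sum(-1)   # shape [3, 2]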
def testPrefetchBufferUtilization(self, dataset_transformation): aggregator = stats_aggregator.StatsAggregator() dataset = dataset_ops.Dataset.range(100).map( lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).prefetch(-1) dataset = dataset_transformation(dataset, aggregator) iterator = dataset_ops.make_initializable_iterator(dataset) next_element = iterator.get_next() summary_t = aggregator.get_summary() with self.cached_session() as sess: self.evaluate(iterator.initializer) for i in range(100): self.assertAllEqual( np.array([i] * i, dtype=np.int64), self.evaluate(next_element)) summary_str = self.evaluate(summary_t) self._assertSummaryHasCount(summary_str, "Prefetch::buffer_utilization", float(i + 1)) self._assertSummaryContains(summary_str, "Prefetch::buffer_capacity") self._assertSummaryContains(summary_str, "Prefetch::buffer_size") self._assertSummaryHasRange(summary_str, "Prefetch::buffer_utilization", 0, 1) with self.assertRaises(errors.OutOfRangeError): self.evaluate(next_element) summary_str = self.evaluate(summary_t) self._assertSummaryHasCount(summary_str, "Prefetch::buffer_utilization", 100)
def dataset_fn():
  dataset = dataset_ops.Dataset.range(10).map(
      lambda x: array_ops.tile([x], ops.convert_to_tensor([x])),
      num_parallel_calls=optimization.AUTOTUNE)
  options = dataset_ops.Options()
  options.experimental_autotune = True
  return dataset.with_options(options)
def inverse_stft_window_fn_inner(frame_length, dtype):
  """Computes a window that can be used in `inverse_stft`.

  Args:
    frame_length: An integer scalar `Tensor`. The window length in samples.
    dtype: Data type of waveform passed to `stft`.

  Returns:
    A window suitable for reconstructing original waveform in `inverse_stft`.

  Raises:
    ValueError: If `frame_length` is not scalar, `forward_window_fn` is not a
    callable that takes a window length and a `dtype` keyword argument and
    returns a `[window_length]` `Tensor` of samples in the provided datatype,
    or `frame_step` is not scalar.
  """
  # Note: `name`, `forward_window_fn` and `frame_step` are captured from the
  # enclosing `inverse_stft_window_fn` closure.
  with ops.name_scope(name, 'inverse_stft_window_fn', [forward_window_fn]):
    frame_length = ops.convert_to_tensor(frame_length, name='frame_length')
    frame_length.shape.assert_has_rank(0)

    # Use equation 7 from Griffin + Lim.
    forward_window = forward_window_fn(frame_length, dtype=dtype)
    denom = math_ops.square(forward_window)
    overlaps = -(-frame_length // frame_step)  # Ceiling division.
    denom = array_ops.pad(denom, [(0, overlaps * frame_step - frame_length)])
    denom = array_ops.reshape(denom, [overlaps, frame_step])
    denom = math_ops.reduce_sum(denom, 0, keep_dims=True)
    denom = array_ops.tile(denom, [overlaps, 1])
    denom = array_ops.reshape(denom, [overlaps * frame_step])

    return forward_window / denom[:frame_length]
def _ApplyLengthsToBatch(sequence_lengths, tf_output):
  # TODO(drpng): just use Update so that we don't carry over the gradients?
  """Sets the output to be zero at the end of the sequence."""
  # output is batch major.
  batch_size, max_time, vector_size = tf_output.shape
  output_time = array_ops.tile(math_ops.range(0, max_time), [batch_size])
  output_time = array_ops.reshape(output_time, [batch_size, max_time])
  lengths = array_ops.tile(
      array_ops.reshape(sequence_lengths, [-1, 1]), [1, max_time])
  is_less = math_ops.cast(
      math_ops.less(output_time, lengths), dtype=dtypes.float32)
  keep_mask = array_ops.tile(
      array_ops.expand_dims(is_less, -1), [1, 1, vector_size])
  final_output = keep_mask * tf_output
  return final_output
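# Illustrative sketch (not part of the original source): the tile-based mask built
# by `_ApplyLengthsToBatch`, written with NumPy. Values below are hypothetical.
import numpy as np

batch_size, max_time, vector_size = 2, 4, 3
sequence_lengths = np.array([2, 3])
tf_output = np.ones((batch_size, max_time, vector_size), dtype=np.float32)

output_time = np.tile(np.arange(max_time), batch_size).reshape(batch_size, max_time)
lengths = np.tile(sequence_lengths.reshape(-1, 1), (1, max_time))
is_less = (output_time < lengths).astype(np.float32)          # [batch, time]
keep_mask = np.tile(is_less[:, :, None], (1, 1, vector_size)) # [batch, time, vector]
final_output = keep_mask * tf_output
# Steps past each sequence length are zeroed:
# final_output[0, 2:] == 0 and final_output[1, 3:] == 0.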
def _BiasAddGradGrad(op, received_grad):
  """Gradient for the BiasAddGrad op.

  Args:
    op: BiasAddGrad op for which we are calculating gradients.
    received_grad: The gradients passed to the BiasAddGrad op.

  Returns:
    A single gradient Tensor for the input to BiasAddGrad (which
    is the gradient of the bias term in BiasAdd)
  """
  try:
    data_format = op.get_attr("data_format")
  except ValueError:
    data_format = None

  shape = array_ops.shape(op.inputs[0])
  rank = array_ops.rank(op.inputs[0])
  bias_shape = array_ops.shape(received_grad)

  if data_format == b"NCHW":
    expanded_shape = array_ops.concat([
        array_ops.ones_like(shape[:-3]), bias_shape,
        array_ops.ones_like(shape[-2:])
    ], 0)
    tile_mults = array_ops.concat([shape[:-3], [1], shape[-2:]], 0)
  else:
    expanded_shape = array_ops.concat(
        [array_ops.ones_like(shape[:-1]), bias_shape], 0)
    tile_mults = array_ops.concat([shape[:-1], [1]], 0)

  expanded_grad = array_ops.reshape(received_grad, expanded_shape)
  return array_ops.tile(expanded_grad, tile_mults)
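# Illustrative sketch (not part of the original source): the NHWC branch of
# `_BiasAddGradGrad` expressed with NumPy, showing how the bias gradient is
# reshaped to rank 4 and tiled back to the input shape. Shapes are hypothetical.
import numpy as np

input_shape = (2, 5, 5, 3)                       # NHWC input to BiasAdd
received_grad = np.arange(3, dtype=np.float32)   # gradient w.r.t. the bias, shape [C]

expanded_shape = (1, 1, 1, 3)                    # ones_like(shape[:-1]) ++ bias_shape
tile_mults = (2, 5, 5, 1)                        # shape[:-1] ++ [1]
expanded_grad = np.tile(received_grad.reshape(expanded_shape), tile_mults)
assert expanded_grad.shape == input_shape
# Every batch/spatial position receives the same bias gradient.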
def testUnknownInputShape(self): """Importing can call _TileShape without shape of <multiples> known.""" with self.test_session(): inp = array_ops.placeholder(dtypes.float32) # unknown shape multiples = constant_op.constant([1, 2, 3, 4], dtype=np.int32) tiled = array_ops.tile(inp, multiples) gdef = tiled.graph.as_graph_def() # Move the tile op to the start of the graph so that shapes of its inputs # are not available when the shape function runs on import. swapped = False for i, n in enumerate(gdef.node): if n.op == "Tile": # Swap tile op to be first in gdef.node assert i != 0 new_node = node_def_pb2.NodeDef() new_node.CopyFrom(gdef.node[i]) gdef.node[i].CopyFrom(gdef.node[0]) gdef.node[0].CopyFrom(new_node) swapped = True assert swapped tiled_imported, = importer.import_graph_def( gdef, return_elements=[tiled.name]) self.assertEqual(4, tiled_imported.get_shape().ndims)
def _SumGrad(op, grad):
  """Gradient for Sum."""
  input_shape = array_ops.shape(op.inputs[0])
  output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
  tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims)
  grad = array_ops.reshape(grad, output_shape_kept_dims)
  return [array_ops.tile(grad, tile_scaling), None]
def _model_start_state_placeholders( self, batch_size_tensor, static_batch_size=None): """Creates placeholders with zeroed start state for the current model.""" gathered_state = {} # Models may not know the shape of their state without creating some # variables/ops. Avoid polluting the default graph by making a new one. We # use only static metadata from the returned Tensors. with ops.Graph().as_default(): self._model.initialize_graph() # Evaluate the initial state as same-dtype "zero" values. These zero # constants aren't used, but are necessary for feeding to # placeholder_with_default for the "cold start" case where state is not # fed to the model. def _zeros_like_constant(tensor): return tensor_util.constant_value(array_ops.zeros_like(tensor)) start_state = nest.map_structure( _zeros_like_constant, self._model.get_start_state()) for prefixed_state_name, state in ts_head_lib.state_to_dictionary( start_state).items(): state_shape_with_batch = tensor_shape.TensorShape( (static_batch_size,)).concatenate(state.shape) default_state_broadcast = array_ops.tile( state[None, ...], multiples=array_ops.concat( [batch_size_tensor[None], array_ops.ones(len(state.shape), dtype=dtypes.int32)], axis=0)) gathered_state[prefixed_state_name] = array_ops.placeholder_with_default( input=default_state_broadcast, name=prefixed_state_name, shape=state_shape_with_batch) return gathered_state
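# Illustrative sketch (not part of the original source): broadcasting a per-model
# "zero" start state across the batch dimension via tile, as done for
# `default_state_broadcast` above. NumPy analogue; the state shape is hypothetical.
import numpy as np

state = np.zeros((2, 3))        # hypothetical start state for one example
batch_size = 5

multiples = np.concatenate([[batch_size], np.ones(state.ndim, dtype=np.int64)])
default_state_broadcast = np.tile(state[None, ...], multiples)
assert default_state_broadcast.shape == (batch_size,) + state.shape   # (5, 2, 3)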
def get_placements(self, *args, **kwargs): num_children = self.hparams.num_children with variable_scope.variable_scope("controller_{}".format(self.ctrl_id)): actions_cache = variable_scope.get_local_variable( "actions_cache", initializer=init_ops.zeros_initializer, dtype=dtypes.int32, shape=[num_children, self.num_groups], trainable=False) x = array_ops.tile(self.seq2seq_input_layer, [num_children, 1, 1]) last_c, last_h, attn_mem = self.encode(x) actions, log_probs = {}, {} actions["sample"], log_probs["sample"] = ( self.decode( x, last_c, last_h, attn_mem, mode="sample")) actions["target"], log_probs["target"] = ( self.decode( x, last_c, last_h, attn_mem, mode="target", y=actions_cache)) actions["greedy"], log_probs["greedy"] = ( self.decode( x, last_c, last_h, attn_mem, mode="greedy")) actions["sample"] = control_flow_ops.cond( self.global_step < self.hparams.stop_sampling, lambda: state_ops.assign(actions_cache, actions["sample"]), lambda: state_ops.assign(actions_cache, actions["target"])) self.actions_cache = actions_cache return actions, log_probs
def testTile(self): if test.is_gpu_available(cuda_only=True): random_seed.set_random_seed(0) x = random_ops.truncated_normal([1, 784], seed=0) conv = _two_layer_model(x) multiple = array_ops.placeholder(dtype='int32') tile = array_ops.tile(conv, multiple) output = array_ops.identity(tile) multiple_val = [2, 3, 4, 1] with session.Session() as sess: output_val_ref = sess.run(output, feed_dict={multiple: multiple_val}) with session.Session(config=_get_config()) as sess: metadata = config_pb2.RunMetadata() output_val = sess.run( output, run_metadata=metadata, feed_dict={ multiple: multiple_val }) nodes = [] num_transposes = 0 for node in metadata.cost_graph.node: if _is_transpose(node.name): num_transposes += 1 nodes.append(node.name) # Four transposes were initially added in the Expand phase of # LayoutOptimizer; two of them are cancelled out in the Collapse phase. expected_num_transposes = 2 self.assertEqual(expected_num_transposes, num_transposes) self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) self._assert_trans_nchw_to_nhwc('Tile-0-0', nodes) self._assert_vec_nhwc_to_nchw('Tile-1', nodes) self.assertAllClose(output_val_ref, output_val, atol=1e-3)
def testBytesProduced(self): stats_aggregator = stats_ops.StatsAggregator() dataset = dataset_ops.Dataset.range(100).map( lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).apply( stats_ops.bytes_produced_stats("bytes_produced")).apply( stats_ops.set_stats_aggregator(stats_aggregator)) iterator = dataset.make_initializable_iterator() next_element = iterator.get_next() summary_t = stats_aggregator.get_summary() with self.test_session() as sess: sess.run(iterator.initializer) expected_sum = 0.0 for i in range(100): self.assertAllEqual( np.array([i] * i, dtype=np.int64), sess.run(next_element)) summary_str = sess.run(summary_t) self._assertSummaryHasCount(summary_str, "bytes_produced", float(i + 1)) expected_sum += i * 8.0 self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum) with self.assertRaises(errors.OutOfRangeError): sess.run(next_element) summary_str = sess.run(summary_t) self._assertSummaryHasCount(summary_str, "bytes_produced", 100.0) self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum)
def testPrefetchBufferUtilization(self, dataset_transformation): aggregator = stats_aggregator.StatsAggregator() dataset = dataset_ops.Dataset.range(100).map( lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).prefetch(-1) dataset = dataset_transformation(dataset, aggregator) next_element = self.getNext(dataset, requires_initialization=True) for i in range(100): self.assertAllEqual( np.array([i] * i, dtype=np.int64), self.evaluate(next_element())) summary_str = self.evaluate(aggregator.get_summary()) self._assertSummaryHasCount( summary_str, self.regexForNodeName("PrefetchDataset", "buffer_utilization"), float(i + 1)) self._assertSummaryContains( summary_str, self.regexForNodeName("PrefetchDataset", "buffer_capacity")) self._assertSummaryContains( summary_str, self.regexForNodeName("PrefetchDataset", "buffer_size")) self._assertSummaryHasRange( summary_str, self.regexForNodeName("PrefetchDataset", "buffer_utilization"), 0, 1) with self.assertRaises(errors.OutOfRangeError): self.evaluate(next_element()) summary_str = self.evaluate(aggregator.get_summary()) self._assertSummaryHasCount( summary_str, self.regexForNodeName("PrefetchDataset", "buffer_utilization"), 100)
def testTypes(self): types_to_test = { "bool": (dtypes.bool, bool), "float32": (dtypes.float32, float), "float64": (dtypes.float64, float), "complex64": (dtypes.complex64, complex), "complex128": (dtypes.complex128, complex), "uint8": (dtypes.uint8, int), "int8": (dtypes.int8, int), "int16": (dtypes.int16, int), "int32": (dtypes.int32, int), "int64": (dtypes.int64, int), bytes: (dtypes.string, bytes) } for dtype_np, (dtype_tf, cast) in types_to_test.items(): with self.cached_session(use_gpu=True): inp = np.random.rand(4, 1).astype(dtype_np) a = constant_op.constant( [cast(x) for x in inp.ravel(order="C")], shape=[4, 1], dtype=dtype_tf) tiled = array_ops.tile(a, [1, 4]) result = self.evaluate(tiled) self.assertEqual(result.shape, (4, 4)) self.assertEqual([4, 4], tiled.get_shape()) self.assertAllEqual(result, np.tile(inp, (1, 4)))
def _centered_bias_step(targets, loss_fn, num_label_columns):
  centered_bias = ops.get_collection("centered_bias")
  batch_size = array_ops.shape(targets)[0]
  logits = array_ops.reshape(
      array_ops.tile(centered_bias[0], [batch_size]),
      [batch_size, num_label_columns])
  loss = loss_fn(logits, targets)
  return train.AdagradOptimizer(0.1).minimize(loss, var_list=centered_bias)
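# Illustrative sketch (not part of the original source): the tile-then-reshape step
# in `_centered_bias_step`, shown with NumPy and hypothetical values.
import numpy as np

num_label_columns = 3
centered_bias = np.array([0.1, -0.2, 0.3], dtype=np.float32)  # [num_label_columns]
batch_size = 4

logits = np.tile(centered_bias, batch_size).reshape(batch_size, num_label_columns)
# Every row of `logits` is a copy of `centered_bias`, one row per example in the batch.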
def _ProdGrad(op, grad):
  """Gradient for Prod."""
  # TODO(kearnes): this gives NaNs for 0s in the input tensor
  _, new_output_shape, input_shape = _ReductionGradAssist(op)
  tile_scaling = input_shape // new_output_shape
  grad = array_ops.reshape(grad * op.outputs[0], new_output_shape)
  grad = math_ops.div(array_ops.tile(grad, tile_scaling), op.inputs[0])
  return grad, None
def testEmpty(self):
  with self.test_session():
    inp = np.random.rand(2, 3).astype(np.float32)
    a = constant_op.constant(inp)
    tiled = array_ops.tile(a, [5, 0])
    result = tiled.eval()
  self.assertEqual(result.shape, (10, 0))
  self.assertEqual([10, 0], tiled.get_shape())
def testGradientStridedReductionGC(self):
  with self.test_session():
    inp = np.random.rand(4, 2).astype("f")
    a = constant_op.constant(
        [float(x) for x in inp.flatten()], shape=[4, 2], dtype=dtypes.float32)
    tiled = array_ops.tile(a, [1, 2])
    err = gradient_checker.compute_gradient_error(a, [4, 2], tiled, [4, 4])
  self.assertLess(err, 1e-3)
def _tile_ragged_values(rt_input, multiples, const_multiples=None): """Builds flat_values tensor for a tiled `RaggedTensor`. Returns a tensor that repeats the values in `rt_input.flat_values` in the appropriate pattern to construct a `RaggedTensor` that tiles `rt_input` as specified by `multiples`. Args: rt_input: The `RaggedTensor` whose values should be repeated. multiples: A 1-D integer `tensor`, indicating how many times each dimension should be repeated. const_multiples: Optional constant value for multiples. Used to skip tiling dimensions where `multiples=1`. Returns: A `Tensor` with the same type and rank as `rt_input.flat_values`. #### Example: ```python >>> rt = tf.ragged.constant([[1, 2], [3]]) >>> _tile_ragged_values(rt, [3, 2]) [1, 2, 1, 2, 3, 3, 1, 2, 1, 2, 3, 3, 1, 2, 1, 2, 3, 3] ``` """ ragged_rank = rt_input.ragged_rank nested_splits = rt_input.nested_row_splits # Pointers to the values in `rt_input.flat_values`. inner_value_ids = math_ops.range(nested_splits[-1][-1]) # For each ragged dimension (working from the innermost to outermost), # expand `inner_value_ids` as necessary to tile that dimension. prev_splits = None for axis in range(ragged_rank, 0, -1): # Ragged splits for this dimension. splits = nested_splits[axis - 1] # Adjust splits so they point into `inner_value_ids` (instead of just # pointing into the next dimension's values). if prev_splits is not None: # Not the first pass through the loop. splits = array_ops.gather(prev_splits * multiples[axis + 1], splits) # Repeat each element in this ragged dimension `multiples[axis]` times. if const_multiples is None or const_multiples[axis] != 1: inner_value_ids = ragged_util.repeat_ranges(inner_value_ids, splits, multiples[axis]) prev_splits = splits # Gather the tiled inner values. ragged_tiled_values = array_ops.gather(rt_input.flat_values, inner_value_ids) # Tile the flat_values for the uniform dimensions (i.e., for `axis=0` plus # `axis=range(ragged_rank, rank)`). inner_repeats = array_ops.concat([multiples[:1], multiples[ragged_rank + 1:]], axis=0) return array_ops.tile(ragged_tiled_values, inner_repeats)
def update_confusion_matrix_variables(variables_to_update, y_true, y_pred, thresholds, top_k=None, class_id=None, sample_weight=None): """Returns op to update the given confusion matrix variables. For every pair of values in y_true and y_pred: true_positive: y_true == True and y_pred > thresholds false_negatives: y_true == True and y_pred <= thresholds true_negatives: y_true == False and y_pred <= thresholds false_positive: y_true == False and y_pred > thresholds The results will be weighted and added together. When multiple thresholds are provided, we will repeat the same for every threshold. For estimation of these metrics over a stream of data, the function creates an `update_op` operation that updates the given variables. If `sample_weight` is `None`, weights default to 1. Use weights of 0 to mask values. Args: variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys and corresponding variables to update as values. y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`. y_pred: A floating point `Tensor` of arbitrary shape and whose values are in the range `[0, 1]`. thresholds: A float value or a python list or tuple of float thresholds in `[0, 1]`, or NEG_INF (used when top_k is set). top_k: Optional int, indicates that the positive labels should be limited to the top k predictions. class_id: Optional int, limits the prediction and labels to the class specified by this argument. sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions must be either `1`, or the same as the corresponding `y_true` dimension). Returns: Update op. Raises: ValueError: If `y_pred` and `y_true` have mismatched shapes, or if `sample_weight` is not `None` and its shape doesn't match `y_pred`, or if `variables_to_update` contains invalid keys. """ if variables_to_update is None: return y_true = ops.convert_to_tensor(y_true) y_pred = ops.convert_to_tensor(y_pred) y_pred.shape.assert_is_compatible_with(y_true.shape) if not any(key for key in variables_to_update if key in list(ConfusionMatrix)): raise ValueError( 'Please provide at least one valid confusion matrix ' 'variable to update. Valid variable key options are: "{}". ' 'Received: "{}"'.format(list(ConfusionMatrix), variables_to_update.keys())) invalid_keys = [ key for key in variables_to_update if key not in list(ConfusionMatrix) ] if invalid_keys: raise ValueError( 'Invalid keys: {}. Valid variable key options are: "{}"'.format( invalid_keys, list(ConfusionMatrix))) with ops.control_dependencies([ check_ops.assert_greater_equal(y_pred, math_ops.cast(0.0, dtype=y_pred.dtype), message='predictions must be >= 0'), check_ops.assert_less_equal(y_pred, math_ops.cast(1.0, dtype=y_pred.dtype), message='predictions must be <= 1') ]): y_pred, y_true, sample_weight = squeeze_or_expand_dimensions( math_ops.cast(y_pred, dtype=dtypes.float32), math_ops.cast(y_true, dtype=dtypes.bool), sample_weight) if top_k is not None: y_pred = _filter_top_k(y_pred, top_k) if class_id is not None: y_true = y_true[..., class_id] y_pred = y_pred[..., class_id] thresholds = to_list(thresholds) num_thresholds = len(thresholds) num_predictions = array_ops.size(y_pred) # Reshape predictions and labels. predictions_2d = array_ops.reshape(y_pred, [1, -1]) labels_2d = array_ops.reshape(math_ops.cast(y_true, dtype=dtypes.bool), [1, -1]) # Tile the thresholds for every prediction. 
thresh_tiled = array_ops.tile( array_ops.expand_dims(array_ops.constant(thresholds), 1), array_ops.stack([1, num_predictions])) # Tile the predictions for every threshold. preds_tiled = array_ops.tile(predictions_2d, [num_thresholds, 1]) # Compare predictions and threshold. pred_is_pos = math_ops.greater(preds_tiled, thresh_tiled) # Tile labels by number of thresholds label_is_pos = array_ops.tile(labels_2d, [num_thresholds, 1]) if sample_weight is not None: weights = weights_broadcast_ops.broadcast_weights( math_ops.cast(sample_weight, dtype=dtypes.float32), y_pred) weights_tiled = array_ops.tile(array_ops.reshape(weights, [1, -1]), [num_thresholds, 1]) else: weights_tiled = None update_ops = [] def weighted_assign_add(label, pred, weights, var): label_and_pred = math_ops.cast(math_ops.logical_and(label, pred), dtype=dtypes.float32) if weights is not None: label_and_pred *= weights return state_ops.assign_add(var, math_ops.reduce_sum(label_and_pred, 1)) loop_vars = { ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos), } update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update if update_fn or update_tn: pred_is_neg = math_ops.logical_not(pred_is_pos) loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos, pred_is_neg) if update_fp or update_tn: label_is_neg = math_ops.logical_not(label_is_pos) loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg, pred_is_pos) if update_tn: loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = (label_is_neg, pred_is_neg) for matrix_cond, (label, pred) in loop_vars.items(): if matrix_cond in variables_to_update: update_ops.append( weighted_assign_add(label, pred, weights_tiled, variables_to_update[matrix_cond])) return control_flow_ops.group(update_ops)
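# Illustrative sketch (not part of the original source): the threshold/prediction
# tiling used above, with NumPy analogues and hypothetical values. Each row of the
# tiled matrices corresponds to one threshold, each column to one (flattened)
# prediction.
import numpy as np

thresholds = [0.3, 0.5, 0.7]
y_pred = np.array([0.2, 0.6, 0.9], dtype=np.float32)
y_true = np.array([False, True, True])

num_thresholds, num_predictions = len(thresholds), y_pred.size
predictions_2d = y_pred.reshape(1, -1)
labels_2d = y_true.reshape(1, -1)

thresh_tiled = np.tile(np.asarray(thresholds).reshape(-1, 1), (1, num_predictions))
preds_tiled = np.tile(predictions_2d, (num_thresholds, 1))
label_is_pos = np.tile(labels_2d, (num_thresholds, 1))
pred_is_pos = preds_tiled > thresh_tiled

true_positives = np.logical_and(label_is_pos, pred_is_pos).sum(axis=1)
# true_positives == [2, 2, 1]: one count per threshold.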
def map_fn(x):
  return array_ops.tile(x, x)
def create_estimator_spec(self, features, mode, logits, labels=None, train_op_fn=None): """See `Head`.""" # Predict. with ops.name_scope('head'): with ops.name_scope(None, 'predictions', (logits, )): pred_keys = prediction_keys.PredictionKeys logits = _check_logits(logits, self.logits_dimension) logistic = math_ops.sigmoid(logits, name=pred_keys.LOGISTIC) two_class_logits = array_ops.concat( (array_ops.zeros_like(logits), logits), 1, name='two_class_logits') scores = nn.softmax(two_class_logits, name=pred_keys.PROBABILITIES) class_ids = array_ops.reshape(math_ops.argmax(two_class_logits, axis=1), (-1, 1), name='classes') if self._label_vocabulary: table = lookup_ops.index_to_string_table_from_tensor( vocabulary_list=self._label_vocabulary, name='class_string_lookup') classes = table.lookup(class_ids) else: classes = string_ops.as_string(class_ids, name='str_classes') predictions = { pred_keys.LOGITS: logits, pred_keys.LOGISTIC: logistic, pred_keys.PROBABILITIES: scores, pred_keys.CLASS_IDS: class_ids, pred_keys.CLASSES: classes, } if mode == model_fn.ModeKeys.PREDICT: batch_size = array_ops.shape(logistic)[0] export_class_list = self._label_vocabulary if not export_class_list: export_class_list = string_ops.as_string([0, 1]) export_output_classes = array_ops.tile( input=array_ops.expand_dims(input=export_class_list, axis=0), multiples=[batch_size, 1]) classifier_output = export_output.ClassificationOutput( scores=scores, # `ClassificationOutput` requires string classes. classes=export_output_classes) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.PREDICT, predictions=predictions, export_outputs={ '': classifier_output, # to be same as other heads. 'classification': classifier_output, # to be called by name. _DEFAULT_SERVING_KEY: classifier_output, # default 'regression': export_output.RegressionOutput(value=logistic) }) # Eval. unweighted_loss, processed_labels = self.create_loss( features=features, mode=mode, logits=logits, labels=labels) weights = _weights(features, self._weight_column) training_loss = losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.SUM) if mode == model_fn.ModeKeys.EVAL: return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.EVAL, predictions=predictions, loss=training_loss, eval_metric_ops=self._eval_metric_ops( labels=processed_labels, logits=logits, logistic=logistic, scores=scores, class_ids=class_ids, unweighted_loss=unweighted_loss, weights=weights)) # Train. if train_op_fn is None: raise ValueError('train_op_fn can not be None.') with ops.name_scope(''): summary.scalar( _summary_key(self._head_name, metric_keys.MetricKeys.LOSS), training_loss) summary.scalar( _summary_key(self._head_name, metric_keys.MetricKeys.LOSS_MEAN), losses.compute_weighted_loss(unweighted_loss, weights=weights, reduction=losses.Reduction.MEAN)) return model_fn.EstimatorSpec(mode=model_fn.ModeKeys.TRAIN, predictions=predictions, loss=training_loss, train_op=train_op_fn(training_loss))
def call(self, inputs, initial_state=None, dtype=None, sequence_length=None): """Run this LSTM on inputs, starting from the given state. Args: inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`. initial_state: a tuple `(initial_cell_state, initial_output)` with tensors of shape `[batch_size, self._num_units]`. If this is not provided, the cell is expected to create a zero initial state of type `dtype`. dtype: The data type for the initial state and expected output. Required if `initial_state` is not provided or RNN state has a heterogeneous dtype. sequence_length: Specifies the length of each sequence in inputs. An `int32` or `int64` vector (tensor) size `[batch_size]`, values in `[0, time_len).` Defaults to `time_len` for each element. Returns: A pair containing: - Output: A `3-D` tensor of shape `[time_len, batch_size, output_size]` or a list of time_len tensors of shape `[batch_size, output_size]`, to match the type of the `inputs`. - Final state: a tuple `(cell_state, output)` matching `initial_state`. Raises: ValueError: in case of shape mismatches """ is_list = isinstance(inputs, list) if is_list: inputs = array_ops.stack(inputs) inputs_shape = inputs.get_shape().with_rank(3) if not inputs_shape[2]: raise ValueError("Expecting inputs_shape[2] to be set: %s" % inputs_shape) batch_size = inputs_shape.dims[1].value if batch_size is None: batch_size = array_ops.shape(inputs)[1] time_len = inputs_shape.dims[0].value if time_len is None: time_len = array_ops.shape(inputs)[0] # Provide default values for initial_state and dtype if initial_state is None: if dtype is None: raise ValueError( "Either initial_state or dtype needs to be specified") z = array_ops.zeros(array_ops.stack([batch_size, self.num_units]), dtype=dtype) initial_state = z, z else: if len(initial_state) != 2: raise ValueError( "Expecting initial_state to be a tuple with length 2 or None" ) if dtype is None: dtype = initial_state[0].dtype # create the actual cell if sequence_length is not None: sequence_length = ops.convert_to_tensor(sequence_length) initial_cell_state, initial_output = initial_state # pylint: disable=unpacking-non-sequence cell_states, outputs = self._call_cell(inputs, initial_cell_state, initial_output, dtype, sequence_length) if sequence_length is not None: # Mask out the part beyond sequence_length mask = array_ops.transpose( array_ops.sequence_mask(sequence_length, time_len, dtype=dtype), [1, 0]) mask = array_ops.tile(array_ops.expand_dims(mask, [-1]), [1, 1, self.num_units]) outputs *= mask # Prepend initial states to cell_states and outputs for indexing to work # correctly,since we want to access the last valid state at # sequence_length - 1, which can even be -1, corresponding to the # initial state. mod_cell_states = array_ops.concat( [array_ops.expand_dims(initial_cell_state, [0]), cell_states], 0) mod_outputs = array_ops.concat( [array_ops.expand_dims(initial_output, [0]), outputs], 0) final_cell_state = self._gather_states(mod_cell_states, sequence_length, batch_size) final_output = self._gather_states(mod_outputs, sequence_length, batch_size) else: # No sequence_lengths used: final state is the last state final_cell_state = cell_states[-1] final_output = outputs[-1] if is_list: # Input was a list, so return a list outputs = array_ops.unstack(outputs) final_state = rnn_cell_impl.LSTMStateTuple(final_cell_state, final_output) return outputs, final_state
def dataset_fn():
  return dataset_ops.Dataset.range(10).map(
      lambda x: array_ops.tile([x], ops.convert_to_tensor([x])),
      num_parallel_calls=4)
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size, beam_width, end_token, length_penalty_weight): """Performs a single step of Beam Search Decoding. Args: time: Beam search time step, should start at 0. At time 0 we assume that all beams are equal and consider only the first beam for continuations. logits: Logits at the current time step. A tensor of shape `[batch_size, beam_width, vocab_size]` next_cell_state: The next state from the cell, e.g. an instance of AttentionWrapperState if the cell is attentional. beam_state: Current state of the beam search. An instance of `BeamSearchDecoderState`. batch_size: The batch size for this input. beam_width: Python int. The size of the beams. end_token: The int32 end token. length_penalty_weight: Float weight to penalize length. Disabled with 0.0. Returns: A new beam state. """ static_batch_size = tensor_util.constant_value(batch_size) # Calculate the current lengths of the predictions prediction_lengths = beam_state.lengths previously_finished = beam_state.finished # Calculate the total log probs for the new hypotheses # Final Shape: [batch_size, beam_width, vocab_size] step_log_probs = nn_ops.log_softmax(logits) step_log_probs = _mask_probs(step_log_probs, end_token, previously_finished) total_probs = array_ops.expand_dims(beam_state.log_probs, 2) + step_log_probs # Calculate the continuation lengths by adding to all continuing beams. vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1] lengths_to_add = array_ops.one_hot( indices=array_ops.tile(array_ops.reshape(end_token, [1, 1]), [batch_size, beam_width]), depth=vocab_size, on_value=constant_op.constant(0, dtype=dtypes.int64), off_value=constant_op.constant(1, dtype=dtypes.int64), dtype=dtypes.int64) add_mask = (1 - math_ops.to_int64(previously_finished)) lengths_to_add = array_ops.expand_dims(add_mask, 2) * lengths_to_add new_prediction_lengths = (lengths_to_add + array_ops.expand_dims(prediction_lengths, 2)) # Calculate the scores for each beam scores = _get_scores(log_probs=total_probs, sequence_lengths=new_prediction_lengths, length_penalty_weight=length_penalty_weight) time = ops.convert_to_tensor(time, name="time") # During the first time step we only consider the initial beam scores_shape = array_ops.shape(scores) scores_flat = control_flow_ops.cond( time > 0, lambda: array_ops.reshape(scores, [batch_size, -1]), lambda: scores[:, 0]) num_available_beam = control_flow_ops.cond( time > 0, lambda: math_ops.reduce_prod(scores_shape[1:]), lambda: math_ops.reduce_prod(scores_shape[2:])) # Pick the next beams according to the specified successors function next_beam_size = math_ops.minimum( ops.convert_to_tensor(beam_width, dtype=dtypes.int32, name="beam_width"), num_available_beam) next_beam_scores, word_indices = nn_ops.top_k(scores_flat, k=next_beam_size) next_beam_scores.set_shape([static_batch_size, beam_width]) word_indices.set_shape([static_batch_size, beam_width]) # Pick out the probs, beam_ids, and states according to the chosen predictions next_beam_probs = _tensor_gather_helper(gather_indices=word_indices, gather_from=total_probs, batch_size=batch_size, range_size=beam_width * vocab_size, gather_shape=[-1], name="next_beam_probs") # Note: just doing the following # math_ops.to_int32(word_indices % vocab_size, # name="next_beam_word_ids") # would be a lot cleaner but for reasons unclear, that hides the results of # the op which prevents capturing it with tfdbg debug ops. 
raw_next_word_ids = math_ops.mod(word_indices, vocab_size, name="next_beam_word_ids") next_word_ids = math_ops.to_int32(raw_next_word_ids) next_beam_ids = math_ops.to_int32(word_indices / vocab_size, name="next_beam_parent_ids") # Append new ids to current predictions previously_finished = _tensor_gather_helper( gather_indices=next_beam_ids, gather_from=previously_finished, batch_size=batch_size, range_size=beam_width, gather_shape=[-1]) next_finished = math_ops.logical_or(previously_finished, math_ops.equal(next_word_ids, end_token), name="next_beam_finished") # Calculate the length of the next predictions. # 1. Finished beams remain unchanged # 2. Beams that are now finished (EOS predicted) remain unchanged # 3. Beams that are not yet finished have their length increased by 1 lengths_to_add = math_ops.to_int64( math_ops.not_equal(next_word_ids, end_token)) lengths_to_add = (1 - math_ops.to_int64(next_finished)) * lengths_to_add next_prediction_len = _tensor_gather_helper(gather_indices=next_beam_ids, gather_from=beam_state.lengths, batch_size=batch_size, range_size=beam_width, gather_shape=[-1]) next_prediction_len += lengths_to_add # Pick out the cell_states according to the next_beam_ids. We use a # different gather_shape here because the cell_state tensors, i.e. # the tensors that would be gathered from, all have dimension # greater than two and we need to preserve those dimensions. # pylint: disable=g-long-lambda next_cell_state = nest.map_structure( lambda gather_from: _maybe_tensor_gather_helper( gather_indices=next_beam_ids, gather_from=gather_from, batch_size=batch_size, range_size=beam_width, gather_shape=[batch_size * beam_width, -1]), next_cell_state) # pylint: enable=g-long-lambda next_state = BeamSearchDecoderState(cell_state=next_cell_state, log_probs=next_beam_probs, lengths=next_prediction_len, finished=next_finished) output = BeamSearchDecoderOutput(scores=next_beam_scores, predicted_ids=next_word_ids, parent_ids=next_beam_ids) return output, next_state
def _serving_input_receiver_fn(): """A receiver function to be passed to export_savedmodel.""" placeholders = {} time_placeholder = array_ops.placeholder( name=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64, shape=[default_batch_size, default_series_length]) placeholders[ feature_keys.TrainEvalFeatures.TIMES] = time_placeholder # Values are only necessary when filtering. For prediction the default # value will be ignored. placeholders[feature_keys.TrainEvalFeatures.VALUES] = ( array_ops.placeholder_with_default( name=feature_keys.TrainEvalFeatures.VALUES, input=array_ops.zeros(shape=[ default_batch_size if default_batch_size else 0, default_series_length if default_series_length else 0, self._model.num_features ], dtype=self._model.dtype), shape=(default_batch_size, default_series_length, self._model.num_features))) if self._model.exogenous_feature_columns: with ops.Graph().as_default(): # Default placeholders have only an unknown batch dimension. Make them # in a separate graph, then splice in the series length to the shapes # and re-create them in the outer graph. parsed_features = (feature_column.make_parse_example_spec( self._model.exogenous_feature_columns)) placeholder_features = parsing_ops.parse_example( serialized=array_ops.placeholder(shape=[None], dtype=dtypes.string), features=parsed_features) exogenous_feature_shapes = { key: (value.get_shape(), value.dtype) for key, value in placeholder_features.items() } for feature_key, (batch_only_feature_shape, value_dtype) in ( exogenous_feature_shapes.items()): batch_only_feature_shape = ( batch_only_feature_shape.with_rank_at_least( 1).as_list()) feature_shape = ( [default_batch_size, default_series_length] + batch_only_feature_shape[1:]) placeholders[feature_key] = array_ops.placeholder( dtype=value_dtype, name=feature_key, shape=feature_shape) # Models may not know the shape of their state without creating some # variables/ops. Avoid polluting the default graph by making a new one. We # use only static metadata from the returned Tensors. with ops.Graph().as_default(): self._model.initialize_graph() # Evaluate the initial state as same-dtype "zero" values. These zero # constants aren't used, but are necessary for feeding to # placeholder_with_default for the "cold start" case where state is not # fed to the model. def _zeros_like_constant(tensor): return tensor_util.constant_value( array_ops.zeros_like(tensor)) start_state = nest.map_structure(_zeros_like_constant, self._model.get_start_state()) batch_size_tensor = array_ops.shape(time_placeholder)[0] for prefixed_state_name, state in ts_head_lib.state_to_dictionary( start_state).items(): state_shape_with_batch = tensor_shape.TensorShape( (default_batch_size, )).concatenate(state.shape) default_state_broadcast = array_ops.tile( state[None, ...], multiples=array_ops.concat([ batch_size_tensor[None], array_ops.ones(len(state.shape), dtype=dtypes.int32) ], axis=0)) placeholders[ prefixed_state_name] = array_ops.placeholder_with_default( input=default_state_broadcast, name=prefixed_state_name, shape=state_shape_with_batch) return export_lib.ServingInputReceiver(placeholders, placeholders)
def _init_val_initializer(shape, dtype=None, partition_info=None):
  del dtype, partition_info  # Unused by this unit-testing initializer.
  return array_ops.tile(
      constant_op.constant([[self.init_val]], dtype=dtypes.float32), shape)
def initialize(self, name=None):
  finished = array_ops.tile([False], [self._batch_size])
  return (finished, self._start_inputs)
def lifted_struct_loss(labels, embeddings, margin=1.0): """Computes the lifted structured loss. The loss encourages the positive distances (between a pair of embeddings with the same labels) to be smaller than any negative distances (between a pair of embeddings with different labels) in the mini-batch in a way that is differentiable with respect to the embedding vectors. See: https://arxiv.org/abs/1511.06452. Args: labels: 1-D tf.int32 `Tensor` with shape [batch_size] of multiclass integer labels. embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should not be l2 normalized. margin: Float, margin term in the loss definition. Returns: lifted_loss: tf.float32 scalar. """ # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor. lshape = array_ops.shape(labels) assert lshape.shape == 1 labels = array_ops.reshape(labels, [lshape[0], 1]) # Build pairwise squared distance matrix. pairwise_distances = pairwise_distance(embeddings) # Build pairwise binary adjacency matrix. adjacency = math_ops.equal(labels, array_ops.transpose(labels)) # Invert so we can select negatives only. adjacency_not = math_ops.logical_not(adjacency) batch_size = array_ops.size(labels) diff = margin - pairwise_distances mask = math_ops.cast(adjacency_not, dtype=dtypes.float32) # Safe maximum: Temporarily shift negative distances # above zero before taking max. # this is to take the max only among negatives. row_minimums = math_ops.reduce_min(diff, 1, keep_dims=True) row_negative_maximums = math_ops.reduce_max( math_ops.multiply( diff - row_minimums, mask), 1, keep_dims=True) + row_minimums # Compute the loss. # Keep track of matrix of maximums where M_ij = max(m_i, m_j) # where m_i is the max of alpha - negative D_i's. # This matches the Caffe loss layer implementation at: # https://github.com/rksltnl/Caffe-Deep-Metric-Learning-CVPR16/blob/0efd7544a9846f58df923c8b992198ba5c355454/src/caffe/layers/lifted_struct_similarity_softmax_layer.cpp # pylint: disable=line-too-long max_elements = math_ops.maximum( row_negative_maximums, array_ops.transpose(row_negative_maximums)) diff_tiled = array_ops.tile(diff, [batch_size, 1]) mask_tiled = array_ops.tile(mask, [batch_size, 1]) max_elements_vect = array_ops.reshape( array_ops.transpose(max_elements), [-1, 1]) loss_exp_left = array_ops.reshape( math_ops.reduce_sum(math_ops.multiply( math_ops.exp( diff_tiled - max_elements_vect), mask_tiled), 1, keep_dims=True), [batch_size, batch_size]) loss_mat = max_elements + math_ops.log( loss_exp_left + array_ops.transpose(loss_exp_left)) # Add the positive distance. loss_mat += pairwise_distances mask_positives = math_ops.cast( adjacency, dtype=dtypes.float32) - array_ops.diag( array_ops.ones([batch_size])) # *0.5 for upper triangular, and another *0.5 for 1/2 factor for loss^2. num_positives = math_ops.reduce_sum(mask_positives) / 2.0 lifted_loss = math_ops.truediv( 0.25 * math_ops.reduce_sum( math_ops.square( math_ops.maximum( math_ops.multiply(loss_mat, mask_positives), 0.0))), num_positives, name='liftedstruct_loss') return lifted_loss
def triplet_semihard_loss(labels, embeddings, margin=1.0): """Computes the triplet loss with semi-hard negative mining. The loss encourages the positive distances (between a pair of embeddings with the same labels) to be smaller than the minimum negative distance among which are at least greater than the positive distance plus the margin constant (called semi-hard negative) in the mini-batch. If no such negative exists, uses the largest negative distance instead. See: https://arxiv.org/abs/1503.03832. Args: labels: 1-D tf.int32 `Tensor` with shape [batch_size] of multiclass integer labels. embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should be l2 normalized. margin: Float, margin term in the loss definition. Returns: triplet_loss: tf.float32 scalar. """ # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor. lshape = array_ops.shape(labels) assert lshape.shape == 1 labels = array_ops.reshape(labels, [lshape[0], 1]) # Build pairwise squared distance matrix. pdist_matrix = pairwise_distance(embeddings, squared=True) # Build pairwise binary adjacency matrix. adjacency = math_ops.equal(labels, array_ops.transpose(labels)) # Invert so we can select negatives only. adjacency_not = math_ops.logical_not(adjacency) batch_size = array_ops.size(labels) # Compute the mask. pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1]) mask = math_ops.logical_and( array_ops.tile(adjacency_not, [batch_size, 1]), math_ops.greater( pdist_matrix_tile, array_ops.reshape( array_ops.transpose(pdist_matrix), [-1, 1]))) mask_final = array_ops.reshape( math_ops.greater( math_ops.reduce_sum( math_ops.cast( mask, dtype=dtypes.float32), 1, keep_dims=True), 0.0), [batch_size, batch_size]) mask_final = array_ops.transpose(mask_final) adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32) mask = math_ops.cast(mask, dtype=dtypes.float32) # negatives_outside: smallest D_an where D_an > D_ap. negatives_outside = array_ops.reshape( masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size]) negatives_outside = array_ops.transpose(negatives_outside) # negatives_inside: largest D_an. negatives_inside = array_ops.tile( masked_maximum(pdist_matrix, adjacency_not), [1, batch_size]) semi_hard_negatives = array_ops.where( mask_final, negatives_outside, negatives_inside) loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives) mask_positives = math_ops.cast( adjacency, dtype=dtypes.float32) - array_ops.diag( array_ops.ones([batch_size])) # In lifted-struct, the authors multiply 0.5 for upper triangular # in semihard, they take all positive pairs except the diagonal. num_positives = math_ops.reduce_sum(mask_positives) triplet_loss = math_ops.truediv( math_ops.reduce_sum( math_ops.maximum( math_ops.multiply(loss_mat, mask_positives), 0.0)), num_positives, name='triplet_semihard_loss') return triplet_loss
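# Illustrative sketch (not part of the original source): the pairwise tiling trick
# shared by the two metric-learning losses above. Tiling the [batch, batch] distance
# matrix batch_size times lines every (anchor, positive) pair up against the full
# row of candidate negatives. NumPy analogue with a hypothetical 3-example batch.
import numpy as np

batch_size = 3
pdist_matrix = np.array([[0., 1., 4.],
                         [1., 0., 2.],
                         [4., 2., 0.]])                      # squared distances, [3, 3]

pdist_matrix_tile = np.tile(pdist_matrix, (batch_size, 1))   # [batch_size**2, batch_size]
# Row r = i * batch_size + j of the tiled matrix is pdist_matrix[j], and the matching
# entry of the column vector below is pdist_matrix[j, i], so each row asks, for every
# candidate negative n, whether D(j, n) > D(j, i).
anchor_positive = pdist_matrix.T.reshape(-1, 1)
is_semihard_candidate = pdist_matrix_tile > anchor_positive  # [batch_size**2, batch_size]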
def update_estimate_and_tile(num_examples_per_class_seen, c): updated_examples_per_class_seen, dist = _estimate_data_distribution( c, num_examples_per_class_seen) tiled_dist = array_ops.tile(array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1]) return updated_examples_per_class_seen, tiled_dist
def _SumGrad(op, grad): """Gradient for Sum.""" _, new_output_shape, input_shape = _ReductionGradAssist(op) tile_scaling = input_shape // new_output_shape grad = array_ops.reshape(grad, new_output_shape) return [array_ops.tile(grad, tile_scaling), None]
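For intuition, a short NumPy sketch of what this reshape-and-tile gradient computes for a rank-2 input reduced over axis 1 (names are illustrative, not TF internals):

```python
import numpy as np

x = np.arange(6, dtype=np.float32).reshape(2, 3)
# Forward: y = x.sum(axis=1) has shape (2,).
upstream_grad = np.array([10., 20.], dtype=np.float32)     # dL/dy

# Backward: reshape the gradient to the kept-dims shape, then tile it
# across the reduced axis so every summed element gets the same gradient.
grad_kept = upstream_grad.reshape(2, 1)    # new_output_shape
dx = np.tile(grad_kept, (1, 3))            # tile_scaling = input_shape // new_output_shape
assert dx.shape == x.shape
assert (dx == [[10., 10., 10.], [20., 20., 20.]]).all()
```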
def __init__(self, cell, embedding, start_tokens, end_token, initial_state, beam_width, output_layer=None, length_penalty_weight=0.0): """Initialize the BeamSearchDecoder. Args: cell: An `RNNCell` instance. embedding: A callable that takes a vector tensor of `ids` (argmax ids), or the `params` argument for `embedding_lookup`. start_tokens: `int32` vector shaped `[batch_size]`, the start tokens. end_token: `int32` scalar, the token that marks end of decoding. initial_state: A (possibly nested tuple of...) tensors and TensorArrays. beam_width: Python integer, the number of beams. output_layer: (Optional) An instance of `tf.layers.Layer`, i.e., `tf.layers.Dense`. Optional layer to apply to the RNN output prior to storing the result or sampling. length_penalty_weight: Float weight to penalize length. Disabled with 0.0. Raises: TypeError: if `cell` is not an instance of `RNNCell`, or `output_layer` is not an instance of `tf.layers.Layer`. ValueError: If `start_tokens` is not a vector or `end_token` is not a scalar. """ if not rnn_cell_impl._like_rnncell(cell): # pylint: disable=protected-access raise TypeError("cell must be an RNNCell, received: %s" % type(cell)) if (output_layer is not None and not isinstance(output_layer, layers_base.Layer)): raise TypeError("output_layer must be a Layer, received: %s" % type(output_layer)) self._cell = cell self._output_layer = output_layer if callable(embedding): self._embedding_fn = embedding else: self._embedding_fn = ( lambda ids: embedding_ops.embedding_lookup(embedding, ids)) self._start_tokens = ops.convert_to_tensor(start_tokens, dtype=dtypes.int32, name="start_tokens") if self._start_tokens.get_shape().ndims != 1: raise ValueError("start_tokens must be a vector") self._end_token = ops.convert_to_tensor(end_token, dtype=dtypes.int32, name="end_token") if self._end_token.get_shape().ndims != 0: raise ValueError("end_token must be a scalar") self._batch_size = array_ops.size(start_tokens) self._beam_width = beam_width self._length_penalty_weight = length_penalty_weight self._initial_cell_state = nest.map_structure( self._maybe_split_batch_beams, initial_state, self._cell.state_size) self._start_tokens = array_ops.tile( array_ops.expand_dims(self._start_tokens, 1), [1, self._beam_width]) self._start_inputs = self._embedding_fn(self._start_tokens) self._finished = array_ops.zeros([self._batch_size, self._beam_width], dtype=dtypes.bool)
def initialize(self, name=None): finished = array_ops.tile([False], [self._batch_size]) return (finished, self._start_inputs)
def training_graph(self, input_data, input_labels, random_seed, data_spec, epoch=None): """Constructs a TF graph for training a random tree. Args: input_data: A tensor or SparseTensor or placeholder for input data. input_labels: A tensor or placeholder for labels associated with input_data. random_seed: The random number generator seed to use for this tree. 0 means use the current time as the seed. data_spec: A list of tf.dtype values specifying the original types of each column. epoch: A tensor or placeholder for the epoch the training data comes from. Returns: The last op in the random tree training graph. """ epoch = [0] if epoch is None else epoch sparse_indices = [] sparse_values = [] sparse_shape = [] if isinstance(input_data, ops.SparseTensor): sparse_indices = input_data.indices sparse_values = input_data.values sparse_shape = input_data.shape input_data = [] # Count extremely random stats. (node_sums, node_squares, splits_indices, splits_sums, splits_squares, totals_indices, totals_sums, totals_squares, input_leaves) = ( self.training_ops.count_extremely_random_stats( input_data, sparse_indices, sparse_values, sparse_shape, data_spec, input_labels, self.variables.tree, self.variables.tree_thresholds, self.variables.node_to_accumulator_map, self.variables.candidate_split_features, self.variables.candidate_split_thresholds, self.variables.start_epoch, epoch, num_classes=self.params.num_output_columns, regression=self.params.regression)) node_update_ops = [] node_update_ops.append( state_ops.assign_add(self.variables.node_sums, node_sums)) splits_update_ops = [] splits_update_ops.append(self.training_ops.scatter_add_ndim( self.variables.candidate_split_sums, splits_indices, splits_sums)) splits_update_ops.append(self.training_ops.scatter_add_ndim( self.variables.accumulator_sums, totals_indices, totals_sums)) if self.params.regression: node_update_ops.append(state_ops.assign_add(self.variables.node_squares, node_squares)) splits_update_ops.append(self.training_ops.scatter_add_ndim( self.variables.candidate_split_squares, splits_indices, splits_squares)) splits_update_ops.append(self.training_ops.scatter_add_ndim( self.variables.accumulator_squares, totals_indices, totals_squares)) # Sample inputs. update_indices, feature_updates, threshold_updates = ( self.training_ops.sample_inputs( input_data, sparse_indices, sparse_values, sparse_shape, self.variables.node_to_accumulator_map, input_leaves, self.variables.candidate_split_features, self.variables.candidate_split_thresholds, split_initializations_per_input=( self.params.split_initializations_per_input), split_sampling_random_seed=random_seed)) update_features_op = state_ops.scatter_update( self.variables.candidate_split_features, update_indices, feature_updates) update_thresholds_op = state_ops.scatter_update( self.variables.candidate_split_thresholds, update_indices, threshold_updates) # Calculate finished nodes. 
with ops.control_dependencies(splits_update_ops): children = array_ops.squeeze(array_ops.slice( self.variables.tree, [0, 0], [-1, 1]), squeeze_dims=[1]) is_leaf = math_ops.equal(constants.LEAF_NODE, children) leaves = math_ops.to_int32(array_ops.squeeze(array_ops.where(is_leaf), squeeze_dims=[1])) finished, stale = self.training_ops.finished_nodes( leaves, self.variables.node_to_accumulator_map, self.variables.candidate_split_sums, self.variables.candidate_split_squares, self.variables.accumulator_sums, self.variables.accumulator_squares, self.variables.start_epoch, epoch, num_split_after_samples=self.params.split_after_samples, min_split_samples=self.params.min_split_samples) # Update leaf scores. non_fertile_leaves = array_ops.boolean_mask( leaves, math_ops.less(array_ops.gather( self.variables.node_to_accumulator_map, leaves), 0)) # TODO(gilberth): It should be possible to limit the number of non # fertile leaves we calculate scores for, especially since we can only take # at most array_ops.shape(finished)[0] of them. with ops.control_dependencies(node_update_ops): sums = array_ops.gather(self.variables.node_sums, non_fertile_leaves) if self.params.regression: squares = array_ops.gather(self.variables.node_squares, non_fertile_leaves) non_fertile_leaf_scores = self._variance(sums, squares) else: non_fertile_leaf_scores = self._weighted_gini(sums) # Calculate best splits. with ops.control_dependencies(splits_update_ops): split_indices = self.training_ops.best_splits( finished, self.variables.node_to_accumulator_map, self.variables.candidate_split_sums, self.variables.candidate_split_squares, self.variables.accumulator_sums, self.variables.accumulator_squares, regression=self.params.regression) # Grow tree. with ops.control_dependencies([update_features_op, update_thresholds_op]): (tree_update_indices, tree_children_updates, tree_threshold_updates, tree_depth_updates, new_eot) = ( self.training_ops.grow_tree( self.variables.end_of_tree, self.variables.tree_depths, self.variables.node_to_accumulator_map, finished, split_indices, self.variables.candidate_split_features, self.variables.candidate_split_thresholds)) tree_update_op = state_ops.scatter_update( self.variables.tree, tree_update_indices, tree_children_updates) thresholds_update_op = state_ops.scatter_update( self.variables.tree_thresholds, tree_update_indices, tree_threshold_updates) depth_update_op = state_ops.scatter_update( self.variables.tree_depths, tree_update_indices, tree_depth_updates) # TODO(thomaswc): Only update the epoch on the new leaves. new_epoch_updates = epoch * array_ops.ones_like(tree_depth_updates) epoch_update_op = state_ops.scatter_update( self.variables.start_epoch, tree_update_indices, new_epoch_updates) # Update fertile slots. with ops.control_dependencies([depth_update_op]): (node_map_updates, accumulators_cleared, accumulators_allocated) = ( self.training_ops.update_fertile_slots( finished, non_fertile_leaves, non_fertile_leaf_scores, self.variables.end_of_tree, self.variables.tree_depths, self.variables.accumulator_sums, self.variables.node_to_accumulator_map, stale, max_depth=self.params.max_depth, regression=self.params.regression)) # Ensure end_of_tree doesn't get updated until UpdateFertileSlots has # used it to calculate new leaves. 
gated_new_eot, = control_flow_ops.tuple([new_eot], control_inputs=[node_map_updates]) eot_update_op = state_ops.assign(self.variables.end_of_tree, gated_new_eot) updates = [] updates.append(eot_update_op) updates.append(tree_update_op) updates.append(thresholds_update_op) updates.append(epoch_update_op) updates.append(state_ops.scatter_update( self.variables.node_to_accumulator_map, array_ops.squeeze(array_ops.slice(node_map_updates, [0, 0], [1, -1]), squeeze_dims=[0]), array_ops.squeeze(array_ops.slice(node_map_updates, [1, 0], [1, -1]), squeeze_dims=[0]))) cleared_and_allocated_accumulators = array_ops.concat( 0, [accumulators_cleared, accumulators_allocated]) # Calculate values to put into scatter update for candidate counts. # Candidate split counts are always reset back to 0 for both cleared # and allocated accumulators. This means some accumulators might be doubly # reset to 0 if the were released and not allocated, then later allocated. split_values = array_ops.tile( array_ops.expand_dims(array_ops.expand_dims( array_ops.zeros_like(cleared_and_allocated_accumulators, dtype=dtypes.float32), 1), 2), [1, self.params.num_splits_to_consider, self.params.num_output_columns]) updates.append(state_ops.scatter_update( self.variables.candidate_split_sums, cleared_and_allocated_accumulators, split_values)) if self.params.regression: updates.append(state_ops.scatter_update( self.variables.candidate_split_squares, cleared_and_allocated_accumulators, split_values)) # Calculate values to put into scatter update for total counts. total_cleared = array_ops.tile( array_ops.expand_dims( math_ops.neg(array_ops.ones_like(accumulators_cleared, dtype=dtypes.float32)), 1), [1, self.params.num_output_columns]) total_reset = array_ops.tile( array_ops.expand_dims( array_ops.zeros_like(accumulators_allocated, dtype=dtypes.float32), 1), [1, self.params.num_output_columns]) accumulator_updates = array_ops.concat(0, [total_cleared, total_reset]) updates.append(state_ops.scatter_update( self.variables.accumulator_sums, cleared_and_allocated_accumulators, accumulator_updates)) if self.params.regression: updates.append(state_ops.scatter_update( self.variables.accumulator_squares, cleared_and_allocated_accumulators, accumulator_updates)) # Calculate values to put into scatter update for candidate splits. split_features_updates = array_ops.tile( array_ops.expand_dims( math_ops.neg(array_ops.ones_like( cleared_and_allocated_accumulators)), 1), [1, self.params.num_splits_to_consider]) updates.append(state_ops.scatter_update( self.variables.candidate_split_features, cleared_and_allocated_accumulators, split_features_updates)) updates += self.finish_iteration() return control_flow_ops.group(*updates)
def safe_embedding_lookup_sparse(embedding_weights, sparse_ids, sparse_weights=None, combiner="mean", default_id=None, name=None, partition_strategy="div", max_norm=None): """Lookup embedding results, accounting for invalid IDs and empty features. The partitioned embedding tensors in `embedding_weights` must all have the same shape except for the first dimension. The first dimension is allowed to vary as the vocabulary size is not necessarily a multiple of `P`. `embedding_weights` may be a `PartitionedVariable` as returned by using `tf.compat.v1.get_variable()` with a partitioner. Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs with non-positive weight. For an entry with no features, the embedding vector for `default_id` is returned, or the 0-vector if `default_id` is not supplied. The ids and weights may be multi-dimensional. Embeddings are always aggregated along the last dimension. Args: embedding_weights: A list of `P` float `Tensor`s or values representing partitioned embedding `Tensor`s. Alternatively, a `PartitionedVariable` created by partitioning along dimension 0. The total unpartitioned shape should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the vocab size and `e_1, ..., e_m` are the embedding dimensions. sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the ids. `d_0` is typically batch size. sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing float weights corresponding to `sparse_ids`, or `None` if all weights are assumed to be 1.0. combiner: A string specifying how to combine embedding results for each entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean" the default. default_id: The id to use for an entry with no features. name: A name for this operation (optional). partition_strategy: A string specifying the partitioning strategy. Currently `"div"` and `"mod"` are supported. Default is `"div"`. max_norm: If not `None`, all embeddings are l2-normalized to max_norm before combining. Returns: Dense `Tensor` of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`. Raises: ValueError: if `embedding_weights` is empty. """ if embedding_weights is None: raise ValueError("Missing embedding_weights %s." % embedding_weights) if isinstance(embedding_weights, variables.PartitionedVariable): embedding_weights = list(embedding_weights) # get underlying Variables. if not isinstance(embedding_weights, list): embedding_weights = [embedding_weights] if len(embedding_weights) < 1: raise ValueError("Missing embedding_weights %s." % embedding_weights) dtype = sparse_weights.dtype if sparse_weights is not None else None embedding_weights = [ w if (isinstance(w, resource_variable_ops.ResourceVariable) and dtype in (None, w.dtype)) else ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights ] with ops.name_scope(name, "embedding_lookup", embedding_weights + [sparse_ids, sparse_weights]) as scope: # Reshape higher-rank sparse ids and weights to linear segment ids. 
original_shape = sparse_ids.dense_shape original_rank_dim = tensor_shape.dimension_value( sparse_ids.dense_shape.get_shape()[0]) original_rank = ( array_ops.size(original_shape) if original_rank_dim is None else original_rank_dim) sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [ math_ops.reduce_prod( array_ops.slice(original_shape, [0], [original_rank - 1])), array_ops.gather(original_shape, original_rank - 1) ]) if sparse_weights is not None: sparse_weights = sparse_tensor.SparseTensor(sparse_ids.indices, sparse_weights.values, sparse_ids.dense_shape) # Prune invalid ids and weights. sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights) if combiner != "sum": sparse_ids, sparse_weights = _prune_invalid_weights( sparse_ids, sparse_weights) # Fill in dummy values for empty features, if necessary. sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows( sparse_ids, default_id or 0) if sparse_weights is not None: sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0) result = embedding_lookup_sparse( embedding_weights, sparse_ids, sparse_weights, combiner=combiner, partition_strategy=partition_strategy, name=None if default_id is None else scope, max_norm=max_norm) if default_id is None: # Broadcast is_row_empty to the same shape as embedding_lookup_result, # for use in Select. is_row_empty = array_ops.tile( array_ops.reshape(is_row_empty, [-1, 1]), array_ops.stack([1, array_ops.shape(result)[1]])) result = array_ops.where( is_row_empty, array_ops.zeros_like(result), result, name=scope) # Reshape back from linear ids back into higher-dimensional dense result. final_result = array_ops.reshape( result, array_ops.concat([ array_ops.slice( math_ops.cast(original_shape, dtypes.int32), [0], [original_rank - 1]), array_ops.slice(array_ops.shape(result), [1], [-1]) ], 0)) final_result.set_shape( tensor_shape.unknown_shape( (tensor_shape.Dimension(original_rank_dim) - 1).value).concatenate( result.get_shape()[1:])) return final_result
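A hedged usage sketch for the lookup above (TF 1.x graph mode; assumes this `safe_embedding_lookup_sparse` and the helpers it calls are importable). The vocabulary size, embedding dimension, and ids are illustrative; example 1 has no features, so with `default_id=None` it comes back as the zero vector:

```python
import tensorflow as tf

embedding_weights = tf.get_variable("emb", shape=[100, 16], dtype=tf.float32)

# A batch of 2 examples with up to 2 sparse feature ids each; example 1 is empty.
sparse_ids = tf.SparseTensor(
    indices=[[0, 0], [0, 1]],
    values=tf.constant([3, 7], dtype=tf.int64),
    dense_shape=[2, 2])

embedded = safe_embedding_lookup_sparse(
    embedding_weights, sparse_ids, combiner="mean")   # -> shape [2, 16]
```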
def __init__(self, cell, embedding, start_tokens, end_token, initial_state, beam_width, output_layer=None, length_penalty_weight=0.0, coverage_penalty_weight=0.0, reorder_tensor_arrays=True): """Initialize the BeamSearchDecoder. Args: cell: An `RNNCell` instance. embedding: A callable that takes a vector tensor of `ids` (argmax ids), or the `params` argument for `embedding_lookup`. start_tokens: `int32` vector shaped `[batch_size]`, the start tokens. end_token: `int32` scalar, the token that marks end of decoding. initial_state: A (possibly nested tuple of...) tensors and TensorArrays. beam_width: Python integer, the number of beams. output_layer: (Optional) An instance of `tf.layers.Layer`, i.e., `tf.layers.Dense`. Optional layer to apply to the RNN output prior to storing the result or sampling. length_penalty_weight: Float weight to penalize length. Disabled with 0.0. coverage_penalty_weight: Float weight to penalize the coverage of source sentence. Disabled with 0.0. reorder_tensor_arrays: If `True`, `TensorArray`s' elements within the cell state will be reordered according to the beam search path. If the `TensorArray` can be reordered, the stacked form will be returned. Otherwise, the `TensorArray` will be returned as is. Set this flag to `False` if the cell state contains `TensorArray`s that are not amenable to reordering. Raises: TypeError: if `cell` is not an instance of `RNNCell`, or `output_layer` is not an instance of `tf.layers.Layer`. ValueError: If `start_tokens` is not a vector or `end_token` is not a scalar. """ rnn_cell_impl.assert_like_rnncell("cell", cell) # pylint: disable=protected-access if (output_layer is not None and not isinstance(output_layer, layers_base.Layer)): raise TypeError("output_layer must be a Layer, received: %s" % type(output_layer)) self._cell = cell self._output_layer = output_layer self._reorder_tensor_arrays = reorder_tensor_arrays if callable(embedding): self._embedding_fn = embedding else: self._embedding_fn = ( lambda ids: embedding_ops.embedding_lookup(embedding, ids)) self._start_tokens = ops.convert_to_tensor(start_tokens, dtype=dtypes.int32, name="start_tokens") if self._start_tokens.get_shape().ndims != 1: raise ValueError("start_tokens must be a vector") self._end_token = ops.convert_to_tensor(end_token, dtype=dtypes.int32, name="end_token") if self._end_token.get_shape().ndims != 0: raise ValueError("end_token must be a scalar") self._batch_size = array_ops.size(start_tokens) self._beam_width = beam_width self._length_penalty_weight = length_penalty_weight self._coverage_penalty_weight = coverage_penalty_weight self._initial_cell_state = nest.map_structure( self._maybe_split_batch_beams, initial_state, self._cell.state_size) self._start_tokens = array_ops.tile( array_ops.expand_dims(self._start_tokens, 1), [1, self._beam_width]) self._start_inputs = self._embedding_fn(self._start_tokens) self._finished = array_ops.one_hot(array_ops.zeros([self._batch_size], dtype=dtypes.int32), depth=self._beam_width, on_value=False, off_value=True, dtype=dtypes.bool)
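A hedged sketch of how a `BeamSearchDecoder` like the one above is typically wired up with `tf.contrib.seq2seq` in TF 1.x; all sizes and token ids below are assumptions for illustration, and the single-beam initial state is replicated per beam with `tile_batch`:

```python
import tensorflow as tf
from tensorflow.contrib import seq2seq

vocab_size, embed_dim, num_units = 1000, 32, 64
batch_size, beam_width = 4, 5

embedding_matrix = tf.get_variable("embedding", [vocab_size, embed_dim])
decoder_cell = tf.nn.rnn_cell.LSTMCell(num_units)
start_tokens = tf.fill([batch_size], 1)   # assumed GO token id
end_token = 2                             # assumed EOS token id

# Replicate the (here: zero) initial state once per beam.
initial_state = seq2seq.tile_batch(
    decoder_cell.zero_state(batch_size, tf.float32), multiplier=beam_width)

decoder = seq2seq.BeamSearchDecoder(
    cell=decoder_cell,
    embedding=embedding_matrix,
    start_tokens=start_tokens,
    end_token=end_token,
    initial_state=initial_state,
    beam_width=beam_width,
    length_penalty_weight=0.0)

outputs, final_state, lengths = seq2seq.dynamic_decode(
    decoder, maximum_iterations=20)
predicted_ids = outputs.predicted_ids   # [batch_size, max_time, beam_width]
```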
def training_graph(self, input_data, input_labels, random_seed, data_spec, sparse_features=None, input_weights=None): """Constructs a TF graph for training a random tree. Args: input_data: A tensor or placeholder for input data. input_labels: A tensor or placeholder for labels associated with input_data. random_seed: The random number generator seed to use for this tree. 0 means use the current time as the seed. data_spec: A data_ops.TensorForestDataSpec object specifying the original feature/columns of the data. sparse_features: A tf.SparseTensor for sparse input data. input_weights: A float tensor or placeholder holding per-input weights, or None if all inputs are to be weighted equally. Returns: The last op in the random tree training graph. """ epoch = math_ops.to_int32(get_epoch_variable()) serialized_input_spec = data_spec.SerializeToString() if input_weights is None: input_weights = [] if input_data is None: input_data = [] sparse_indices = [] sparse_values = [] sparse_shape = [] if sparse_features is not None: sparse_indices = sparse_features.indices sparse_values = sparse_features.values sparse_shape = sparse_features.dense_shape # Count extremely random stats. (node_sums, node_squares, splits_indices, splits_sums, splits_squares, totals_indices, totals_sums, totals_squares, input_leaves) = (tensor_forest_ops.count_extremely_random_stats( input_data, sparse_indices, sparse_values, sparse_shape, input_labels, input_weights, self.variables.tree, self.variables.tree_thresholds, self.variables.node_to_accumulator_map, self.variables.candidate_split_features, self.variables.candidate_split_thresholds, self.variables.start_epoch, epoch, input_spec=serialized_input_spec, num_classes=self.params.num_output_columns, regression=self.params.regression)) node_update_ops = [] node_update_ops.append( state_ops.assign_add(self.variables.node_sums, node_sums)) splits_update_ops = [] splits_update_ops.append( tensor_forest_ops.scatter_add_ndim( self.variables.candidate_split_sums, splits_indices, splits_sums)) splits_update_ops.append( tensor_forest_ops.scatter_add_ndim(self.variables.accumulator_sums, totals_indices, totals_sums)) if self.params.regression: node_update_ops.append( state_ops.assign_add(self.variables.node_squares, node_squares)) splits_update_ops.append( tensor_forest_ops.scatter_add_ndim( self.variables.candidate_split_squares, splits_indices, splits_squares)) splits_update_ops.append( tensor_forest_ops.scatter_add_ndim( self.variables.accumulator_squares, totals_indices, totals_squares)) # Sample inputs. update_indices, feature_updates, threshold_updates = ( tensor_forest_ops.sample_inputs( input_data, sparse_indices, sparse_values, sparse_shape, input_weights, self.variables.node_to_accumulator_map, input_leaves, self.variables.candidate_split_features, self.variables.candidate_split_thresholds, input_spec=serialized_input_spec, split_initializations_per_input=( self.params.split_initializations_per_input), split_sampling_random_seed=random_seed)) update_features_op = state_ops.scatter_update( self.variables.candidate_split_features, update_indices, feature_updates) update_thresholds_op = state_ops.scatter_update( self.variables.candidate_split_thresholds, update_indices, threshold_updates) # Calculate finished nodes. 
with ops.control_dependencies(splits_update_ops): # Passing input_leaves to finished nodes here means that nodes that # have become stale won't be deallocated until an input reaches them, # because we're trying to avoid considering every fertile node for # performance reasons. finished, stale = tensor_forest_ops.finished_nodes( input_leaves, self.variables.node_to_accumulator_map, self.variables.candidate_split_sums, self.variables.candidate_split_squares, self.variables.accumulator_sums, self.variables.accumulator_squares, self.variables.start_epoch, epoch, num_split_after_samples=self.params.split_after_samples, min_split_samples=self.params.min_split_samples, dominate_method=self.params.dominate_method, dominate_fraction=self.params.dominate_fraction) # Update leaf scores. # TODO(thomaswc): Store the leaf scores in a TopN and only update the # scores of the leaves that were touched by this batch of input. children = array_ops.squeeze(array_ops.slice(self.variables.tree, [0, 0], [-1, 1]), squeeze_dims=[1]) is_leaf = math_ops.equal(constants.LEAF_NODE, children) leaves = math_ops.to_int32( array_ops.squeeze(array_ops.where(is_leaf), squeeze_dims=[1])) non_fertile_leaves = array_ops.boolean_mask( leaves, math_ops.less( array_ops.gather(self.variables.node_to_accumulator_map, leaves), 0)) # TODO(gilberth): It should be possible to limit the number of non # fertile leaves we calculate scores for, especially since we can only take # at most array_ops.shape(finished)[0] of them. with ops.control_dependencies(node_update_ops): sums = array_ops.gather(self.variables.node_sums, non_fertile_leaves) if self.params.regression: squares = array_ops.gather(self.variables.node_squares, non_fertile_leaves) non_fertile_leaf_scores = self._variance(sums, squares) else: non_fertile_leaf_scores = self._weighted_gini(sums) # Calculate best splits. with ops.control_dependencies(splits_update_ops): split_indices = tensor_forest_ops.best_splits( finished, self.variables.node_to_accumulator_map, self.variables.candidate_split_sums, self.variables.candidate_split_squares, self.variables.accumulator_sums, self.variables.accumulator_squares, regression=self.params.regression) # Grow tree. with ops.control_dependencies( [update_features_op, update_thresholds_op]): (tree_update_indices, tree_children_updates, tree_threshold_updates, new_eot) = (tensor_forest_ops.grow_tree( self.variables.end_of_tree, self.variables.node_to_accumulator_map, finished, split_indices, self.variables.candidate_split_features, self.variables.candidate_split_thresholds)) tree_update_op = state_ops.scatter_update(self.variables.tree, tree_update_indices, tree_children_updates) thresholds_update_op = state_ops.scatter_update( self.variables.tree_thresholds, tree_update_indices, tree_threshold_updates) # TODO(thomaswc): Only update the epoch on the new leaves. new_epoch_updates = epoch * array_ops.ones_like( tree_threshold_updates, dtype=dtypes.int32) epoch_update_op = state_ops.scatter_update( self.variables.start_epoch, tree_update_indices, new_epoch_updates) # Update fertile slots. 
with ops.control_dependencies([tree_update_op]): (n2a_map_updates, a2n_map_updates, accumulators_cleared, accumulators_allocated) = (tensor_forest_ops.update_fertile_slots( finished, non_fertile_leaves, non_fertile_leaf_scores, self.variables.end_of_tree, self.variables.accumulator_sums, self.variables.node_to_accumulator_map, stale, self.variables.node_sums, regression=self.params.regression)) # Ensure end_of_tree doesn't get updated until UpdateFertileSlots has # used it to calculate new leaves. gated_new_eot, = control_flow_ops.tuple( [new_eot], control_inputs=[n2a_map_updates]) eot_update_op = state_ops.assign(self.variables.end_of_tree, gated_new_eot) updates = [] updates.append(eot_update_op) updates.append(tree_update_op) updates.append(thresholds_update_op) updates.append(epoch_update_op) updates.append( state_ops.scatter_update(self.variables.node_to_accumulator_map, n2a_map_updates[0], n2a_map_updates[1])) updates.append( state_ops.scatter_update(self.variables.accumulator_to_node_map, a2n_map_updates[0], a2n_map_updates[1])) cleared_and_allocated_accumulators = array_ops.concat_v2( [accumulators_cleared, accumulators_allocated], 0) # Calculate values to put into scatter update for candidate counts. # Candidate split counts are always reset back to 0 for both cleared # and allocated accumulators. This means some accumulators might be doubly # reset to 0 if the were released and not allocated, then later allocated. split_values = array_ops.tile( array_ops.expand_dims( array_ops.expand_dims( array_ops.zeros_like(cleared_and_allocated_accumulators, dtype=dtypes.float32), 1), 2), [ 1, self.params.num_splits_to_consider, self.params.num_output_columns ]) updates.append( state_ops.scatter_update(self.variables.candidate_split_sums, cleared_and_allocated_accumulators, split_values)) if self.params.regression: updates.append( state_ops.scatter_update( self.variables.candidate_split_squares, cleared_and_allocated_accumulators, split_values)) # Calculate values to put into scatter update for total counts. total_cleared = array_ops.tile( array_ops.expand_dims( math_ops.negative( array_ops.ones_like(accumulators_cleared, dtype=dtypes.float32)), 1), [1, self.params.num_output_columns]) total_reset = array_ops.tile( array_ops.expand_dims( array_ops.zeros_like(accumulators_allocated, dtype=dtypes.float32), 1), [1, self.params.num_output_columns]) accumulator_updates = array_ops.concat_v2([total_cleared, total_reset], 0) updates.append( state_ops.scatter_update(self.variables.accumulator_sums, cleared_and_allocated_accumulators, accumulator_updates)) if self.params.regression: updates.append( state_ops.scatter_update(self.variables.accumulator_squares, cleared_and_allocated_accumulators, accumulator_updates)) # Calculate values to put into scatter update for candidate splits. split_features_updates = array_ops.tile( array_ops.expand_dims( math_ops.negative( array_ops.ones_like(cleared_and_allocated_accumulators)), 1), [1, self.params.num_splits_to_consider]) updates.append( state_ops.scatter_update(self.variables.candidate_split_features, cleared_and_allocated_accumulators, split_features_updates)) updates += self.finish_iteration() return control_flow_ops.group(*updates)
def update_confusion_matrix_variables(variables_to_update, y_true, y_pred, thresholds, top_k=None, class_id=None, sample_weight=None, multi_label=False, label_weights=None): """Returns op to update the given confusion matrix variables. For every pair of values in y_true and y_pred: true_positives: y_true == True and y_pred > thresholds false_negatives: y_true == True and y_pred <= thresholds true_negatives: y_true == False and y_pred <= thresholds false_positives: y_true == False and y_pred > thresholds The results will be weighted and added together. When multiple thresholds are provided, we will repeat the same for every threshold. For estimation of these metrics over a stream of data, the function creates an `update_op` operation that updates the given variables. If `sample_weight` is `None`, weights default to 1. Use weights of 0 to mask values. Args: variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys and corresponding variables to update as values. y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`. y_pred: A floating point `Tensor` of arbitrary shape and whose values are in the range `[0, 1]`. thresholds: A float value, float tensor, python list, or tuple of float thresholds in `[0, 1]`, or NEG_INF (used when top_k is set). top_k: Optional int, indicates that the positive labels should be limited to the top k predictions. class_id: Optional int, limits the prediction and labels to the class specified by this argument. sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions must be either `1`, or the same as the corresponding `y_true` dimension). multi_label: Optional boolean indicating whether multidimensional prediction/labels should be treated as multilabel responses, or flattened into a single label. When True, the values of `variables_to_update` must have a second dimension equal to the number of labels in y_true and y_pred, and those tensors must not be RaggedTensors. label_weights: (optional) tensor of non-negative weights for multilabel data. The weights are applied when calculating TP, FP, FN, and TN without explicit multilabel handling (i.e. when the data is to be flattened). Returns: Update op. Raises: ValueError: If `y_pred` and `y_true` have mismatched shapes, or if `sample_weight` is not `None` and its shape doesn't match `y_pred`, or if `variables_to_update` contains invalid keys. """ if multi_label and label_weights is not None: raise ValueError( '`label_weights` for multilabel data should be handled ' 'outside of `update_confusion_matrix_variables` when ' '`multi_label` is True.') if variables_to_update is None: return if not any(key for key in variables_to_update if key in list(ConfusionMatrix)): raise ValueError( 'Please provide at least one valid confusion matrix ' 'variable to update. Valid variable key options are: "{}". 
' 'Received: "{}"'.format(list(ConfusionMatrix), variables_to_update.keys())) variable_dtype = list(variables_to_update.values())[0].dtype y_true = math_ops.cast(y_true, dtype=variable_dtype) y_pred = math_ops.cast(y_pred, dtype=variable_dtype) thresholds = ops.convert_to_tensor_v2_with_dispatch(thresholds, dtype=variable_dtype) num_thresholds = thresholds.shape[0] if multi_label: one_thresh = math_ops.equal(math_ops.cast(1, dtype=dtypes.int32), array_ops.rank(thresholds), name='one_set_of_thresholds_cond') else: [y_pred, y_true ], _ = ragged_assert_compatible_and_get_flat_values([y_pred, y_true], sample_weight) one_thresh = math_ops.cast(True, dtype=dtypes.bool) invalid_keys = [ key for key in variables_to_update if key not in list(ConfusionMatrix) ] if invalid_keys: raise ValueError( 'Invalid keys: {}. Valid variable key options are: "{}"'.format( invalid_keys, list(ConfusionMatrix))) with ops.control_dependencies([ check_ops.assert_greater_equal(y_pred, math_ops.cast(0.0, dtype=y_pred.dtype), message='predictions must be >= 0'), check_ops.assert_less_equal(y_pred, math_ops.cast(1.0, dtype=y_pred.dtype), message='predictions must be <= 1') ]): if sample_weight is None: y_pred, y_true = losses_utils.squeeze_or_expand_dimensions( y_pred, y_true) else: sample_weight = math_ops.cast(sample_weight, dtype=variable_dtype) y_pred, y_true, sample_weight = ( losses_utils.squeeze_or_expand_dimensions( y_pred, y_true, sample_weight=sample_weight)) y_pred.shape.assert_is_compatible_with(y_true.shape) if top_k is not None: y_pred = _filter_top_k(y_pred, top_k) if class_id is not None: y_true = y_true[..., class_id] y_pred = y_pred[..., class_id] pred_shape = array_ops.shape(y_pred) num_predictions = pred_shape[0] if y_pred.shape.ndims == 1: num_labels = 1 else: num_labels = gen_math_ops.Prod(input=pred_shape[1:], axis=0) thresh_label_tile = control_flow_ops.cond( one_thresh, lambda: num_labels, lambda: math_ops.cast(1, dtype=dtypes.int32)) # Reshape predictions and labels, adding a dim for thresholding. if multi_label: predictions_extra_dim = array_ops.expand_dims(y_pred, 0) labels_extra_dim = array_ops.expand_dims( math_ops.cast(y_true, dtype=dtypes.bool), 0) else: # Flatten predictions and labels when not multilabel. predictions_extra_dim = array_ops.reshape(y_pred, [1, -1]) labels_extra_dim = array_ops.reshape( math_ops.cast(y_true, dtype=dtypes.bool), [1, -1]) # Tile the thresholds for every prediction. if multi_label: thresh_pretile_shape = [num_thresholds, 1, -1] thresh_tiles = [1, num_predictions, thresh_label_tile] data_tiles = [num_thresholds, 1, 1] else: thresh_pretile_shape = [num_thresholds, -1] thresh_tiles = [1, num_predictions * num_labels] data_tiles = [num_thresholds, 1] thresh_tiled = array_ops.tile( array_ops.reshape(thresholds, thresh_pretile_shape), array_ops.stack(thresh_tiles)) # Tile the predictions for every threshold. preds_tiled = array_ops.tile(predictions_extra_dim, data_tiles) # Compare predictions and threshold. 
pred_is_pos = math_ops.greater(preds_tiled, thresh_tiled) # Tile labels by number of thresholds label_is_pos = array_ops.tile(labels_extra_dim, data_tiles) if sample_weight is not None: sample_weight = weights_broadcast_ops.broadcast_weights( math_ops.cast(sample_weight, dtype=variable_dtype), y_pred) weights_tiled = array_ops.tile( array_ops.reshape(sample_weight, thresh_tiles), data_tiles) else: weights_tiled = None if label_weights is not None and not multi_label: label_weights = array_ops.expand_dims(label_weights, 0) label_weights = weights_broadcast_ops.broadcast_weights( label_weights, y_pred) label_weights_tiled = array_ops.tile( array_ops.reshape(label_weights, thresh_tiles), data_tiles) if weights_tiled is None: weights_tiled = label_weights_tiled else: weights_tiled = math_ops.multiply(weights_tiled, label_weights_tiled) update_ops = [] def weighted_assign_add(label, pred, weights, var): label_and_pred = math_ops.cast(math_ops.logical_and(label, pred), dtype=var.dtype) if weights is not None: label_and_pred *= math_ops.cast(weights, dtype=var.dtype) return var.assign_add(math_ops.reduce_sum(label_and_pred, 1)) loop_vars = { ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos), } update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update if update_fn or update_tn: pred_is_neg = math_ops.logical_not(pred_is_pos) loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos, pred_is_neg) if update_fp or update_tn: label_is_neg = math_ops.logical_not(label_is_pos) loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg, pred_is_pos) if update_tn: loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = (label_is_neg, pred_is_neg) for matrix_cond, (label, pred) in loop_vars.items(): if matrix_cond in variables_to_update: update_ops.append( weighted_assign_add(label, pred, weights_tiled, variables_to_update[matrix_cond])) return control_flow_ops.group(update_ops)
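The core vectorization in the update above is comparing every prediction against every threshold at once; a tiny NumPy sketch of that tiling for the single-label case (illustration only):

```python
import numpy as np

y_pred = np.array([0.1, 0.6, 0.9], dtype=np.float32)        # 3 predictions
thresholds = np.array([0.25, 0.5, 0.75], dtype=np.float32)  # 3 thresholds

thresh_tiled = np.tile(thresholds.reshape(-1, 1), (1, y_pred.size))  # [3, 3]
preds_tiled = np.tile(y_pred.reshape(1, -1), (len(thresholds), 1))   # [3, 3]
pred_is_pos = preds_tiled > thresh_tiled
# Row t holds the thresholded decisions for thresholds[t]; summing a row
# (with labels and weights applied) gives the per-threshold counts.
assert pred_is_pos.tolist() == [[False, True, True],
                                [False, True, True],
                                [False, False, True]]
```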
def create_estimator_spec(self, features, mode, logits, labels=None, train_op_fn=None): """See `Head`.""" with ops.name_scope('head'): logits = _check_logits(logits, self.logits_dimension) # Predict. pred_keys = prediction_keys.PredictionKeys with ops.name_scope(None, 'predictions', (logits, )): # class_ids's shape is [batch_size] class_ids = math_ops.argmax(logits, 1, name=pred_keys.CLASS_IDS) class_ids = array_ops.expand_dims(class_ids, axis=(1, )) if self._label_vocabulary: table = lookup_ops.index_to_string_table_from_tensor( vocabulary_list=self._label_vocabulary, name='class_string_lookup') classes = table.lookup(class_ids) else: classes = string_ops.as_string(class_ids, name='str_classes') probabilities = nn.softmax(logits, name=pred_keys.PROBABILITIES) predictions = { pred_keys.LOGITS: logits, pred_keys.PROBABILITIES: probabilities, # Expand to [batch_size, 1] pred_keys.CLASS_IDS: class_ids, pred_keys.CLASSES: classes, } if mode == model_fn.ModeKeys.PREDICT: batch_size = array_ops.shape(probabilities)[0] export_class_list = self._label_vocabulary if not export_class_list: export_class_list = string_ops.as_string( math_ops.range(self._n_classes)) export_output_classes = array_ops.tile( input=array_ops.expand_dims(input=export_class_list, axis=0), multiples=[batch_size, 1]) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.PREDICT, predictions=predictions, export_outputs={ '': export_output.ClassificationOutput( scores=probabilities, # `ClassificationOutput` requires string classes. classes=export_output_classes) }) # Eval. unweighted_loss, label_ids = self.create_loss(features=features, mode=mode, logits=logits, labels=labels) weights = _weights(features, self._weight_column) training_loss = losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.SUM) if mode == model_fn.ModeKeys.EVAL: return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.EVAL, predictions=predictions, loss=training_loss, eval_metric_ops=self._eval_metric_ops( labels=label_ids, probabilities=probabilities, logits=logits, class_ids=class_ids, unweighted_loss=unweighted_loss, weights=weights)) # Train. if train_op_fn is None: raise ValueError('train_op_fn can not be None.') with ops.name_scope(''): summary.scalar( _summary_key(self._head_name, metric_keys.MetricKeys.LOSS), training_loss) summary.scalar( _summary_key(self._head_name, metric_keys.MetricKeys.LOSS_MEAN), losses.compute_weighted_loss(unweighted_loss, weights=weights, reduction=losses.Reduction.MEAN)) return model_fn.EstimatorSpec(mode=model_fn.ModeKeys.TRAIN, predictions=predictions, loss=training_loss, train_op=train_op_fn(training_loss))
def _MatrixSquareRootGrad(op, grad): """Gradient for MatrixSquareRoot.""" # Let A be an m x m square matrix (or batch of matrices) # Let R = sqrtm(A) # By definition, A = RR # Take the differential: dA = d(RR) = RdR + dRR # Solve the resulting Sylvester equation for dR # Used to find Kronecker products within the Sylvester equation def _KroneckerProduct(b1, b2): """Computes the Kronecker product of two batches of square matrices""" b1_shape = array_ops.shape(b1) b2_shape = array_ops.shape(b2) b1_order = b1_shape[-1] b2_order = b2_shape[-1] shape_slice_size = [math_ops.subtract(array_ops.size(b1_shape), 2)] shape_slice = array_ops.slice( b1_shape, [0], shape_slice_size) # Same for both batches b1_reshape_shape = array_ops.concat( [shape_slice, [b1_order], [1], [b1_order], [1]], 0) b2_reshape_shape = array_ops.concat( [shape_slice, [1], [b2_order], [1], [b2_order]], 0) b1_reshape = array_ops.reshape(b1, b1_reshape_shape) b2_reshape = array_ops.reshape(b2, b2_reshape_shape) order_prod = b1_order * b2_order kprod_shape = array_ops.concat( [shape_slice, [order_prod], [order_prod]], 0) return array_ops.reshape(b1_reshape * b2_reshape, kprod_shape) sqrtm = op.outputs[0] # R shape = array_ops.shape(sqrtm) order = shape[-1] # m matrix_count = math_ops.reduce_prod(shape[0:-2]) # Get batch of m x m identity matrices eye = linalg_ops.eye(order, dtype=sqrtm.dtype) # m x m identity matrix eye_flat = array_ops.reshape(eye, [-1]) eye_tiled = array_ops.tile(eye_flat, [matrix_count]) eye_batch = array_ops.reshape(eye_tiled, shape) # The transpose of R is taken in the k1 term instead of k2 in # order to prevent redundant transposition of R (i.e. (R')' = R) sqrtm_transpose = array_ops.matrix_transpose(sqrtm) k1 = _KroneckerProduct(eye_batch, sqrtm_transpose) k2 = _KroneckerProduct(sqrtm, eye_batch) ksum = math_ops.add(k1, k2) # Vectorize dA shape_slice_size = [math_ops.subtract(array_ops.size(shape), 2)] shape_slice = array_ops.slice(shape, [0], shape_slice_size) shape_vec_da = array_ops.concat([shape_slice, [order * order], [1]], 0) vec_da = array_ops.reshape(array_ops.matrix_transpose(grad), shape_vec_da) # Solve for vec(dR) vec_dsqrtm = linalg_ops.matrix_solve(ksum, vec_da) # Solve for dR by inverse vectorizing vec(dR) dsqrtm_transpose = array_ops.reshape(vec_dsqrtm, shape) return array_ops.matrix_transpose(dsqrtm_transpose)
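For reference, a small NumPy check of the differential identity the comments above start from: with the column-major identity vec(AXB) = kron(B.T, A) vec(X), the relation dA = R dR + dR R becomes (kron(I, R) + kron(R.T, I)) vec(dR) = vec(dA); the backward pass above builds the corresponding system with R transposed in the Kronecker factors and solves it for the incoming gradient.

```python
import numpy as np

rng = np.random.default_rng(0)
n = 3
R = rng.standard_normal((n, n))
dR = rng.standard_normal((n, n))
dA = R @ dR + dR @ R                      # differential of A = R @ R

vec = lambda M: M.reshape(-1, order="F")  # column-major vectorization
ksum = np.kron(np.eye(n), R) + np.kron(R.T, np.eye(n))
np.testing.assert_allclose(ksum @ vec(dR), vec(dA), rtol=1e-10)
```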
def safe_embedding_lookup_sparse( embedding_weights, sparse_ids, sparse_weights=None, combiner="mean", default_id=None, name="safe_embedding_lookup_sparse", partition_strategy=None, # not used max_norm=None, return_trainable=False, ): """Provides a dynamic version of `tf.nn.safe_embedding_lookup_sparse`. Lookup embedding results, accounting for empty features and invalid weights. Any IDs will be treated as valid, including non-positive IDs. Invalid weights (<= 0) are pruned from input weights, as well as any IDs with non-positive weight. For an entry with no features, the embedding vector for `default_id` is returned, or the 0-vector if `default_id` is not supplied. The ids and weights may be multi-dimensional. Embeddings are always aggregated along the last dimension. Args: embedding_weights: A single `dynamic_embedding.Variable` instance representing the complete embedding tensor. sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the ids. `d_0` is typically batch size. sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing float weights corresponding to `sparse_ids`, or `None` if all weights are assumed to be 1.0. combiner: A string specifying how to combine embedding results for each entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean" the default. default_id: The id to use for an entry with no features. name: A name for this operation (optional). partition_strategy: Not used by this dynamic version; kept for interface compatibility. max_norm: If not `None`, all embeddings are l2-normalized to max_norm before combining. return_trainable: Optional bool. If `True`, also return the `TrainableWrapper` for use in an optimizer's `var_list`. Defaults to `False`. Returns: combined_embeddings: A dense `Tensor` of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`. trainable_wrap: A TrainableWrapper object used to fill the Optimizers `var_list`. Only provided if `return_trainable` is True. Raises: ValueError: if `embedding_weights` is empty. """ if embedding_weights is None: raise ValueError("Missing embedding_weights %s." % embedding_weights) if embedding_weights.key_dtype != sparse_ids.dtype: raise TypeError( "embedding_weights.key_dtype should be same with sparse_ids.dtype: " "{} vs. {}".format(embedding_weights.key_dtype, sparse_ids.dtype)) weights_dtype = sparse_weights.dtype if sparse_weights is not None else None if weights_dtype and embedding_weights.value_dtype != weights_dtype: raise TypeError( "embedding_weights.value_dtype should be same with sparse_weights.dtype" ": {} vs. {}".format(embedding_weights.value_dtype, weights_dtype)) scope = variable_scope.get_variable_scope() full_name = scope.name + "/" + name if scope.name else name with ops.name_scope(full_name + "/"): # Reshape higher-rank sparse ids and weights to linear segment ids. original_shape = sparse_ids.dense_shape original_rank_dim = tensor_shape.dimension_value( sparse_ids.dense_shape.get_shape()[0]) original_rank = (array_ops.size(original_shape) if original_rank_dim is None else original_rank_dim) sparse_ids = sparse_ops.sparse_reshape( sparse_ids, [ math_ops.reduce_prod( array_ops.slice(original_shape, [0], [original_rank - 1])), array_ops.gather(original_shape, original_rank - 1), ], ) if sparse_weights is not None: sparse_weights = sparse_tensor.SparseTensor( sparse_ids.indices, sparse_weights.values, sparse_ids.dense_shape) # Prune invalid weights. if combiner != "sum": sparse_ids, sparse_weights = _prune_invalid_weights( sparse_ids, sparse_weights) # Fill in dummy values for empty features, if necessary. 
sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows( sparse_ids, default_id or 0) if sparse_weights is not None: sparse_weights, _ = sparse_ops.sparse_fill_empty_rows( sparse_weights, 1.0) result, trainable_ = embedding_lookup_sparse( embedding_weights, sparse_ids, sparse_weights, combiner=combiner, partition_strategy=partition_strategy, name=name + "/embedding_lookup_sparse", max_norm=max_norm, return_trainable=True, ) if default_id is None: # Broadcast is_row_empty to the same shape as embedding_lookup_result, # for use in Select. is_row_empty = array_ops.tile( array_ops.reshape(is_row_empty, [-1, 1]), array_ops.stack([1, array_ops.shape(result)[1]]), ) result = array_ops.where(is_row_empty, array_ops.zeros_like(result), result, name="where") # Reshape back from linear ids back into higher-dimensional dense result. final_result = array_ops.reshape( result, array_ops.concat( [ array_ops.slice( math_ops.cast(original_shape, dtypes.int32), [0], [original_rank - 1], ), array_ops.slice(array_ops.shape(result), [1], [-1]), ], 0, ), ) final_result.set_shape( tensor_shape.unknown_shape( (tensor_shape.Dimension(original_rank_dim) - 1).value).concatenate(result.get_shape()[1:])) return (final_result, trainable_) if return_trainable else final_result
def loop_fn(i): x1 = array_ops.gather(x, i) return array_ops.tile(x1, [i, 1])
def batch_matrix_pow(matrices, powers): """Compute powers of matrices, e.g. A^3 = matmul(matmul(A, A), A). Uses exponentiation by squaring, with O(log(p)) matrix multiplications to compute A^p. Args: matrices: [batch size x N x N] powers: Which integer power to raise each matrix to [batch size] Returns: The matrices raised to their respective powers, same dimensions as the "matrices" argument. """ def terminate_when_all_zero(current_argument, residual_powers, accumulator): del current_argument, accumulator # not used for condition do_exit = math_ops.reduce_any( math_ops.greater(residual_powers, array_ops.ones_like(residual_powers))) return do_exit def do_iteration(current_argument, residual_powers, accumulator): """Compute one step of iterative exponentiation by squaring. The recursive form is: power(A, p) = { power(matmul(A, A), p / 2) for even p { matmul(A, power(matmul(A, A), (p - 1) / 2)) for odd p power(A, 0) = I The power(A, 0) = I case is handled by starting with accumulator set to the identity matrix; matrices with zero residual powers are passed through unchanged. Args: current_argument: On this step, what is the first argument (A^2..^2) to the (unrolled) recursive function? [batch size x N x N] residual_powers: On this step, what is the second argument (residual p)? [batch_size] accumulator: Accumulates the exterior multiplications from the odd powers (initially the identity matrix). [batch_size x N x N] Returns: Updated versions of each argument for one step of the unrolled computation. Does not change parts of the batch which have a residual power of zero. """ is_even = math_ops.equal( residual_powers % 2, array_ops.zeros(array_ops.shape(residual_powers), dtype=dtypes.int32)) new_accumulator = array_ops.where( is_even, accumulator, math_ops.matmul(accumulator, current_argument)) new_argument = math_ops.matmul(current_argument, current_argument) do_update = math_ops.greater(residual_powers, 1) new_residual_powers = residual_powers - residual_powers % 2 new_residual_powers //= 2 # Stop updating if we've reached our base case; some batch elements may # finish sooner than others. accumulator = array_ops.where(do_update, new_accumulator, accumulator) current_argument = array_ops.where(do_update, new_argument, current_argument) residual_powers = array_ops.where(do_update, new_residual_powers, residual_powers) return (current_argument, residual_powers, accumulator) matrices = ops.convert_to_tensor(matrices) powers = math_ops.cast(powers, dtype=dtypes.int32) ident = array_ops.expand_dims( array_ops.diag( array_ops.ones([array_ops.shape(matrices)[1]], dtype=matrices.dtype)), 0) ident_tiled = array_ops.tile(ident, [array_ops.shape(matrices)[0], 1, 1]) (final_argument, final_residual_power, final_accumulator) = control_flow_ops.while_loop( terminate_when_all_zero, do_iteration, [matrices, powers, ident_tiled]) return array_ops.where( math_ops.equal( final_residual_power, array_ops.zeros_like(final_residual_power, dtype=dtypes.int32)), ident_tiled, math_ops.matmul(final_argument, final_accumulator))
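The recursion in the docstring above is plain exponentiation by squaring; a minimal single-matrix NumPy sketch of the same idea (illustration only, not the batched TF version):

```python
import numpy as np

def matrix_power_by_squaring(a, p):
    """Compute a ** p for a square matrix and non-negative integer p."""
    result = np.eye(a.shape[0], dtype=a.dtype)   # power(A, 0) = I
    while p > 0:
        if p % 2 == 1:                           # odd p: peel off one factor of A
            result = result @ a
        a = a @ a                                # square the running argument
        p //= 2
    return result

a = np.array([[1., 1.], [0., 1.]])
np.testing.assert_allclose(matrix_power_by_squaring(a, 5),
                           np.linalg.matrix_power(a, 5))
```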
def _sample_n(self, n, seed=None): with ops.control_dependencies(self._assertions): n = ops.convert_to_tensor(n, name="n") static_n = tensor_util.constant_value(n) n = int(static_n) if static_n is not None else n cat_samples = self.cat.sample_n(n, seed=seed) static_samples_shape = cat_samples.get_shape() if static_samples_shape.is_fully_defined(): samples_shape = static_samples_shape.as_list() samples_size = static_samples_shape.num_elements() else: samples_shape = array_ops.shape(cat_samples) samples_size = array_ops.size(cat_samples) static_batch_shape = self.get_batch_shape() if static_batch_shape.is_fully_defined(): batch_shape = static_batch_shape.as_list() batch_size = static_batch_shape.num_elements() else: batch_shape = self.batch_shape() batch_size = array_ops.reduce_prod(batch_shape) static_event_shape = self.get_event_shape() if static_event_shape.is_fully_defined(): event_shape = np.array(static_event_shape.as_list(), dtype=np.int32) else: event_shape = self.event_shape() # Get indices into the raw cat sampling tensor. We will # need these to stitch sample values back out after sampling # within the component partitions. samples_raw_indices = array_ops.reshape( math_ops.range(0, samples_size), samples_shape) # Partition the raw indices so that we can use # dynamic_stitch later to reconstruct the samples from the # known partitions. partitioned_samples_indices = data_flow_ops.dynamic_partition( data=samples_raw_indices, partitions=cat_samples, num_partitions=self.num_components) # Copy the batch indices n times, as we will need to know # these to pull out the appropriate rows within the # component partitions. batch_raw_indices = array_ops.reshape( array_ops.tile(math_ops.range(0, batch_size), [n]), samples_shape) # Explanation of the dynamic partitioning below: # batch indices are i.e., [0, 1, 0, 1, 0, 1] # Suppose partitions are: # [1 1 0 0 1 1] # After partitioning, batch indices are cut as: # [batch_indices[x] for x in 2, 3] # [batch_indices[x] for x in 0, 1, 4, 5] # i.e. # [1 1] and [0 0 0 0] # Now we sample n=2 from part 0 and n=4 from part 1. # For part 0 we want samples from batch entries 1, 1 (samples 0, 1), # and for part 1 we want samples from batch entries 0, 0, 0, 0 # (samples 0, 1, 2, 3). partitioned_batch_indices = data_flow_ops.dynamic_partition( data=batch_raw_indices, partitions=cat_samples, num_partitions=self.num_components) samples_class = [None for _ in range(self.num_components)] for c in range(self.num_components): n_class = array_ops.size(partitioned_samples_indices[c]) seed = distribution_util.gen_new_seed(seed, "mixture") samples_class_c = self.components[c].sample_n(n_class, seed=seed) # Pull out the correct batch entries from each index. # To do this, we may have to flatten the batch shape. # For sample s, batch element b of component c, we get the # partitioned batch indices from # partitioned_batch_indices[c]; and shift each element by # the sample index. The final lookup can be thought of as # a matrix gather along locations (s, b) in # samples_class_c where the n_class rows correspond to # samples within this component and the batch_size columns # correspond to batch elements within the component. # # Thus the lookup index is # lookup[c, i] = batch_size * s[i] + b[c, i] # for i = 0 ... n_class[c] - 1. 
lookup_partitioned_batch_indices = ( batch_size * math_ops.range(n_class) + partitioned_batch_indices[c]) samples_class_c = array_ops.reshape( samples_class_c, array_ops.concat_v2(([n_class * batch_size], event_shape), 0)) samples_class_c = array_ops.gather( samples_class_c, lookup_partitioned_batch_indices, name="samples_class_c_gather") samples_class[c] = samples_class_c # Stitch back together the samples across the components. lhs_flat_ret = data_flow_ops.dynamic_stitch( indices=partitioned_samples_indices, data=samples_class) # Reshape back to proper sample, batch, and event shape. ret = array_ops.reshape( lhs_flat_ret, array_ops.concat_v2((samples_shape, self.event_shape()), 0)) ret.set_shape( tensor_shape.TensorShape(static_samples_shape).concatenate( self.get_event_shape())) return ret
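A small NumPy sketch of the partition-and-stitch bookkeeping used above (illustration only): slot indices are grouped by sampled component, per-component values are produced, and then scattered back into their original slots, mirroring `dynamic_partition` / `dynamic_stitch`:

```python
import numpy as np

cat_samples = np.array([1, 1, 0, 0, 1, 1])      # component chosen for each slot
slot_indices = np.arange(cat_samples.size)

# "dynamic_partition": group the slot indices by component.
partitioned = [slot_indices[cat_samples == c] for c in range(2)]

# Stand-ins for per-component samples (component c just emits 10 * c).
samples_class = [np.full(p.size, 10 * c) for c, p in enumerate(partitioned)]

# "dynamic_stitch": scatter the per-component values back to their slots.
stitched = np.empty(cat_samples.size, dtype=int)
for idx, vals in zip(partitioned, samples_class):
    stitched[idx] = vals
assert stitched.tolist() == [10, 10, 0, 0, 10, 10]
```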
def matrix_to_powers(matrix, powers): """Raise a single matrix to multiple powers.""" matrix_tiled = array_ops.tile(array_ops.expand_dims(matrix, 0), [array_ops.size(powers), 1, 1]) return batch_matrix_pow(matrix_tiled, powers)
def initialize(self, name=None): with ops.name_scope(name, "TrainingHelperInitialize"): finished = array_ops.tile([False], [self._batch_size]) all_finished = math_ops.reduce_all(finished) next_inputs = self._embedding_fn(self._input_tas.read(0)) return (finished, next_inputs)
def _find_loss_augmented_facility_idx(pairwise_distances, labels, chosen_ids, candidate_ids, margin_multiplier, margin_type): """Find the next centroid that maximizes the loss augmented inference. This function is a subroutine called from compute_augmented_facility_locations Args: pairwise_distances: 2-D Tensor of pairwise distances. labels: 1-D Tensor of ground truth cluster assignment. chosen_ids: 1-D Tensor of current centroid indices. candidate_ids: 1-D Tensor of candidate indices. margin_multiplier: multiplication constant. margin_type: Type of structured margin to use. Default is nmi. Returns: integer index. """ num_candidates = array_ops.shape(candidate_ids)[0] pairwise_distances_chosen = array_ops.gather(pairwise_distances, chosen_ids) pairwise_distances_candidate = array_ops.gather( pairwise_distances, candidate_ids) pairwise_distances_chosen_tile = array_ops.tile( pairwise_distances_chosen, [1, num_candidates]) candidate_scores = -1.0 * math_ops.reduce_sum( array_ops.reshape( math_ops.reduce_min( array_ops.concat([ pairwise_distances_chosen_tile, array_ops.reshape(pairwise_distances_candidate, [1, -1]) ], 0), axis=0, keep_dims=True), [num_candidates, -1]), axis=1) nmi_scores = array_ops.zeros([num_candidates]) iteration = array_ops.constant(0) def func_cond(iteration, nmi_scores): del nmi_scores # Unused in func_cond() return iteration < num_candidates def func_body(iteration, nmi_scores): predictions = get_cluster_assignment( pairwise_distances, array_ops.concat([chosen_ids, [candidate_ids[iteration]]], 0)) nmi_score_i = compute_clustering_score(labels, predictions, margin_type) pad_before = array_ops.zeros([iteration]) pad_after = array_ops.zeros([num_candidates - 1 - iteration]) # return 1 - NMI score as the structured loss. # because NMI is higher the better [0,1]. return iteration + 1, nmi_scores + array_ops.concat( [pad_before, [1.0 - nmi_score_i], pad_after], 0) _, nmi_scores = control_flow_ops.while_loop( func_cond, func_body, [iteration, nmi_scores]) candidate_scores = math_ops.add( candidate_scores, margin_multiplier * nmi_scores) argmax_index = math_ops.to_int32( math_ops.argmax(candidate_scores, dimension=0)) return candidate_ids[argmax_index]
def triplet_loss_adapted_from_tf(y_true, y_pred): del y_true margin = 1. labels = y_pred[:, :1] labels = tf.cast(labels, dtype='int32') embeddings = y_pred[:, 1:] ### Code from Tensorflow function [tf.contrib.losses.metric_learning.triplet_semihard_loss] starts here: # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor. # lshape=array_ops.shape(labels) # assert lshape.shape == 1 # labels = array_ops.reshape(labels, [lshape[0], 1]) # Build pairwise squared distance matrix. pdist_matrix = pairwise_distance(embeddings, squared=True) # Build pairwise binary adjacency matrix. adjacency = math_ops.equal(labels, array_ops.transpose(labels)) # Invert so we can select negatives only. adjacency_not = math_ops.logical_not(adjacency) # global batch_size batch_size = array_ops.size(labels) # was 'array_ops.size(labels)' # Compute the mask. pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1]) mask = math_ops.logical_and( array_ops.tile(adjacency_not, [batch_size, 1]), math_ops.greater( pdist_matrix_tile, array_ops.reshape(array_ops.transpose(pdist_matrix), [-1, 1]))) mask_final = array_ops.reshape( math_ops.greater( math_ops.reduce_sum(math_ops.cast(mask, dtype=dtypes.float32), 1, keepdims=True), 0.0), [batch_size, batch_size]) mask_final = array_ops.transpose(mask_final) adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32) mask = math_ops.cast(mask, dtype=dtypes.float32) # negatives_outside: smallest D_an where D_an > D_ap. negatives_outside = array_ops.reshape( masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size]) negatives_outside = array_ops.transpose(negatives_outside) # negatives_inside: largest D_an. negatives_inside = array_ops.tile( masked_maximum(pdist_matrix, adjacency_not), [1, batch_size]) semi_hard_negatives = array_ops.where(mask_final, negatives_outside, negatives_inside) loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives) mask_positives = math_ops.cast(adjacency, dtype=dtypes.float32) - array_ops.diag( array_ops.ones([batch_size])) # In lifted-struct, the authors multiply 0.5 for upper triangular # in semihard, they take all positive pairs except the diagonal. num_positives = math_ops.reduce_sum(mask_positives) semi_hard_triplet_loss_distance = math_ops.truediv( math_ops.reduce_sum( math_ops.maximum(math_ops.multiply(loss_mat, mask_positives), 0.0)), num_positives, name='triplet_semihard_loss') ### Code from Tensorflow function semi-hard triplet loss ENDS here. return semi_hard_triplet_loss_distance
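A hedged sketch of the input-packing convention this Keras adaptation relies on: the integer labels are fed to the model as an extra input and concatenated in front of the embedding, so they arrive in `y_pred[:, :1]` with the embedding in `y_pred[:, 1:]`. The layer sizes and names here are illustrative assumptions, not part of the original code:

```python
import tensorflow as tf

def build_embedding_model(input_dim=128, embed_dim=64):
    features = tf.keras.Input(shape=(input_dim,), name="features")
    labels = tf.keras.Input(shape=(1,), name="labels")   # labels ride along as an input
    embeddings = tf.keras.layers.Dense(embed_dim)(features)
    # Pack [label, embedding] so the loss can split them back out of y_pred.
    packed = tf.keras.layers.Concatenate(axis=1)([labels, embeddings])
    return tf.keras.Model(inputs=[features, labels], outputs=packed)

model = build_embedding_model()
model.compile(optimizer="adam", loss=triplet_loss_adapted_from_tf)
```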