def testConstraints(self):
  """Constraint callables passed to conv layers are stored unchanged."""
  kernel_fn = lambda x: x / math_ops.reduce_sum(x)
  bias_fn = lambda x: x / math_ops.reduce_max(x)
  # (layer class, input shape) for the 1-D, 2-D and 3-D convolutions.
  cases = [
      (conv_layers.Conv1D, (5, 3, 5)),
      (conv_layers.Conv2D, (5, 3, 3, 5)),
      (conv_layers.Conv3D, (5, 3, 3, 3, 5)),
  ]
  for layer_cls, input_shape in cases:
    layer = layer_cls(2, 3,
                      kernel_constraint=kernel_fn,
                      bias_constraint=bias_fn)
    # Build the layer by calling it once on random input.
    layer(random_ops.random_uniform(input_shape, seed=1))
    self.assertEqual(layer.kernel_constraint, kernel_fn)
    self.assertEqual(layer.bias_constraint, bias_fn)
def confusion_matrix(predictions, labels, num_classes=None, dtype=dtypes.int32,
                     name=None):
  """Computes the confusion matrix from predictions and labels.

  Calculate the Confusion Matrix for a pair of prediction and label 1-D int
  arrays.

  Considering a prediction array such as: `[1, 2, 3]`
  And a label array such as: `[2, 2, 3]`

  The confusion matrix returned would be the following one:
      [[0, 0, 0, 0]
       [0, 0, 1, 0]
       [0, 0, 1, 0]
       [0, 0, 0, 1]]

  Where the matrix rows represent the prediction labels and the columns
  represent the real labels. The confusion matrix is always a 2-D array of
  shape [n, n], where n is the number of valid labels for a given
  classification task. Both prediction and labels must be 1-D arrays of the
  same shape in order for this function to work.

  Args:
    predictions: A 1-D array representing the predictions for a given
      classification.
    labels: A 1-D array representing the real labels for the classification
      task.
    num_classes: The possible number of labels the classification task can
      have. If this value is not provided, it will be calculated using both
      predictions and labels array.
    dtype: Data type of the confusion matrix.
    name: Scope name.

  Returns:
    A k X k matrix representing the confusion matrix, where k is the number of
    possible labels in the classification task.

  Raises:
    ValueError: If both predictions and labels are not 1-D vectors and do not
      have the same size.
  """
  with ops.name_scope(name, 'confusion_matrix',
                      [predictions, labels, num_classes]) as name:
    # Drop trailing size-1 dimensions so both inputs are rank-compatible.
    predictions, labels = metric_ops_util.remove_squeezable_dimensions(
        ops.convert_to_tensor(
            predictions, name='predictions', dtype=dtypes.int64),
        ops.convert_to_tensor(labels, name='labels', dtype=dtypes.int64))
    if num_classes is None:
      # Infer class count from the largest id seen in either input.
      num_classes = math_ops.maximum(math_ops.reduce_max(predictions),
                                     math_ops.reduce_max(labels)) + 1
    shape = array_ops.pack([num_classes, num_classes])
    # Each (prediction, label) pair indexes one cell of the matrix.
    indices = array_ops.transpose(array_ops.pack([predictions, labels]))
    values = array_ops.ones_like(predictions, dtype)
    cm_sparse = ops.SparseTensor(
        indices=indices, values=values, shape=shape)
    zero_matrix = array_ops.zeros(math_ops.to_int32(shape), dtype)
    # sparse_add accumulates duplicate (prediction, label) pairs into counts.
    return sparse_ops.sparse_add(zero_matrix, cm_sparse)
def softmax(x, axis=-1):
  """Softmax activation function.

  Transforms the outputs so that all values are in range (0, 1) and sum to 1.
  It is often used as the activation for the last layer of a classification
  network because the result could be interpreted as a probability
  distribution. The softmax of x is calculated by
  exp(x)/tf.reduce_sum(exp(x)).

  Arguments:
      x : Input tensor.
      axis: Integer, axis along which the softmax normalization is applied.

  Returns:
      Tensor, output of softmax transformation (all values are non-negative
        and sum to 1).

  Raises:
      ValueError: In case `dim(x) == 1`.
  """
  rank = K.ndim(x)
  if rank == 2:
    # NOTE(review): `axis` is not forwarded on this fast path; nn.softmax
    # normalizes the last dimension -- confirm callers only pass axis=-1
    # for 2-D inputs.
    return nn.softmax(x)
  if rank > 2:
    # Subtract the per-axis max before exponentiating for numerical
    # stability, then normalize.
    shifted = x - math_ops.reduce_max(x, axis=axis, keepdims=True)
    exps = math_ops.exp(shifted)
    return exps / math_ops.reduce_sum(exps, axis=axis, keepdims=True)
  raise ValueError('Cannot apply softmax to a tensor that is 1D. '
                   'Received input: %s' % (x,))
def finalize(self, outputs, final_state, sequence_lengths):
  """Finalize and return the predicted_ids.

  Args:
    outputs: An instance of BeamSearchDecoderOutput.
    final_state: An instance of BeamSearchDecoderState. Passed through to the
      output.
    sequence_lengths: An `int64` tensor shaped `[batch_size, beam_width]`.
      The sequence lengths determined for each beam during decode.
      **NOTE** These are ignored; the updated sequence lengths are stored in
      `final_state.lengths`.

  Returns:
    outputs: An instance of `FinalBeamSearchDecoderOutput` where the
      predicted_ids are the result of calling _gather_tree.
    final_state: The same input instance of `BeamSearchDecoderState`.
  """
  # The argument is unused; the authoritative lengths live in final_state.
  del sequence_lengths
  # Longest beam per batch entry, needed by gather_tree.
  max_seq_lens = math_ops.to_int32(
      math_ops.reduce_max(final_state.lengths, axis=1))
  # Reconstruct full beams by following parent pointers backwards.
  gathered_ids = beam_search_ops.gather_tree(
      outputs.predicted_ids,
      outputs.parent_ids,
      max_sequence_lengths=max_seq_lens,
      end_token=self._end_token)
  final_outputs = FinalBeamSearchDecoderOutput(
      beam_search_decoder_output=outputs, predicted_ids=gathered_ids)
  return final_outputs, final_state
def grow_tree_from_stats_summaries(stats_summary_list):
  """Updates ensemble based on the best gains from stats summaries."""
  # Range [min, max] of node ids present in the layer being grown
  # (`node_ids` comes from the enclosing scope).
  node_id_range = array_ops.stack(
      [math_ops.reduce_min(node_ids), math_ops.reduce_max(node_ids)])
  (best_node_ids, best_gains, best_thresholds, best_left_contribs,
   best_right_contribs) = boosted_trees_ops.calculate_best_gains_per_feature(
       node_id_range=node_id_range,
       stats_summary_list=stats_summary_list,
       l1=tree_hparams.l1,
       l2=tree_hparams.l2,
       tree_complexity=tree_hparams.tree_complexity,
       max_splits=max_splits)
  # Confirm if local_tree_ensemble or tree_ensemble should be used.
  return boosted_trees_ops.update_ensemble(
      tree_ensemble.resource_handle,
      feature_ids=math_ops.range(0, num_features, dtype=dtypes.int32),
      node_ids=best_node_ids,
      gains=best_gains,
      thresholds=best_thresholds,
      left_node_contribs=best_left_contribs,
      right_node_contribs=best_right_contribs,
      learning_rate=tree_hparams.learning_rate,
      max_depth=tree_hparams.max_depth,
      pruning_mode=boosted_trees_ops.PruningMode.NO_PRUNING)
def testDictionary(self):
  # Stages dict-shaped values into a GPU MapStagingArea keyed by `pi`,
  # reads them back by key `gi`, and checks the computed reduction.
  with ops.Graph().as_default() as G:
    with ops.device('/cpu:0'):
      x = array_ops.placeholder(dtypes.float32)
      pi = array_ops.placeholder(dtypes.int64)  # key used by put()
      gi = array_ops.placeholder(dtypes.int64)  # key used by get()
      v = 2. * (array_ops.zeros([128, 128]) + x)
    with ops.device(test.gpu_device_name()):
      stager = data_flow_ops.MapStagingArea(
          [dtypes.float32, dtypes.float32],
          shapes=[[], [128, 128]],
          names=['x', 'v'])
      stage = stager.put(pi, {'x': x, 'v': v})
      key, ret = stager.get(gi)
      z = ret['x']
      y = ret['v']
      # y holds 2*(i) broadcast over [128,128]; the matmul sums 128 terms,
      # hence the 4 * i^3 * 128 expectation below.
      y = math_ops.reduce_max(z * math_ops.matmul(y, y))
    G.finalize()
    with self.session(use_gpu=True, graph=G) as sess:
      # Prime the staging area, then run put/get pipelined one step apart:
      # step i gets the value staged with x = i - 1.
      sess.run(stage, feed_dict={x: -1, pi: 0})
      for i in range(10):
        _, yval = sess.run([stage, y], feed_dict={x: i, pi: i + 1, gi: i})
        self.assertAllClose(
            4 * (i - 1) * (i - 1) * (i - 1) * 128, yval, rtol=1e-4)
def functional_rnn(cell, inputs, sequence_length=None,
                   initial_state=None, dtype=None, time_major=False,
                   scope=None, use_tpu=False):
  """Same interface as `tf.nn.dynamic_rnn`."""
  with variable_scope.variable_scope(scope or 'rnn'):
    if not time_major:
      # recurrent.Recurrent consumes time-major [T, B, D] tensors; convert
      # batch-major input up front.
      inputs = nest.map_structure(
          lambda t: array_ops.transpose(t, [1, 0, 2]), inputs)
    inputs_flat = nest.flatten(inputs)
    # Inputs are time-major at this point, so dim 1 is the batch.
    batch_size = array_ops.shape(inputs_flat[0])[1]
    if initial_state is None:
      initial_state = cell.zero_state(batch_size, dtype)
    func_cell = _FunctionalRnnCell(cell, inputs, initial_state)
    if sequence_length is not None:
      # Recurrent only needs the longest sequence; per-example masking is
      # handled by _PostProcessOutput below.
      max_length = math_ops.reduce_max(sequence_length)
    else:
      max_length = None
    extended_acc_state, extended_final_state = recurrent.Recurrent(
        theta=func_cell.theta,
        state0=func_cell.extended_initial_state,
        inputs=inputs,
        cell_fn=func_cell.cell_step,
        max_input_length=max_length,
        use_tpu=use_tpu)
    tf_output, tf_state = _PostProcessOutput(
        extended_acc_state, extended_final_state, func_cell,
        inputs_flat[0].shape[0], sequence_length)

    if time_major:
      # NOTE(review): this implies _PostProcessOutput returns batch-major
      # output -- confirm against its definition.
      tf_output = array_ops.transpose(tf_output, [1, 0, 2])
    return tf_output, tf_state
def dense_labels_to_sparse(dense, length):
  """Convert dense labels with sequence lengths to sparse tensor.

  Args:
    dense: tensor of shape [batch, max_length]
    length: int tensor of shape [batch] The length of each sequence in dense.

  Returns:
    tf.SparseTensor with values only for the valid elements of sequences.
  """
  flat_values = array_ops.reshape(dense, [-1])
  # Linear indices 0..batch*max_length-1 over the flattened labels.
  flat_indices = math_ops.range(
      array_ops.shape(flat_values, out_type=dtypes.int64)[0])
  # True for positions within each row's declared sequence length.
  mask = array_ops.sequence_mask(length, maxlen=array_ops.shape(dense)[1])
  flat_mask = array_ops.reshape(mask, [-1])
  indices = array_ops.expand_dims(
      array_ops.boolean_mask(flat_indices, flat_mask), 1)
  values = array_ops.boolean_mask(flat_values, flat_mask)
  # Build a 1-D sparse tensor over the flat layout first ...
  sparse = sparse_tensor.SparseTensor(
      indices=indices, values=math_ops.cast(values, dtypes.int32),
      dense_shape=array_ops.shape(flat_values, out_type=dtypes.int64))
  # ... then reshape it back to [batch, max_length] to recover 2-D indices.
  reshaped = sparse_ops.sparse_reshape(sparse, array_ops.shape(dense))
  max_length = math_ops.reduce_max(length)
  # Trim the second dimension of the dense shape to the longest real
  # sequence instead of the padded width.
  return sparse_tensor.SparseTensor(
      indices=reshaped.indices,
      values=reshaped.values,
      dense_shape=[
          math_ops.cast(reshaped.dense_shape[0], dtypes.int64),
          math_ops.cast(max_length, dtypes.int64)])
def __call__(self, inputs, state, scope=None): """Build the CrfDecodeForwardRnnCell. Args: inputs: A [batch_size, num_tags] matrix of unary potentials. state: A [batch_size, num_tags] matrix containing the previous step's score values. scope: Unused variable scope of this cell. Returns: backpointers: A [batch_size, num_tags] matrix of backpointers. new_state: A [batch_size, num_tags] matrix of new score values. """ # For simplicity, in shape comments, denote: # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output). state = array_ops.expand_dims(state, 2) # [B, O, 1] # This addition op broadcasts self._transitions_params along the zeroth # dimension and state along the second dimension. # [B, O, 1] + [1, O, O] -> [B, O, O] transition_scores = state + self._transition_params # [B, O, O] new_state = inputs + math_ops.reduce_max(transition_scores, [1]) # [B, O] backpointers = math_ops.argmax(transition_scores, 1) backpointers = math_ops.cast(backpointers, dtype=dtypes.int32) # [B, O] return backpointers, new_state
def collapse_repeated(labels, seq_length, name=None):
  """Merge repeated labels into single labels.

  Args:
    labels: Tensor of shape [batch, max value in seq_length]
    seq_length: Tensor of shape [batch], sequence length of each batch element.
    name: A name for this `Op`. Defaults to "collapse_repeated_labels".

  Returns:
    A tuple `(collapsed_labels, new_seq_length)` where

    collapsed_labels: Tensor of shape [batch, max_seq_length] with repeated
      labels collapsed and padded to max_seq_length, eg:
      `[[A, A, B, B, A], [A, B, C, D, E]] => [[A, B, A, 0, 0], [A, B, C, D, E]]`

    new_seq_length: int tensor of shape [batch] with new sequence lengths.
  """
  with ops.name_scope(name, "collapse_repeated_labels", [labels, seq_length]):
    labels = ops.convert_to_tensor(labels, name="labels")
    seq_length = ops.convert_to_tensor(seq_length, name="seq_length")

    # Mask labels that don't equal previous label.
    # The first position of every row is always kept.
    label_mask = array_ops.concat([
        array_ops.ones_like(labels[:, :1], dtypes.bool),
        math_ops.not_equal(labels[:, 1:], labels[:, :-1])
    ],
                                  axis=1)

    # Filter labels that aren't in the original sequence.
    maxlen = _get_dim(labels, 1)
    seq_mask = array_ops.sequence_mask(seq_length, maxlen=maxlen)
    label_mask = math_ops.logical_and(label_mask, seq_mask)

    # Count masks for new sequence lengths.
    new_seq_len = math_ops.reduce_sum(
        math_ops.cast(label_mask, dtypes.int32), axis=1)

    # Mask indexes based on sequence length mask.
    # idx_mask marks where the kept labels land in the collapsed output.
    new_maxlen = math_ops.reduce_max(new_seq_len)
    idx_mask = array_ops.sequence_mask(new_seq_len, maxlen=new_maxlen)

    # Flatten everything and mask out labels to keep and sparse indices.
    flat_labels = array_ops.reshape(labels, [-1])
    flat_label_mask = array_ops.reshape(label_mask, [-1])
    flat_idx_mask = array_ops.reshape(idx_mask, [-1])
    idx = math_ops.range(_get_dim(flat_idx_mask, 0))

    # Scatter to flat shape.
    # Kept labels are written to the left-packed target positions; the
    # remaining positions stay 0 (padding).
    flat = array_ops.scatter_nd(
        indices=array_ops.expand_dims(
            array_ops.boolean_mask(idx, flat_idx_mask), axis=1),
        updates=array_ops.boolean_mask(flat_labels, flat_label_mask),
        shape=array_ops.shape(flat_idx_mask))

    # Reshape back to square batch.
    batch_size = _get_dim(labels, 0)
    new_shape = [batch_size, new_maxlen]
    return (array_ops.reshape(flat, new_shape),
            math_ops.cast(new_seq_len, seq_length.dtype))
def _call_cell(self,
               inputs,
               initial_cell_state=None,
               initial_output=None,
               dtype=None,
               sequence_length=None):
  """Run this LSTM on inputs, starting from the given state.

  Args:
    inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`
    initial_cell_state: initial value for cell state, shape `[batch_size,
      self._num_units]`
    initial_output: initial value of cell output, shape `[batch_size,
      self._num_units]`
    dtype: The data type for the initial state and expected output.
    sequence_length: Specifies the length of each sequence in inputs. An
      `int32` or `int64` vector (tensor) size `[batch_size]`, values in `[0,
      time_len)` or None.

  Returns:
    A pair containing:

    - Cell state (cs): A `3-D` tensor of shape `[time_len, batch_size,
                       output_size]`
    - Output (h): A `3-D` tensor of shape `[time_len, batch_size,
                  output_size]`
  """
  inputs_shape = inputs.get_shape().with_rank(3)
  time_len = inputs_shape.dims[0].value
  if time_len is None:
    # Fall back to the dynamic time dimension when not statically known.
    time_len = array_ops.shape(inputs)[0]

  if self._use_peephole:
    wci = self._w_i_diag
    wco = self._w_o_diag
    wcf = self._w_f_diag
  else:
    # Zero diagonals disable the peephole connections in the fused op.
    wci = wcf = wco = array_ops.zeros([self._num_units], dtype=dtype)

  if sequence_length is None:
    max_seq_len = math_ops.cast(time_len, dtypes.int64)
  else:
    # The fused kernel only needs the longest sequence in the batch.
    max_seq_len = math_ops.cast(math_ops.reduce_max(sequence_length),
                                dtypes.int64)

  # block_lstm returns 7 outputs; only the cell states and outputs are used.
  _, cs, _, _, _, _, h = gen_lstm_ops.block_lstm(
      seq_len_max=max_seq_len,
      x=inputs,
      cs_prev=initial_cell_state,
      h_prev=initial_output,
      w=self._kernel,
      wci=wci,
      wcf=wcf,
      wco=wco,
      b=self._bias,
      forget_bias=self._forget_bias,
      cell_clip=self._cell_clip,
      use_peephole=self._use_peephole)
  return cs, h
def _call_cell(self, inputs, initial_cell_state, initial_output, dtype,
               sequence_length):
  """Run this LSTM on inputs, starting from the given state.

  Args:
    inputs: `3-D` tensor with shape `[time_len x batch_size x input_size]`
    initial_cell_state: initial value for cell state, shape `[batch_size,
      self._num_units]`
    initial_output: initial value of cell output, shape `[batch_size,
      self._num_units]`
    dtype: The data type for the initial state and expected output.
    sequence_length: Specifies the length of each sequence in inputs. An int32
      or int64 vector (tensor) size [batch_size], values in [0, time_len) or
      None.

  Returns:
    A pair containing:

    - Cell state (cs): A `3-D` tensor of shape `[time_len x batch_size x
                       output_size]`
    - Output (h): A `3-D` tensor of shape `[time_len x batch_size x
                  output_size]`
  """
  inputs_shape = inputs.get_shape().with_rank(3)
  time_len = inputs_shape[0].value
  if time_len is None:
    # Dynamic fallback when the time dimension isn't statically known.
    time_len = array_ops.shape(inputs)[0]
  input_size = inputs_shape[2].value
  # Single fused weight matrix for all four gates (hence num_units * 4).
  w = vs.get_variable(
      "W_0", [input_size + self._num_units, self._num_units * 4], dtype=dtype)
  b = vs.get_variable(
      "B", [w.get_shape().with_rank(2)[1]],
      initializer=init_ops.constant_initializer(0.0),
      dtype=dtype)
  if self._use_peephole:
    wci = vs.get_variable("W_I_diag", [self._num_units], dtype=dtype)
    wco = vs.get_variable("W_O_diag", [self._num_units], dtype=dtype)
    wcf = vs.get_variable("W_F_diag", [self._num_units], dtype=dtype)
  else:
    # Zero diagonals disable the peephole connections in the fused op.
    wci = wco = wcf = array_ops.zeros([self._num_units], dtype=dtype)
  if sequence_length is None:
    max_seq_len = time_len
  else:
    # The fused kernel only needs the longest sequence in the batch.
    max_seq_len = math_ops.to_int64(math_ops.reduce_max(sequence_length))
  # block_lstm returns 7 outputs; only the cell states and outputs are used.
  _, cs, _, _, _, _, h = _lstm_ops_so.block_lstm(
      seq_len_max=max_seq_len,
      x=inputs,
      cs_prev=initial_cell_state,
      h_prev=initial_output,
      w=w,
      wci=wci,
      wco=wco,
      wcf=wcf,
      b=b,
      forget_bias=self._forget_bias,
      cell_clip=self._cell_clip,
      use_peephole=self._use_peephole)
  return cs, h
def gather_tree_from_array(t, parent_ids, sequence_length):
  """Calculates the full beams for `TensorArray`s.

  Args:
    t: A stacked `TensorArray` of size `max_time` that contains `Tensor`s of
      shape `[batch_size, beam_width, s]` or `[batch_size * beam_width, s]`
      where `s` is the depth shape.
    parent_ids: The parent ids of shape `[max_time, batch_size, beam_width]`.
    sequence_length: The sequence length of shape `[batch_size, beam_width]`.

  Returns:
    A `Tensor` which is a stacked `TensorArray` of the same size and type as
    `t` and where beams are sorted in each `Tensor` according to
    `parent_ids`.
  """
  # Prefer static dimensions; fall back to dynamic shape per axis.
  max_time = parent_ids.shape[0].value or array_ops.shape(parent_ids)[0]
  batch_size = parent_ids.shape[1].value or array_ops.shape(parent_ids)[1]
  beam_width = parent_ids.shape[2].value or array_ops.shape(parent_ids)[2]

  # Generate beam ids that will be reordered by gather_tree.
  beam_ids = array_ops.expand_dims(
      array_ops.expand_dims(math_ops.range(beam_width), 0), 0)
  beam_ids = array_ops.tile(beam_ids, [max_time, batch_size, 1])

  # mask is 1 for valid (in-length) steps, transposed to [T, B, W].
  mask = array_ops.sequence_mask(
      sequence_length, maxlen=max_time, dtype=dtypes.int32)
  mask = array_ops.transpose(mask, perm=[2, 0, 1])

  # Use beam_width + 1 to mark the end of beam.
  masked_beam_ids = (beam_ids * mask) + (1 - mask) * (beam_width + 1)

  max_sequence_lengths = math_ops.to_int32(
      math_ops.reduce_max(sequence_length, axis=1))
  # Trace parent pointers backwards to find which beam each step belongs to.
  sorted_beam_ids = beam_search_ops.gather_tree(
      step_ids=masked_beam_ids,
      parent_ids=parent_ids,
      max_sequence_lengths=max_sequence_lengths,
      end_token=beam_width + 1)

  # For out of range steps, simply copy the same beam.
  sorted_beam_ids = array_ops.where(
      math_ops.cast(mask, dtypes.bool), x=sorted_beam_ids, y=beam_ids)

  # Generate indices for gather_nd.
  time_ind = array_ops.tile(array_ops.reshape(
      math_ops.range(max_time), [-1, 1, 1]), [1, batch_size, beam_width])
  batch_ind = array_ops.tile(array_ops.reshape(
      math_ops.range(batch_size), [-1, 1, 1]), [1, max_time, beam_width])
  batch_ind = array_ops.transpose(batch_ind, perm=[1, 0, 2])
  indices = array_ops.stack([time_ind, batch_ind, sorted_beam_ids], -1)

  # Gather from a tensor with collapsed additional dimensions.
  # Flatten trailing depth dims into one so gather_nd sees rank 4, then
  # restore the original shape afterwards.
  gather_from = t
  final_shape = array_ops.shape(gather_from)
  gather_from = array_ops.reshape(
      gather_from, [max_time, batch_size, beam_width, -1])
  ordered = array_ops.gather_nd(gather_from, indices)
  ordered = array_ops.reshape(ordered, final_shape)

  return ordered
def _show_max_abs(tensor):
  """Returns max(|tensor|) as a float32 tensor of shape [1], floored at 0."""
  as_float = math_ops.cast(tensor, dtypes.float32)
  max_abs = math_ops.reduce_max(math_ops.abs(as_float))
  # Never report a negative value.
  zero = constant_op.constant(0, dtypes.float32)
  clamped = gen_math_ops.maximum(zero, max_abs)
  # The shape has to be 1. Set it if it does not have the information.
  return array_ops.reshape(clamped, [1])
def testGradient4(self):
  """Gradient of a full (no-axis) reduce_max matches the numeric estimate."""
  shape = [2, 3, 4, 2]
  # Distinct values 1..48 so the max is unique and the gradient well-defined.
  x = np.arange(1.0, 49.0).reshape(shape).astype(np.float64)
  with self.test_session():
    t = ops.convert_to_tensor(x)
    reduced = math_ops.reduce_max(t)
    jacob_t, jacob_n = gradient_checker.compute_gradient(
        t, shape, reduced, [1], x_init_value=x, delta=1)
    self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)
def compute_best_f1_score(tp, fp, fn, name):
  """Returns the best F1 over per-threshold tp/fp/fn counts."""
  # `epsilon` (from the enclosing scope) guards each ratio against 0/0.
  precision = math_ops.div(tp, epsilon + tp + fp, name='precision_' + name)
  recall = math_ops.div(tp, epsilon + tp + fn, name='recall_' + name)

  # Compute F1 score.
  f1_scores = 2.0 * precision * recall / (precision + recall + epsilon)
  return math_ops.reduce_max(f1_scores)
def _compare_cdf(self, values):
  # Checks that the histogram-based CDF matches the direct while_loop CDF
  # for |values| binned over [0, max(|values|)].
  abs_values = math_ops.abs(values)
  max_value = math_ops.reduce_max(abs_values)
  with self.cached_session():
    variables.global_variables_initializer().run()
    cdf_from_histogram = pruning_utils.compute_cdf_from_histogram(
        abs_values, [0.0, max_value], nbins=pruning_utils._NBINS)
    cdf = pruning_utils.compute_cdf(abs_values, [0.0, max_value])
    self.assertAllEqual(cdf.eval(), cdf_from_histogram.eval())
def compute_cdf(values, value_range, **kwargs):
  """Returns the normalized cumulative distribution of the given values tensor.

  Uses tf.while_loop to directly compute the cdf of the values. Number of bins
  for histogram is fixed at _NBINS=255

  Args:
    values: Numeric `Tensor`.
    value_range: Shape [2] `Tensor` of same `dtype` as `values`
    **kwargs: keyword arguments: name

  Returns:
    A 1-D `Tensor` holding normalized cdf of values.
  """
  nbins = _NBINS
  name = kwargs.get('name', None)
  with ops.name_scope(name, 'cdf', [values, value_range, nbins]):
    values = ops.convert_to_tensor(values, name='values')
    value_range = ops.convert_to_tensor(value_range, name='value_range')
    nbins_float = np.float32(nbins)

    # Map tensor values that fall within value_range to [0, 1].
    scaled_values = math_ops.truediv(
        values - value_range[0],
        value_range[1] - value_range[0],
        name='scaled_values')

    # map tensor values within the open interval value_range to {0,.., nbins-1},
    # values outside the open interval will be zero or less, or nbins or more.
    indices = math_ops.floor(nbins_float * scaled_values, name='indices')

    # Clip edge cases (e.g. value = value_range[1]) or "outliers."
    indices = math_ops.cast(
        clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtypes.int32)

    cdf = array_ops.zeros(nbins)
    i = constant_op.constant(0)

    def loop_cond(loop_count, _):
      # Iterate once per bin.
      return math_ops.less(loop_count, nbins)

    def loop_body(loop_count, cdf):
      # Count of values falling into bin `loop_count` or below; written into
      # the corresponding cdf slot via one_hot.
      temp = math_ops.reduce_sum(
          math_ops.cast(
              math_ops.less_equal(indices, loop_count), dtypes.float32))
      cdf = math_ops.add(
          cdf,
          array_ops.one_hot(
              loop_count, depth=_NBINS, on_value=temp, off_value=0.0))
      return [loop_count + 1, cdf]

    _, cdf = control_flow_ops.while_loop(
        loop_cond, loop_body, [i, cdf], maximum_iterations=nbins)

    # Normalize by the last (largest) cumulative count.
    return math_ops.div(cdf, math_ops.reduce_max(cdf))
def testConstraints(self):
  """Gamma/beta constraint callables are stored on the layer unchanged."""
  gamma_fn = lambda x: x / math_ops.reduce_sum(x)
  beta_fn = lambda x: x / math_ops.reduce_max(x)
  bn = normalization_layers.BatchNormalization(axis=1,
                                               gamma_constraint=gamma_fn,
                                               beta_constraint=beta_fn)
  # Build the layer by calling it once on random input.
  bn(random_ops.random_uniform((5, 4, 3), seed=1))
  self.assertEqual(bn.gamma_constraint, gamma_fn)
  self.assertEqual(bn.beta_constraint, beta_fn)
def testConstraints(self):
  """Kernel/bias constraint callables are stored on the Dense layer."""
  kernel_fn = lambda x: x / math_ops.reduce_sum(x)
  bias_fn = lambda x: x / math_ops.reduce_max(x)
  dense = core_layers.Dense(2,
                            kernel_constraint=kernel_fn,
                            bias_constraint=bias_fn)
  # Build the layer by calling it once on random input.
  dense(random_ops.random_uniform((5, 3), seed=1))
  self.assertEqual(dense.kernel_constraint, kernel_fn)
  self.assertEqual(dense.bias_constraint, bias_fn)
def testConstraints(self):
  """Kernel/bias constraint callables are stored on Conv2DTranspose."""
  kernel_fn = lambda x: x / math_ops.reduce_sum(x)
  bias_fn = lambda x: x / math_ops.reduce_max(x)
  layer = conv_layers.Conv2DTranspose(2, 3,
                                      kernel_constraint=kernel_fn,
                                      bias_constraint=bias_fn)
  # Build the layer by calling it once on random input.
  layer(random_ops.random_uniform((5, 3, 3, 5), seed=1))
  self.assertEqual(layer.kernel_constraint, kernel_fn)
  self.assertEqual(layer.bias_constraint, bias_fn)
def sparse_categorical_accuracy(y_true, y_pred):
  # Collapse the last axis of the integer labels.
  # NOTE(review): reduce_max over axis -1 assumes y_true is shaped
  # [..., 1] (a squeeze); for a wider last axis this silently picks the
  # largest label -- confirm against callers.
  y_true = math_ops.reduce_max(y_true, axis=-1)
  # Predicted class is the argmax over the class dimension.
  y_pred = math_ops.argmax(y_pred, axis=-1)

  # If the expected labels are float, we need to cast the int returned by
  # argmax to compare.
  if K.dtype(y_true) == K.floatx():
    y_pred = math_ops.cast(y_pred, K.floatx())

  # 1.0 where prediction matches label, 0.0 elsewhere.
  return math_ops.cast(math_ops.equal(y_true, y_pred), K.floatx())
def advanced_softmax(logits, mask=None):
  """ Computes softmax function manually.

  Avoids numeric overflow.

  Args:
      logits: A Tensor. The softmax will apply on the last dimension of it.
      mask: A Tensor with the same shape as `logits`.

  Returns:
      The softmax results.
  """
  last_axis = logits.get_shape().ndims - 1
  # Shift by the max along the softmax axis for numerical stability.
  shifted = logits - math_ops.reduce_max(logits, axis=last_axis,
                                         keepdims=True)
  scores_exp = math_ops.exp(shifted)
  if mask is not None:
    # Zero out masked positions before normalizing.
    scores_exp = scores_exp * mask
  scores_sum = math_ops.reduce_sum(scores_exp, axis=last_axis, keepdims=True)
  return scores_exp / scores_sum
def crf_decode(potentials, transition_params, sequence_length):
  """Decode the highest scoring sequence of tags in TensorFlow.

  This is a function for tensor.

  Args:
    potentials: A [batch_size, max_seq_len, num_tags] tensor of
              unary potentials.
    transition_params: A [num_tags, num_tags] matrix of
              binary potentials.
    sequence_length: A [batch_size] vector of true sequence lengths.

  Returns:
    decode_tags: A [batch_size, max_seq_len] matrix, with dtype `tf.int32`.
                Contains the highest scoring tag indicies.
    best_score: A [batch_size] vector, containing the score of `decode_tags`.
  """
  # For simplicity, in shape comments, denote:
  # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output).
  num_tags = potentials.get_shape()[2].value

  # Computes forward decoding. Get last score and backpointers.
  # The first time step seeds the state; the RNN runs over steps 1..T-1.
  crf_fwd_cell = CrfDecodeForwardRnnCell(transition_params)
  initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1])
  initial_state = array_ops.squeeze(initial_state, axis=[1])  # [B, O]
  inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1])  # [B, T-1, O]
  backpointers, last_score = rnn.dynamic_rnn(
      crf_fwd_cell,
      inputs=inputs,
      sequence_length=sequence_length - 1,
      initial_state=initial_state,
      time_major=False,
      dtype=dtypes.int32)  # [B, T - 1, O], [B, O]
  # Reverse so the backward pass can consume backpointers front-to-back.
  backpointers = gen_array_ops.reverse_sequence(
      backpointers, sequence_length - 1, seq_dim=1)  # [B, T-1, O]

  # Computes backward decoding. Extract tag indices from backpointers.
  crf_bwd_cell = CrfDecodeBackwardRnnCell(num_tags)
  # Start from the best-scoring final tag.
  initial_state = math_ops.cast(math_ops.argmax(last_score, axis=1),
                                dtype=dtypes.int32)  # [B]
  initial_state = array_ops.expand_dims(initial_state, axis=-1)  # [B, 1]
  decode_tags, _ = rnn.dynamic_rnn(
      crf_bwd_cell,
      inputs=backpointers,
      sequence_length=sequence_length - 1,
      initial_state=initial_state,
      time_major=False,
      dtype=dtypes.int32)  # [B, T - 1, 1]
  decode_tags = array_ops.squeeze(decode_tags, axis=[2])  # [B, T - 1]
  decode_tags = array_ops.concat([initial_state, decode_tags],
                                 axis=1)  # [B, T]
  # Un-reverse to restore original time order.
  decode_tags = gen_array_ops.reverse_sequence(
      decode_tags, sequence_length, seq_dim=1)  # [B, T]

  best_score = math_ops.reduce_max(last_score, axis=1)  # [B]
  return decode_tags, best_score
def my_rnn(alphabetEnc, cell, inputs, initial_state=None, dtype=None,
           sequence_length=None, scope=None):
  """Statically unrolled RNN feeding `alphabetEnc[time]` alongside each input.

  Mirrors the classic static-RNN loop except that each step calls
  `cell([input_, alphabetEnc[time]], state)`.

  Args:
    alphabetEnc: List indexed by time step of extra tensors handed to the
      cell at every step.
    cell: An instance of RNNCell.
    inputs: A non-empty list of input tensors, one per time step.
    initial_state: Optional initial state. If absent, `dtype` is required
      and `cell.zero_state` is used.
    dtype: Data type of the zero initial state (required when
      `initial_state` is not given).
    sequence_length: Optional int vector [batch_size] of valid lengths.
    scope: Variable scope name; defaults to "RNN".

  Returns:
    A `(outputs, state)` pair: the list of per-step outputs and the final
    state.

  Raises:
    TypeError: If `cell` is not an RNNCell or `inputs` is not a list.
    ValueError: If `inputs` is empty, or neither `initial_state` nor `dtype`
      is provided.
  """
  if not isinstance(cell, rnn_cell.RNNCell):
    raise TypeError("cell must be an instance of RNNCell")
  if not isinstance(inputs, list):
    raise TypeError("inputs must be a list")
  if not inputs:
    raise ValueError("inputs must not be empty")

  outputs = []
  with vs.variable_scope(scope or "RNN"):
    # Prefer the statically known batch size; fall back to dynamic shape.
    fixed_batch_size = inputs[0].get_shape().with_rank_at_least(1)[0]
    if fixed_batch_size.value:
      batch_size = fixed_batch_size.value
    else:
      batch_size = array_ops.shape(inputs[0])[0]
    if initial_state is not None:
      state = initial_state
    else:
      if not dtype:
        raise ValueError("If no initial_state is provided, dtype must be.")
      state = cell.zero_state(batch_size, dtype)

    if sequence_length is not None:
      sequence_length = math_ops.to_int32(sequence_length)

    # BUG FIX: the original used `if sequence_length:` here and in the loop
    # below, which evaluates the truthiness of a Tensor after the to_int32
    # conversion above and raises a TypeError in graph mode. Test against
    # None instead.
    if sequence_length is not None:
      # Prepare variables
      zero_output = array_ops.zeros(
          array_ops.pack([batch_size, cell.output_size]), inputs[0].dtype)
      zero_output.set_shape(
          tensor_shape.TensorShape([fixed_batch_size.value,
                                    cell.output_size]))
      min_sequence_length = math_ops.reduce_min(sequence_length)
      max_sequence_length = math_ops.reduce_max(sequence_length)

    for time, input_ in enumerate(inputs):
      if time > 0:
        vs.get_variable_scope().reuse_variables()
      # pylint: disable=cell-var-from-loop
      call_cell = lambda: cell([input_, alphabetEnc[time]], state)
      # pylint: enable=cell-var-from-loop
      if sequence_length is not None:
        # Emit zero_output / frozen state past each sequence's end.
        (output, state) = _rnn_step(time, sequence_length,
                                    min_sequence_length, max_sequence_length,
                                    zero_output, state, call_cell)
      else:
        (output, state) = call_cell()
      outputs.append(output)
    return (outputs, state)
def testLargeFeed(self):
  """A large (~114MB) constant feed round-trips through the RPC session."""
  server = self._cached_server
  with session.Session(server.target, config=self._useRPCConfig()) as sess:
    # 10000 x 3000 float32 tensor, every element 0.5.
    feed_val = np.full([10000, 3000], 0.5, dtype=np.float32)
    p = array_ops.placeholder(dtypes.float32, shape=[10000, 3000])
    min_t = math_ops.reduce_min(p)
    max_t = math_ops.reduce_max(p)
    min_val, max_val = sess.run([min_t, max_t], feed_dict={p: feed_val})
    # Both reductions must see the fed value exactly.
    self.assertEqual(0.5, min_val)
    self.assertEqual(0.5, max_val)
def _numerically_stable_global_norm(tensor_list):
  """Compute the global norm of a list of Tensors, with improved stability.

  The global norm computation sometimes overflows due to the intermediate L2
  step. To avoid this, we divide by a cheap-to-compute max over the
  matrix elements.

  Args:
    tensor_list: A list of tensors, or `None`.

  Returns:
    A scalar tensor with the global norm.
  """
  if np.all([x is None for x in tensor_list]):
    # NOTE(review): returns a Python float, not a Tensor, on this path.
    return 0.0

  # Scale every tensor down by the global max element before the L2 step,
  # then multiply the result back up.
  # NOTE(review): if every element is exactly zero, list_max is 0 and the
  # division below is 0/0 -- confirm callers never hit this.
  list_max = math_ops.reduce_max([math_ops.reduce_max(math_ops.abs(x)) for x in
                                  tensor_list if x is not None])
  return list_max * clip_ops.global_norm([x / list_max for x in tensor_list
                                          if x is not None])
def _max_condition_number_to_be_non_singular(self):
  """Return the maximum condition number that we consider nonsingular."""
  with ops.name_scope("max_nonsingular_condition_number"):
    # Machine epsilon for this operator's dtype.
    machine_eps = np.finfo(self.dtype.as_numpy_dtype).eps
    # Scale eps by the largest of 100 and both matrix dimensions.
    dim_bound = math_ops.reduce_max([
        100.,
        math_ops.cast(self.range_dimension_tensor(), self.dtype),
        math_ops.cast(self.domain_dimension_tensor(), self.dtype)
    ])
    tolerance = math_ops.cast(dim_bound, self.dtype) * machine_eps
    return 1. / tolerance
def _calculate_acceptance_probabilities(init_probs, target_probs): """Calculate the per-class acceptance rates. Args: init_probs: The class probabilities of the data. target_probs: The desired class proportion in minibatches. Returns: A list of the per-class acceptance probabilities. This method is based on solving the following analysis: Let F be the probability of a rejection (on any example). Let p_i be the proportion of examples in the data in class i (init_probs) Let a_i is the rate the rejection sampler should *accept* class i Let t_i is the target proportion in the minibatches for class i (target_probs) ``` F = sum_i(p_i * (1-a_i)) = 1 - sum_i(p_i * a_i) using sum_i(p_i) = 1 ``` An example with class `i` will be accepted if `k` rejections occur, then an example with class `i` is seen by the rejector, and it is accepted. This can be written as follows: ``` t_i = sum_k=0^inf(F^k * p_i * a_i) = p_i * a_j / (1 - F) using geometric series identity, since 0 <= F < 1 = p_i * a_i / sum_j(p_j * a_j) using F from above ``` Note that the following constraints hold: ``` 0 <= p_i <= 1, sum_i(p_i) = 1 0 <= a_i <= 1 0 <= t_i <= 1, sum_i(t_i) = 1 ``` A solution for a_i in terms of the other variabes is the following: ```a_i = (t_i / p_i) / max_i[t_i / p_i]``` """ # Make list of t_i / p_i. ratio_l = target_probs / init_probs # Replace NaNs with 0s. ratio_l = math_ops.select(math_ops.is_nan(ratio_l), array_ops.zeros_like(ratio_l), ratio_l) # Calculate list of acceptance probabilities. max_ratio = math_ops.reduce_max(ratio_l) return ratio_l / max_ratio
def testConstraints(self):
  """Depthwise/pointwise/bias constraints are stored on SeparableConv2D."""
  depthwise_fn = lambda x: x / math_ops.reduce_sum(x)
  pointwise_fn = lambda x: x / math_ops.reduce_sum(x)
  bias_fn = lambda x: x / math_ops.reduce_max(x)
  layer = conv_layers.SeparableConv2D(2, 3,
                                      depthwise_constraint=depthwise_fn,
                                      pointwise_constraint=pointwise_fn,
                                      bias_constraint=bias_fn)
  # Build the layer by calling it once on random input.
  layer(random_ops.random_uniform((5, 3, 3, 5), seed=1))
  self.assertEqual(layer.depthwise_constraint, depthwise_fn)
  self.assertEqual(layer.pointwise_constraint, pointwise_fn)
  self.assertEqual(layer.bias_constraint, bias_fn)
def matrix_exponential(input, name=None):  # pylint: disable=redefined-builtin
  r"""Computes the matrix exponential of one or more square matrices.

  exp(A) = \sum_{n=0}^\infty A^n/n!

  The exponential is computed using a combination of the scaling and squaring
  method and the Pade approximation. Details can be found in:
  Nicholas J. Higham, "The scaling and squaring method for the matrix
  exponential revisited," SIAM J. Matrix Anal. Applic., 26:1179-1193, 2005.

  The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
  form square matrices. The output is a tensor of the same shape as the input
  containing the exponential for all input submatrices `[..., :, :]`.

  Args:
    input: A `Tensor`. Must be `float16`, `float32`, `float64`, `complex64`, or
      `complex128` with shape `[..., M, M]`.
    name: A name to give this `Op` (optional).

  Returns:
    the matrix exponential of the input.

  Raises:
    ValueError: An unsupported type is provided as input.

  @compatibility(scipy)
  Equivalent to scipy.linalg.expm
  @end_compatibility
  """
  with ops.name_scope(name, 'matrix_exponential', [input]):
    matrix = ops.convert_to_tensor(input, name='input')
    # Empty matrices: exp of a 0x0 matrix is itself.
    if matrix.shape[-2:] == [0, 0]:
      return matrix
    batch_shape = matrix.shape[:-2]
    if not batch_shape.is_fully_defined():
      # Fall back to the dynamic batch shape when static info is incomplete.
      batch_shape = array_ops.shape(matrix)[:-2]

    # reshaping the batch makes the where statements work better
    matrix = array_ops.reshape(
        matrix, array_ops.concat(([-1], array_ops.shape(matrix)[-2:]), axis=0))
    # Per-matrix L1 norm (max column sum of absolute values); used to pick the
    # Pade approximation order and the number of squarings.
    l1_norm = math_ops.reduce_max(
        math_ops.reduce_sum(
            math_ops.abs(matrix),
            axis=array_ops.size(array_ops.shape(matrix)) - 2),
        axis=-1)
    const = lambda x: constant_op.constant(x, l1_norm.dtype)

    def _nest_where(vals, cases):
      # Select cases[k] for the first threshold vals[k] exceeding l1_norm;
      # cases[-1] when no threshold matches. len(cases) == len(vals) + 1.
      assert len(vals) == len(cases) - 1
      if len(vals) == 1:
        return array_ops.where(
            math_ops.less(l1_norm, const(vals[0])), cases[0], cases[1])
      else:
        return array_ops.where(
            math_ops.less(l1_norm, const(vals[0])), cases[0],
            _nest_where(vals[1:], cases[1:]))

    if matrix.dtype in [dtypes.float16, dtypes.float32, dtypes.complex64]:
      # Single-precision path: Pade orders up to 7 (thresholds from Higham).
      maxnorm = const(3.925724783138660)
      squarings = math_ops.maximum(
          math_ops.floor(
              math_ops.log(l1_norm / maxnorm) / math_ops.log(const(2.0))), 0)
      u3, v3 = _matrix_exp_pade3(matrix)
      u5, v5 = _matrix_exp_pade5(matrix)
      # The highest-order approximant is applied to the scaled-down matrix.
      u7, v7 = _matrix_exp_pade7(matrix / math_ops.pow(
          constant_op.constant(2.0, dtype=matrix.dtype),
          math_ops.cast(squarings,
                        matrix.dtype))[..., array_ops.newaxis,
                                       array_ops.newaxis])
      conds = (4.258730016922831e-001, 1.880152677804762e+000)
      u = _nest_where(conds, (u3, u5, u7))
      v = _nest_where(conds, (v3, v5, v7))
    elif matrix.dtype in [dtypes.float64, dtypes.complex128]:
      # Double-precision path: Pade orders up to 13.
      maxnorm = const(5.371920351148152)
      squarings = math_ops.maximum(
          math_ops.floor(
              math_ops.log(l1_norm / maxnorm) / math_ops.log(const(2.0))), 0)
      u3, v3 = _matrix_exp_pade3(matrix)
      u5, v5 = _matrix_exp_pade5(matrix)
      u7, v7 = _matrix_exp_pade7(matrix)
      u9, v9 = _matrix_exp_pade9(matrix)
      u13, v13 = _matrix_exp_pade13(matrix / math_ops.pow(
          constant_op.constant(2.0, dtype=matrix.dtype),
          math_ops.cast(squarings,
                        matrix.dtype))[..., array_ops.newaxis,
                                       array_ops.newaxis])
      conds = (1.495585217958292e-002, 2.539398330063230e-001,
               9.504178996162932e-001, 2.097847961257068e+000)
      u = _nest_where(conds, (u3, u5, u7, u9, u13))
      v = _nest_where(conds, (v3, v5, v7, v9, v13))
    else:
      raise ValueError(
          'tf.linalg.expm does not support matrices of type %s' % matrix.dtype)
    # Pade approximant: exp(A) ~= (v - u)^-1 (v + u).
    numer = u + v
    denom = -u + v
    result = linalg_ops.matrix_solve(denom, numer)
    max_squarings = math_ops.reduce_max(squarings)

    # Undo the scaling: square each result `squarings[b]` times. The loop runs
    # max_squarings iterations; matrices that need fewer squarings are passed
    # through unchanged by the `where`.
    i = const(0.0)
    c = lambda i, r: math_ops.less(i, max_squarings)

    def b(i, r):
      return i + 1, array_ops.where(
          math_ops.less(i, squarings), math_ops.matmul(r, r), r)

    _, result = control_flow_ops.while_loop(c, b, [i, result])
    # Restore the original batch dimensions.
    if not matrix.shape.is_fully_defined():
      return array_ops.reshape(
          result,
          array_ops.concat((batch_shape, array_ops.shape(result)[-2:]),
                           axis=0))
    return array_ops.reshape(result, batch_shape.concatenate(result.shape[-2:]))
def ragged_reduce_aggregate(reduce_op,
                            unsorted_segment_op,
                            rt_input,
                            axis,
                            keepdims,
                            separator=None,
                            name=None):
  """Aggregates across axes of a RaggedTensor using the given `Tensor` ops.

  Reduces `rt_input` along the dimensions given in `axis`.  The rank of the
  tensor is reduced by 1 for each entry in `axis`.  If `axis` is not specified,
  then all dimensions are reduced, and a scalar value is returned.

  This op assumes that `reduce_op` and `unsorted_segment_op` are associative;
  if not, then reducing multiple axes will return incorrect results.  (In
  particular, reducing multiple axes is currently implemented by reducing the
  axes one at a time.)

  Args:
    reduce_op: The tensorflow `op` that should be used to reduce values in
      uniform dimensions.  Must have the same signature and basic behavior as
      `reduce_sum`, `reduce_max`, etc.
    unsorted_segment_op: The tensorflow `op` that should be used to combine
      values in ragged dimensions.  Must have the same signature and basic
      behavior as `unsorted_segment_sum`, `unsorted_segment_max`, etc.
    rt_input: A `Tensor` or `RaggedTensor` containing the values to be reduced.
    axis: The axis or axes to reduce.  May be `None` (to reduce all axes), an
      `int` (to reduce a single axis), a `list` or `tuple` of `int` (to reduce
      a given set of axes), or a `Tensor` with a constant value.  Must be in
      the range `[0, rt_input.rank)`.
    keepdims: If true, retains reduced dimensions with length 1.
    separator: An optional string. Defaults to None. The separator to use when
      joining. The separator must not be set for non-string data types. (i.e.
      if separator is not None then it uses string ops)
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor` containing the reduced values.  The returned tensor
    has the same dtype as `data`, and its shape is given by removing the
    dimensions specified in `axis` from `rt_input.shape`.  The `ragged_rank`
    of the returned tensor is given by subtracting any ragged dimensions
    specified in `axis` from `rt_input.ragged_rank`.

  Raises:
    ValueError: If `axis` contains a `Tensor` whose value is not constant.
  """
  # Dense input: delegate directly to the uniform-tensor reduce op.
  if not ragged_tensor.is_ragged(rt_input):
    if separator is None:
      return reduce_op(rt_input, axis, keepdims=keepdims, name=name)
    else:
      # When separator is not None, we infer that dtype is string and
      # reduce_join will be called.
      return reduce_op(
          rt_input, axis, keepdims=keepdims, name=name, separator=separator)

  if isinstance(axis, ops.Tensor):
    # `axis` must be statically known to choose between the code paths below.
    axis = tensor_util.constant_value(axis)
    if axis is None:
      raise ValueError('axis must be known at graph construction time.')
    if isinstance(axis, np.ndarray):
      axis = axis.tolist()

  # When reducing all axes, just ignore splits & reduce the inner values.
  if axis is None:
    result = reduce_op(rt_input.flat_values, None, keepdims=keepdims,
                       name=name)
    if keepdims:
      # Expand the result to the input number of dimensions.
      for _ in rt_input.shape[1:]:
        result = array_ops.expand_dims(result, axis=0)
    return result

  with ops.name_scope(name, 'RaggedReduce', [rt_input, axis]):
    if isinstance(axis, (tuple, list)):
      if not axis:
        # Empty axis list: nothing to reduce.
        return rt_input
      elif len(axis) == 1:
        axis = axis[0]
      else:
        # When reducing multiple axes, as we reduce one at a time (see below),
        # the negative axis has to be converted to positive at the first run
        # as the sort with negative axis will have different orders.
        # See GitHub issue 27497.
        axis = [
            array_ops.get_positive_axis(a, rt_input.shape.ndims, 'axis[%s]' % i,
                                        'rank(input_tensor)')
            for i, a in enumerate(axis)
        ]
        # When reducing multiple axes, just reduce one at a time.  This is less
        # efficient, and only works for associative ops.  (In particular, it
        # does not work for reduce_mean.)  However, reducing multiple axes at
        # once will probably require a nontrivial c++ op.
        axis = sorted(axis)
        # Reduce the innermost axis first so outer axis indices stay valid.
        inner_reduced = ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                                rt_input, axis[-1], keepdims,
                                                separator)
        return ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                       inner_reduced, axis[:-1], keepdims,
                                       separator)

    rt_input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        rt_input, name='rt_input')

    axis = array_ops.get_positive_axis(
        axis, rt_input.shape.ndims, ndims_name='rank(input_tensor)')

    if axis == 0:
      # out[i_1, i_2, ..., i_N] = sum_{j} rt_input[j, i_1, i_2, ..., i_N]
      row_lengths = rt_input.row_splits[1:] - rt_input.row_splits[:-1]
      num_segments = math_ops.maximum(math_ops.reduce_max(row_lengths), 0)
      # NOTE(review): `range` here appears to be a ragged range helper (its
      # result has `.values`), not the Python builtin -- confirm against the
      # module's imports.
      segment_ids = range(row_lengths).values
      result = _ragged_segment_aggregate(unsorted_segment_op, rt_input.values,
                                         segment_ids, num_segments, separator)
      if keepdims:
        result = array_ops.expand_dims(result, axis=0)
      return result
    elif axis == 1:
      # out[i_0, i_1, i_2, ..., i_N] = sum_{j} rt_input[i_0, j, i_2, ..., i_N]
      num_segments = array_ops.shape(rt_input.row_splits)[0] - 1
      segment_ids = segment_id_ops.row_splits_to_segment_ids(
          rt_input.row_splits)
      result = _ragged_segment_aggregate(unsorted_segment_op, rt_input.values,
                                         segment_ids, num_segments, separator)
      if keepdims:
        result = array_ops.expand_dims(result, axis=1)
      return result
    else:
      # out[i_0, ..., i_[axis-1], i_axis+1], ..., i_N] =
      #     sum_{j} rt_input [i_0, ..., i_[axis-1], j, i_axis+1], ..., i_N]
      # Recurse into the values with a decremented axis.
      return rt_input.with_values(
          ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                  rt_input.values, axis - 1, keepdims,
                                  separator))
def _iter_condition(i, mat_m, _):
  """Loop predicate: continue while under the cap and not yet converged."""
  under_iteration_cap = i < iter_count
  not_converged = (
      math_ops.reduce_max(math_ops.abs(mat_m - identity)) > epsilon)
  return math_ops.logical_and(under_iteration_cap, not_converged)
def _calculate_acceptance_probs_with_mixing(initial_probs, target_probs):
  """Calculates the acceptance probabilities and mixing ratio.

  In this case, we assume that we can *either* sample from the original data
  distribution with probability `m`, or sample from a reshaped distribution
  that comes from rejection sampling on the original distribution. This
  rejection sampling is done on a per-class basis, with `a_i` representing the
  probability of accepting data from class `i`.

  This method is based on solving the following analysis for the reshaped
  distribution:

  Let F be the probability of a rejection (on any example).
  Let p_i be the proportion of examples in the data in class i (init_probs)
  Let a_i be the rate the rejection sampler should *accept* class i
  Let t_i be the target proportion in the minibatches for class i
  (target_probs)

  ```
  F = sum_i(p_i * (1-a_i))
    = 1 - sum_i(p_i * a_i)     using sum_i(p_i) = 1
  ```

  An example with class `i` will be accepted if `k` rejections occur, then an
  example with class `i` is seen by the rejector, and it is accepted. This can
  be written as follows:

  ```
  t_i = sum_k=0^inf(F^k * p_i * a_i)
      = p_i * a_j / (1 - F)    using geometric series identity, since 0 <= F < 1
      = p_i * a_i / sum_j(p_j * a_j)        using F from above
  ```

  Note that the following constraints hold:
  ```
  0 <= p_i <= 1, sum_i(p_i) = 1
  0 <= a_i <= 1
  0 <= t_i <= 1, sum_i(t_i) = 1
  ```

  A solution for a_i in terms of the other variables is the following:
    ```a_i = (t_i / p_i) / max_i[t_i / p_i]```

  If we try to minimize the amount of data rejected, we get the following:

  M_max = max_i [ t_i / p_i ]
  M_min = min_i [ t_i / p_i ]

  The desired probability of accepting data if it comes from class `i`:

  a_i = (t_i/p_i - m) / (M_max - m)

  The desired probability of pulling a data element from the original dataset,
  rather than the filtered one:

  m = M_min

  Args:
    initial_probs: A Tensor of the initial probability distribution, given or
      estimated.
    target_probs: A Tensor of the corresponding classes.

  Returns:
    (A 1D Tensor with the per-class acceptance probabilities, the desired
    probability of pull from the original distribution.)
  """
  ratios = _get_target_to_initial_ratio(initial_probs, target_probs)
  max_ratio = math_ops.reduce_max(ratios)
  min_ratio = math_ops.reduce_min(ratios)

  # Probability of sampling straight from the original distribution (M_min).
  mixing_prob = min_ratio

  # TODO(joelshor): Simplify fraction, if possible.
  accept_probs = (ratios - mixing_prob) / (max_ratio - mixing_prob)
  return accept_probs, mixing_prob
def _single_seq_fn():
  """Decode length-1 sequences: argmax over tags at the single time step."""
  step_logits = array_ops.squeeze(potentials, [1])
  best_score = math_ops.reduce_max(step_logits, axis=1)
  decode_tags = array_ops.expand_dims(
      math_ops.argmax(step_logits, axis=1), 1)
  return math_ops.cast(decode_tags, dtype=dtypes.int32), best_score
def _sample_max(values):
  """Max over sample indices.  In this module this is always [0].

  Args:
    values: A `Tensor` whose leading dimension indexes samples.

  Returns:
    A `Tensor` with the sample dimension reduced via max.
  """
  # `axis` replaces the long-deprecated `reduction_indices` alias; behavior
  # is identical.
  return math_ops.reduce_max(values, axis=[0])
def _training_examples_and_variables():
  """Returns dictionaries for training examples and variables.

  Builds the `examples` and `sdca_variables` dicts consumed by SDCA from the
  enclosing scope's `features`, `targets`, and `columns_to_variables`.

  Returns:
    A pair `(examples, sdca_variables)` of dicts keyed by the names SDCA
    expects (`sparse_features`, `dense_features`, `example_labels`,
    `example_weights`, `example_ids`; `sparse_features_weights`,
    `dense_features_weights`).

  Raises:
    ValueError: If a feature column type is not supported by SDCAOptimizer.
  """
  batch_size = targets.get_shape()[0]

  # Iterate over all feature columns and create appropriate lists for dense
  # and sparse features as well as dense and sparse weights (variables) for
  # SDCA.
  # TODO(sibyl-vie3Poto): Reshape variables stored as values in column_to_variables
  # dict as 1-dimensional tensors.
  dense_features, sparse_features, sparse_feature_with_values = [], [], []
  dense_feature_weights = []
  sparse_feature_weights, sparse_feature_with_values_weights = [], []
  # Sort by column key for a deterministic feature order.
  for column in sorted(columns_to_variables.keys(), key=lambda x: x.key):
    transformed_tensor = features[column]
    if isinstance(column, layers.feature_column._RealValuedColumn):  # pylint: disable=protected-access
      # A real-valued column corresponds to a dense feature in SDCA. A
      # transformed tensor corresponding to a RealValuedColumn should have
      # rank at most 2. In order to be passed to SDCA, its rank needs to be
      # exactly 2 (i.e., its shape should be [batch_size, column.dim]).
      check_rank_op = control_flow_ops.Assert(
          math_ops.less_equal(array_ops.rank(transformed_tensor), 2),
          # Fixed typo in the assertion message ('shouls' -> 'should').
          ['transformed_tensor should have rank at most 2.'])
      # Reshape to [batch_size, dense_column_dimension].
      with ops.control_dependencies([check_rank_op]):
        transformed_tensor = array_ops.reshape(transformed_tensor, [
            array_ops.shape(transformed_tensor)[0], -1
        ])

      dense_features.append(transformed_tensor)
      # For real valued columns, the variables list contains exactly one
      # element.
      dense_feature_weights.append(columns_to_variables[column][0])
    elif isinstance(column, layers.feature_column._BucketizedColumn):  # pylint: disable=protected-access
      # A bucketized column corresponds to a sparse feature in SDCA. The
      # bucketized feature is "sparsified" for SDCA by converting it to a
      # SparseFeatureColumn representing the one-hot encoding of the
      # bucketized feature.
      #
      # TODO(sibyl-vie3Poto): Explore whether it is more efficient to translate a
      # bucketized feature column to a dense feature in SDCA. This will
      # likely depend on the number of buckets.
      dense_bucket_tensor = column._to_dnn_input_layer(transformed_tensor)  # pylint: disable=protected-access
      sparse_feature_column = _dense_tensor_to_sparse_feature_column(
          dense_bucket_tensor)
      sparse_feature_with_values.append(sparse_feature_column)
      # If a partitioner was used during variable creation, we will have a
      # list of Variables here larger than 1.
      vars_to_append = columns_to_variables[column][0]
      if len(columns_to_variables[column]) > 1:
        vars_to_append = columns_to_variables[column]
      sparse_feature_with_values_weights.append(vars_to_append)
    elif isinstance(
        column,
        (
            layers.feature_column._WeightedSparseColumn,  # pylint: disable=protected-access
            layers.feature_column._CrossedColumn,  # pylint: disable=protected-access
            layers.feature_column._SparseColumn)):  # pylint: disable=protected-access

      if isinstance(column, layers.feature_column._WeightedSparseColumn):  # pylint: disable=protected-access
        id_tensor = column.id_tensor(transformed_tensor)
        weight_tensor = array_ops.reshape(
            column.weight_tensor(transformed_tensor).values, [-1])
      else:
        id_tensor = transformed_tensor
        weight_tensor = array_ops.ones(
            [array_ops.shape(id_tensor.indices)[0]], dtypes.float32)

      example_ids = array_ops.reshape(id_tensor.indices[:, 0], [-1])

      flat_ids = array_ops.reshape(id_tensor.values, [-1])
      # Prune invalid IDs (< 0) from the flat_ids, example_ids, and
      # weight_tensor. These can come from looking up an OOV entry in the
      # vocabulary (default value being -1).
      is_id_valid = math_ops.greater_equal(flat_ids, 0)
      flat_ids = array_ops.boolean_mask(flat_ids, is_id_valid)
      example_ids = array_ops.boolean_mask(example_ids, is_id_valid)
      weight_tensor = array_ops.boolean_mask(weight_tensor, is_id_valid)

      projection_length = math_ops.reduce_max(flat_ids) + 1
      # project ids based on example ids so that we can dedup ids that
      # occur multiple times for a single example.
      projected_ids = projection_length * example_ids + flat_ids

      # Remove any redundant ids.
      ids, idx = array_ops.unique(projected_ids)
      # Keep only one example id per duplicated ids.
      example_ids_filtered = math_ops.unsorted_segment_min(
          example_ids, idx, array_ops.shape(ids)[0])

      # reproject ids back feature id space.
      reproject_ids = (ids - projection_length * example_ids_filtered)

      weights = array_ops.reshape(
          math_ops.unsorted_segment_sum(weight_tensor, idx,
                                        array_ops.shape(ids)[0]), [-1])
      sparse_feature_with_values.append(
          SparseFeatureColumn(example_ids_filtered, reproject_ids, weights))
      # If a partitioner was used during variable creation, we will have a
      # list of Variables here larger than 1.
      vars_to_append = columns_to_variables[column][0]
      if len(columns_to_variables[column]) > 1:
        vars_to_append = columns_to_variables[column]
      sparse_feature_with_values_weights.append(vars_to_append)
    else:
      raise ValueError('SDCAOptimizer does not support column type %s.' %
                       type(column).__name__)

  example_weights = array_ops.reshape(
      features[weight_column_name],
      shape=[-1]) if weight_column_name else array_ops.ones([batch_size])
  example_ids = features[self._example_id_column]
  sparse_feature_with_values.extend(sparse_features)
  sparse_feature_with_values_weights.extend(sparse_feature_weights)
  examples = dict(
      sparse_features=sparse_feature_with_values,
      dense_features=dense_features,
      example_labels=math_ops.to_float(
          array_ops.reshape(targets, shape=[-1])),
      example_weights=example_weights,
      example_ids=example_ids)
  sdca_variables = dict(
      sparse_features_weights=sparse_feature_with_values_weights,
      dense_features_weights=dense_feature_weights)
  return examples, sdca_variables
def norm(tensor,
         ord='euclidean',
         axis=None,
         keepdims=None,
         name=None,
         keep_dims=None):
  r"""Computes the norm of vectors, matrices, and tensors.

  This function can compute several different vector norms (the 1-norm, the
  Euclidean or 2-norm, the inf-norm, and in general the p-norm for p > 0) and
  matrix norms (Frobenius, 1-norm, 2-norm and inf-norm).

  Args:
    tensor: `Tensor` of types `float32`, `float64`, `complex64`, `complex128`
    ord: Order of the norm. Supported values are 'fro', 'euclidean',
      `1`, `2`, `np.inf` and any positive real number yielding the
      corresponding p-norm. Default is 'euclidean' which is equivalent to
      Frobenius norm if `tensor` is a matrix and equivalent to 2-norm for
      vectors. Some restrictions apply:
        a) The Frobenius norm `fro` is not defined for vectors,
        b) If axis is a 2-tuple (matrix norm), only 'euclidean', 'fro', `1`,
           `2`, `np.inf` are supported.
      See the description of `axis` on how to compute norms for a batch of
      vectors or matrices stored in a tensor.
    axis: If `axis` is `None` (the default), the input is considered a vector
      and a single vector norm is computed over the entire set of values in the
      tensor, i.e. `norm(tensor, ord=ord)` is equivalent to
      `norm(reshape(tensor, [-1]), ord=ord)`.
      If `axis` is a Python integer, the input is considered a batch of
      vectors, and `axis` determines the axis in `tensor` over which to
      compute vector norms.
      If `axis` is a 2-tuple of Python integers it is considered a batch of
      matrices and `axis` determines the axes in `tensor` over which to
      compute a matrix norm.
      Negative indices are supported. Example: If you are passing a tensor that
      can be either a matrix or a batch of matrices at runtime, pass
      `axis=[-2,-1]` instead of `axis=None` to make sure that matrix norms are
      computed.
    keepdims: If True, the axis indicated in `axis` are kept with size 1.
      Otherwise, the dimensions in `axis` are removed from the output shape.
    name: The name of the op.
    keep_dims: Deprecated alias for `keepdims`.

  Returns:
    output: A `Tensor` of the same type as tensor, containing the vector or
      matrix norms. If `keepdims` is True then the rank of output is equal to
      the rank of `tensor`. Otherwise, if `axis` is none the output is a scalar,
      if `axis` is an integer, the rank of `output` is one less than the rank
      of `tensor`, if `axis` is a 2-tuple the rank of `output` is two less than
      the rank of `tensor`.

  Raises:
    ValueError: If `ord` or `axis` is invalid.

  @compatibility(numpy)
  Mostly equivalent to numpy.linalg.norm.
  Not supported: ord <= 0, 2-norm for matrices, nuclear norm.
  Other differences:
    a) If axis is `None`, treats the flattened `tensor` as a vector
     regardless of rank.
    b) Explicitly supports 'euclidean' norm as the default, including for
     higher order tensors.
  @end_compatibility
  """
  # Resolve the deprecated `keep_dims` alias before anything else.
  keepdims = deprecation.deprecated_argument_lookup('keepdims', keepdims,
                                                    'keep_dims', keep_dims)
  if keepdims is None:
    keepdims = False

  # A 2-element tuple/list axis selects a matrix norm; anything else (None or
  # a single int) selects a vector norm.
  is_matrix_norm = ((isinstance(axis, tuple) or isinstance(axis, list)) and
                    len(axis) == 2)
  if is_matrix_norm:
    axis = tuple(axis)
    if (not isinstance(axis[0], int) or not isinstance(axis[1], int) or
        axis[0] == axis[1]):
      raise ValueError(
          "'axis' must be None, an integer, or a tuple of 2 unique integers")
    supported_matrix_norms = ['euclidean', 'fro', 1, 2, np.inf]
    if ord not in supported_matrix_norms:
      raise ValueError("'ord' must be a supported matrix norm in %s, got %s" %
                       (supported_matrix_norms, ord))
  else:
    if not (isinstance(axis, int) or axis is None):
      raise ValueError(
          "'axis' must be None, an integer, or a tuple of 2 unique integers")

    supported_vector_norms = ['euclidean', 1, 2, np.inf]
    if (not np.isreal(ord) or ord <= 0) and ord not in supported_vector_norms:
      raise ValueError("'ord' must be a supported vector norm, got %s" % ord)
    # Normalize to a 1-tuple so the reductions below can index axis[0].
    if axis is not None:
      axis = (axis,)

  with ops.name_scope(name, 'norm', [tensor]):
    tensor = ops.convert_to_tensor(tensor)

    if ord in ['fro', 'euclidean', 2, 2.0]:
      if is_matrix_norm and ord in [2, 2.0]:
        # Matrix 2-norm (spectral norm): the largest singular value.  The
        # matrix axes are transposed to the end, SVD is applied, and the
        # result is transposed back so reduced dims end up where callers
        # expect them.
        rank = array_ops.rank(tensor)
        positive_axis = functional_ops.map_fn(
            lambda i: control_flow_ops.cond(i >= 0, lambda: i, lambda: i + rank
                                           ),
            ops.convert_to_tensor(axis))
        axes = math_ops.range(rank)
        perm_before = array_ops.concat([
            array_ops.setdiff1d(axes, positive_axis)[0], positive_axis
        ],
                                       axis=0)
        # perm_after is the inverse permutation of perm_before.
        perm_after = functional_ops.map_fn(
            lambda i: math_ops.cast(
                array_ops.squeeze(
                    array_ops.where(math_ops.equal(perm_before, i))),
                dtype=dtypes.int32), axes)
        permed = array_ops.transpose(tensor, perm=perm_before)
        matrix_2_norm = array_ops.expand_dims(
            math_ops.reduce_max(
                math_ops.abs(gen_linalg_ops.svd(permed, compute_uv=False)[0]),
                axis=-1,
                keepdims=True),
            axis=-1)
        result = array_ops.transpose(matrix_2_norm, perm=perm_after)
      else:
        # Frobenius / Euclidean norm: sqrt(sum(|x|^2)); conj handles complex.
        result = math_ops.sqrt(
            math_ops.reduce_sum(
                tensor * math_ops.conj(tensor), axis, keepdims=True))
    else:
      result = math_ops.abs(tensor)
      if ord == 1:
        # Vector 1-norm: sum of |x|.  Matrix 1-norm: max column sum.
        sum_axis = None if axis is None else axis[0]
        result = math_ops.reduce_sum(result, sum_axis, keepdims=True)
        if is_matrix_norm:
          result = math_ops.reduce_max(result, axis[-1], keepdims=True)
      elif ord == np.inf:
        # Vector inf-norm: max |x|.  Matrix inf-norm: max row sum.
        if is_matrix_norm:
          result = math_ops.reduce_sum(result, axis[1], keepdims=True)
        max_axis = None if axis is None else axis[0]
        result = math_ops.reduce_max(result, max_axis, keepdims=True)
      else:
        # General p-norms (positive p only)
        result = math_ops.pow(
            math_ops.reduce_sum(math_ops.pow(result, ord), axis,
                                keepdims=True),
            1.0 / ord)
    if not keepdims:
      # axis may be None here, in which case all size-1 dims are squeezed.
      result = array_ops.squeeze(result, axis)
    return result
def _multidimensional_dynamic_rnn_loop(cell,
                                       inputs,
                                       initial_state,
                                       height,
                                       width,
                                       parallel_iterations,
                                       swap_memory,
                                       sequence_length=None,
                                       dtype=None):
  """Internal implementation of Dynamic RNN.

  Args:
    cell: An instance of RNNCell.
    inputs: A `Tensor` of shape [time, batch_size, input_size], or a nested
      tuple of such elements.
    initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if
      `cell.state_size` is a tuple, then this should be a tuple of
      tensors having shapes `[batch_size, s] for s in cell.state_size`.
    height: Forwarded to `cell.call` at each time step.
      NOTE(review): presumably the spatial height of a 2-D input grid --
      confirm against the cell implementation.
    width: Forwarded to `cell.call` at each time step (see `height`).
    parallel_iterations: Positive Python int.
    swap_memory: A Python boolean
    sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].
    dtype: (optional) Expected dtype of output. If not specified, inferred from
      initial_state.

  Returns:
    Tuple `(final_outputs, final_state)`.
    final_outputs:
      A `Tensor` of shape `[time, batch_size, cell.output_size]`.  If
      `cell.output_size` is a (possibly nested) tuple of ints or `TensorShape`
      objects, then this returns a (possibly nested) tuple of Tensors matching
      the corresponding shapes.
    final_state:
      A `Tensor`, or possibly nested tuple of Tensors, matching in length
      and shapes to `initial_state`.

  Raises:
    ValueError: If the input depth cannot be inferred via shape
      inference from the inputs.
  """
  state = initial_state
  assert isinstance(parallel_iterations, int), "parallel_iterations must be int"

  state_size = cell.state_size

  flat_input = nest.flatten(inputs)
  flat_output_size = nest.flatten(cell.output_size)

  # Construct an initial output
  input_shape = array_ops.shape(flat_input[0])
  time_steps = input_shape[0]
  batch_size = _best_effort_input_batch_size(flat_input)

  inputs_got_shape = tuple(input_.get_shape().with_rank_at_least(3)
                           for input_ in flat_input)

  const_time_steps, const_batch_size = inputs_got_shape[0].as_list()[:2]

  # Validate that every flattened input agrees on time and batch dims and has
  # a statically known depth.
  for shape in inputs_got_shape:
    if not shape[2:].is_fully_defined():
      raise ValueError(
          "Input size (depth of inputs) must be accessible via shape inference,"
          " but saw value None.")
    got_time_steps = shape[0].value
    got_batch_size = shape[1].value
    if const_time_steps != got_time_steps:
      raise ValueError(
          "Time steps is not the same for all the elements in the input in a "
          "batch.")
    if const_batch_size != got_batch_size:
      raise ValueError(
          "Batch_size is not the same for all the elements in the input.")

  # Prepare dynamic conditional copying of state & output
  def _create_zero_arrays(size):
    # Zero tensor of shape [batch_size] + size in the inferred state dtype.
    size = _concat(batch_size, size)
    return array_ops.zeros(
        array_ops.stack(size), _infer_state_dtype(dtype, state))

  flat_zero_output = tuple(_create_zero_arrays(output)
                           for output in flat_output_size)
  zero_output = nest.pack_sequence_as(structure=cell.output_size,
                                      flat_sequence=flat_zero_output)

  if sequence_length is not None:
    min_sequence_length = math_ops.reduce_min(sequence_length)
    max_sequence_length = math_ops.reduce_max(sequence_length)

  time = array_ops.constant(0, dtype=dtypes.int32, name="time")

  with ops.name_scope("dynamic_rnn") as scope:
    base_name = scope

  def _create_ta(name, dtype):
    return tensor_array_ops.TensorArray(dtype=dtype,
                                        size=time_steps,
                                        tensor_array_name=base_name + name)

  # One TensorArray per flattened output/input component.
  output_ta = tuple(_create_ta("output_%d" % i,
                               _infer_state_dtype(dtype, state))
                    for i in range(len(flat_output_size)))
  input_ta = tuple(_create_ta("input_%d" % i, flat_input[i].dtype)
                   for i in range(len(flat_input)))

  input_ta = tuple(ta.unstack(input_)
                   for ta, input_ in zip(input_ta, flat_input))

  def _time_step(time, output_ta_t, state):
    """Take a time step of the dynamic RNN.

    Args:
      time: int32 scalar Tensor.
      output_ta_t: List of `TensorArray`s that represent the output.
      state: nested tuple of vector tensors that represent the state.

    Returns:
      The tuple (time + 1, output_ta_t with updated flow, new_state).
    """
    input_t = tuple(ta.read(time) for ta in input_ta)
    # Restore some shape information
    for input_, shape in zip(input_t, inputs_got_shape):
      input_.set_shape(shape[1:])

    input_t = nest.pack_sequence_as(structure=inputs, flat_sequence=input_t)
    # The cell receives the extra height/width/time arguments each step.
    call_cell = lambda: cell.call(input_t, state, height, width, time)

    if sequence_length is not None:
      # _rnn_step emits zero_output / copied-through state past each
      # sequence's end.
      (output, new_state) = _rnn_step(
          time=time,
          sequence_length=sequence_length,
          min_sequence_length=min_sequence_length,
          max_sequence_length=max_sequence_length,
          zero_output=zero_output,
          state=state,
          call_cell=call_cell,
          state_size=state_size,
          skip_conditionals=True)
    else:
      (output, new_state) = call_cell()

    # Pack state if using state tuples
    output = nest.flatten(output)

    output_ta_t = tuple(
        ta.write(time, out) for ta, out in zip(output_ta_t, output))

    return (time + 1, output_ta_t, new_state)

  _, output_final_ta, final_state = control_flow_ops.while_loop(
      cond=lambda time, *_: time < time_steps,
      body=_time_step,
      loop_vars=(time, output_ta, state),
      parallel_iterations=parallel_iterations,
      swap_memory=swap_memory)

  # Unpack final output if not using output tuples.
  final_outputs = tuple(ta.stack() for ta in output_final_ta)

  # Restore some shape information
  for output, output_size in zip(final_outputs, flat_output_size):
    shape = _concat([const_time_steps, const_batch_size],
                    output_size,
                    static=True)
    output.set_shape(shape)

  final_outputs = nest.pack_sequence_as(structure=cell.output_size,
                                        flat_sequence=final_outputs)

  return (final_outputs, final_state)
def confusion_matrix(labels,
                     predictions,
                     num_classes=None,
                     dtype=dtypes.int32,
                     name=None,
                     weights=None):
  """Computes the confusion matrix from predictions and labels.

  Calculate the Confusion Matrix for a pair of prediction and label 1-D int
  arrays.

  The matrix columns represent the prediction labels and the rows represent the
  real labels. The confusion matrix is always a 2-D array of shape `[n, n]`,
  where `n` is the number of valid labels for a given classification task. Both
  prediction and labels must be 1-D arrays of the same shape in order for this
  function to work.

  If `num_classes` is None, then `num_classes` will be set to the one plus the
  maximum value in either predictions or labels.
  Class labels are expected to start at 0. E.g., if `num_classes` was
  three, then the possible labels would be `[0, 1, 2]`.

  If `weights` is not `None`, then each prediction contributes its
  corresponding weight to the total value of the confusion matrix cell.

  For example:

  ```python
    tf.contrib.metrics.confusion_matrix([1, 2, 4], [2, 2, 4]) ==>
        [[0 0 0 0 0]
         [0 0 1 0 0]
         [0 0 1 0 0]
         [0 0 0 0 0]
         [0 0 0 0 1]]
  ```

  Note that the possible labels are assumed to be `[0, 1, 2, 3, 4]`,
  resulting in a 5x5 confusion matrix.

  Args:
    labels: 1-D `Tensor` of real labels for the classification task.
    predictions: 1-D `Tensor` of predictions for a given classification.
    num_classes: The possible number of labels the classification task can
                 have. If this value is not provided, it will be calculated
                 using both predictions and labels array.
    dtype: Data type of the confusion matrix.
    name: Scope name.
    weights: An optional `Tensor` whose shape matches `predictions`.

  Returns:
    A k X k matrix representing the confusion matrix, where k is the number of
    possible labels in the classification task.

  Raises:
    ValueError: If both predictions and labels are not 1-D vectors and have
      mismatched shapes, or if `weights` is not `None` and its shape doesn't
      match `predictions`.
  """
  with ops.name_scope(name, 'confusion_matrix',
                      (predictions, labels, num_classes, weights)) as name:
    # Align ranks (e.g. drop a trailing size-1 dim from one operand).
    labels, predictions = remove_squeezable_dimensions(
        ops.convert_to_tensor(labels, name='labels'),
        ops.convert_to_tensor(predictions, name='predictions'))
    predictions = math_ops.cast(predictions, dtypes.int64)
    labels = math_ops.cast(labels, dtypes.int64)

    # Sanity checks - underflow or overflow can cause memory corruption.
    labels = control_flow_ops.with_dependencies([
        check_ops.assert_non_negative(
            labels, message='`labels` contains negative values')
    ], labels)
    predictions = control_flow_ops.with_dependencies([
        check_ops.assert_non_negative(
            predictions, message='`predictions` contains negative values')
    ], predictions)

    if num_classes is None:
      # Infer the class count as 1 + max value seen in either input.
      num_classes = math_ops.maximum(math_ops.reduce_max(predictions),
                                     math_ops.reduce_max(labels)) + 1
    else:
      # With an explicit class count, assert both inputs are in range.
      num_classes_int64 = math_ops.cast(num_classes, dtypes.int64)
      labels = control_flow_ops.with_dependencies([
          check_ops.assert_less(
              labels, num_classes_int64, message='`labels` out of bound')
      ], labels)
      predictions = control_flow_ops.with_dependencies([
          check_ops.assert_less(predictions, num_classes_int64,
                                message='`predictions` out of bound')
      ], predictions)

    if weights is not None:
      predictions.get_shape().assert_is_compatible_with(weights.get_shape())
      weights = math_ops.cast(weights, dtype)

    # Accumulate (label, prediction) pairs as sparse entries, then add to a
    # dense zero matrix so duplicate indices are summed.
    shape = array_ops.stack([num_classes, num_classes])
    indices = array_ops.transpose(array_ops.stack([labels, predictions]))
    values = (array_ops.ones_like(predictions, dtype)
              if weights is None else weights)
    cm_sparse = sparse_tensor.SparseTensor(
        indices=indices,
        values=values,
        dense_shape=math_ops.to_int64(shape))
    zero_matrix = array_ops.zeros(math_ops.to_int32(shape), dtype)

    return sparse_ops.sparse_add(zero_matrix, cm_sparse)
def _call_cell(self, inputs, initial_cell_state, initial_output, dtype,
               sequence_length):
  """Run this LSTM on inputs, starting from the given state.

  Args:
    inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`
    initial_cell_state: initial value for cell state, shape `[batch_size,
      self._num_units]`
    initial_output: initial value of cell output, shape `[batch_size,
      self._num_units]`
    dtype: The data type for the initial state and expected output.
    sequence_length: Specifies the length of each sequence in inputs. An
      `int32` or `int64` vector (tensor) size `[batch_size]`, values in `[0,
      time_len)` or None.

  Returns:
    A pair containing:
    - Cell state (cs): A `3-D` tensor of shape `[time_len, batch_size,
      output_size]`
    - Output (h): A `3-D` tensor of shape `[time_len, batch_size,
      output_size]`
  """
  inputs_shape = inputs.get_shape().with_rank(3)
  # Prefer the static time dimension; fall back to a dynamic shape op when
  # it is unknown at graph-construction time.
  time_len = inputs_shape[0].value
  if time_len is None:
    time_len = array_ops.shape(inputs)[0]
  input_size = inputs_shape[2].value
  # Single fused weight matrix for all four gates (hence num_units * 4),
  # applied to the concatenation of input and previous output.
  w = vs.get_variable(
      "weights", [input_size + self._num_units, self._num_units * 4],
      dtype=dtype)
  b = vs.get_variable(
      "biases", [w.get_shape().with_rank(2)[1]],
      initializer=init_ops.constant_initializer(0.0),
      dtype=dtype)
  if self._use_peephole:
    # Diagonal peephole connections from the cell state into each gate.
    wci = vs.get_variable("w_i_diag", [self._num_units], dtype=dtype)
    wco = vs.get_variable("w_o_diag", [self._num_units], dtype=dtype)
    wcf = vs.get_variable("w_f_diag", [self._num_units], dtype=dtype)
  else:
    # Zero tensors disable the peephole terms inside the fused kernel.
    wci = wco = wcf = array_ops.zeros([self._num_units], dtype=dtype)
  if sequence_length is None:
    max_seq_len = time_len
  else:
    max_seq_len = math_ops.to_int64(math_ops.reduce_max(sequence_length))
  # The fused kernel returns several per-gate tensors; only the cell states
  # and outputs are needed here.
  _, cs, _, _, _, _, h = _lstm_ops_so.block_lstm(
      seq_len_max=max_seq_len,
      x=inputs,
      cs_prev=initial_cell_state,
      h_prev=initial_output,
      w=w,
      wci=wci,
      wco=wco,
      wcf=wcf,
      b=b,
      forget_bias=self._forget_bias,
      cell_clip=self._cell_clip,
      use_peephole=self._use_peephole)
  return cs, h
def LastValueQuantize(inputs,
                      per_channel=False,
                      init_min=-6.0,
                      init_max=6.0,
                      vars_collection=ops.GraphKeys.MOVING_AVERAGE_VARIABLES,
                      name_prefix='LastValueQuant',
                      reuse=None,
                      is_training=True,
                      num_bits=8,
                      narrow_range=False):
  """Adds a layer that collects quantization ranges as last input ranges.

  LastValueQuantize creates variables called 'min' and 'max', representing the
  interval used for quantization and clamping.

  Args:
    inputs: a tensor containing values to be quantized.
    per_channel: (Optional) a boolean specifying whether to use different
      quantization ranges per output channel.
    init_min: a float scalar, the initial value for variable min.
    init_max: a float scalar, the initial value for variable max.
    vars_collection: (Optional) collection where to store variables for
      quantization interval ends.
    name_prefix: name_prefix for created nodes.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    is_training: Whether the op is applied to a training or eval graph.
    num_bits: Number of bits to use for quantization, must be between 2 and 8.
    narrow_range: Whether to use the narrow quantization range
      [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].

  Returns:
    a tensor containing quantized values.
  """
  with variable_scope.variable_scope(
      None, default_name=name_prefix, values=[inputs], reuse=reuse) as scope:
    # Partitioned min/max variables would break the fake-quant ops below.
    scope.set_partitioner(None)
    input_shape = inputs.get_shape()
    input_dim = len(input_shape)
    if per_channel:
      # Only support quantizing 1-, 2- and 4-dimensional tensors.
      assert input_dim in [1, 2, 4], ('Expected 1D, 2D or 4D input, was: %s in '
                                      ' scope: %s' % (input_shape, name_prefix))
      # One (min, max) pair per channel, channels being the last dimension.
      min_max_shape = [input_shape[-1]]
    else:
      # A single scalar (min, max) pair for the whole tensor.
      min_max_shape = []

    min_var = model_variable(
        'min',
        shape=min_max_shape,
        initializer=init_ops.constant_initializer(init_min),
        collections=[vars_collection],
        trainable=False)
    max_var = model_variable(
        'max',
        shape=min_max_shape,
        initializer=init_ops.constant_initializer(init_max),
        collections=[vars_collection],
        trainable=False)
    if not is_training:
      # Eval graphs reuse the stored ranges without updating them.
      return _FakeQuantWithMinMaxVars(
          inputs,
          min_var,
          max_var,
          per_channel=per_channel,
          num_bits=num_bits,
          narrow_range=narrow_range)

    if per_channel:
      # Reduce over every axis except the channel axis (the last one).
      # input_dim == 1 needs no reduction and is handled below.
      if input_dim == 2:
        reduce_dims = [0]
      elif input_dim == 4:
        reduce_dims = [0, 1, 2]

    if per_channel:
      if input_dim >= 2:
        batch_min = math_ops.reduce_min(
            inputs, reduction_indices=reduce_dims, name='BatchMin')
      else:
        batch_min = inputs
    else:
      batch_min = math_ops.reduce_min(inputs, name='BatchMin')
    # TFLite requires that 0.0 is always in the [min; max] range.
    batch_min = math_ops.minimum(batch_min, 0.0)
    assign_min = state_ops.assign(min_var, batch_min, name='AssignMinLast')

    if per_channel:
      if input_dim >= 2:
        batch_max = math_ops.reduce_max(
            inputs, reduction_indices=reduce_dims, name='BatchMax')
      else:
        batch_max = inputs
    else:
      batch_max = math_ops.reduce_max(inputs, name='BatchMax')
    # TFLite requires that 0.0 is always in the [min; max] range.
    batch_max = math_ops.maximum(batch_max, 0.0)
    assign_max = state_ops.assign(max_var, batch_max, name='AssignMaxLast')

    # Quantize through the freshly-assigned ranges so the fake-quant op
    # depends on (and thus triggers) the assignments.
    return _FakeQuantWithMinMaxVars(
        inputs,
        assign_min,
        assign_max,
        per_channel=per_channel,
        num_bits=num_bits,
        narrow_range=narrow_range)
def _InsertCalibOp(context,
                   name,
                   producer,
                   consumers,
                   vars_collection=ops.GraphKeys.GLOBAL_VARIABLES,
                   producer_scope=None,
                   consumer_scope=None):
  """Inserts calibration ops between a producer op and (multiple) consumer ops.

  Args:
    context: Context where producer and consumer operations are nested.
    name: Name for the new calibration op within the context.
    producer: Producer operation of the pairs where calibration will be
      inserted.
    consumers: Consumer operations of the pairs.
    vars_collection: (Optional) collection where to store the created min/max
      variables.
    producer_scope: The restriction of producer scope. If not None, the new op
      will be inserted only when the producer is in this scope.
    consumer_scope: The restriction of consumer scope. If not None, the new op
      will be inserted only when all the consumers are in this scope.

  Returns:
    A `(range_min, range_max)` pair of tensors tracking the running min/max of
    the producer's output, or `None` when the scope restrictions filtered the
    producer/consumers out.

  Raises:
    ValueError: When producer operation is not directly connected to the
      consumer operation.
  """
  if producer_scope and not producer.name.startswith(producer_scope):
    logging.info(
        '_InsertCalibOp ignores context="%s" name="%s" '
        'because producer "%s" is not in scope "%s"', context, name,
        producer.name, producer_scope)
    return

  if consumer_scope:
    consumers_in_scope = []
    for consumer in consumers:
      if consumer.name.startswith(consumer_scope):
        consumers_in_scope.append(consumer)
      else:
        # One out-of-scope consumer aborts the whole insertion: calibration
        # is all-or-nothing for a producer's consumer set.
        logging.info(
            '_InsertCalibOp context="%s" name="%s" ignores '
            'consumer "%s" because it is not in scope "%s"', context, name,
            consumer.name, consumer_scope)
        return
    consumers = consumers_in_scope

  name_prefix = _AddContextToName(context, name)
  # This is needed on TPU where name_scope == 'TPUReplicate/loop', and
  # name_prefix starts with 'TPUReplicate/loop/'; without dropping it
  # variables are created as 'TPUReplicate/loop/TPUReplicate/loop/...'.
  name_scope = ops.get_name_scope()
  if name_scope:
    name_prefix = common.DropStringPrefix(name_prefix, name_scope + '/')

  inputs = producer.outputs[0]
  # Prevent ops from being modified multiple times. Bypass ops can sometimes
  # overlap between multiple matches, so we need to ensure that we don't
  # add duplicate calibration operations.
  #if _FollowedByFakeQuant(inputs):
  #  return

  with variable_scope.variable_scope(
      None, default_name=name_prefix, values=[inputs]) as scope:
    # Currently no per channel.
    min_max_shape = []
    vars_collections = [vars_collection] if vars_collection else []
    # min starts at +inf / max at -inf so the first observed batch always
    # replaces the initial values.
    min_var = _ModelVariable(
        'min',
        shape=min_max_shape,
        initializer=init_ops.constant_initializer(float('inf')),
        collections=vars_collections,
        trainable=False)
    max_var = _ModelVariable(
        'max',
        shape=min_max_shape,
        initializer=init_ops.constant_initializer(-float('inf')),
        collections=vars_collections,
        trainable=False)
    batch_min = math_ops.reduce_min(inputs, name='BatchMin')
    batch_max = math_ops.reduce_max(inputs, name='BatchMax')

    # Running extrema: widen the stored range by the current batch's range.
    range_min = math_ops.minimum(batch_min, min_var,
                                 name=name_prefix + '/range_min')
    range_max = math_ops.maximum(batch_max, max_var,
                                 name=name_prefix + '/range_max')

    return range_min, range_max
def pinv(a, rcond=None, validate_args=False, name=None):
  """Compute the Moore-Penrose pseudo-inverse of one or more matrices.

  Calculate the [generalized inverse of a matrix](
  https://en.wikipedia.org/wiki/Moore%E2%80%93Penrose_inverse) using its
  singular-value decomposition (SVD) and including all large singular values.

  The pseudo-inverse of a matrix `A`, is defined as: 'the matrix that 'solves'
  [the least-squares problem] `A @ x = b`,' i.e., if `x_hat` is a solution,
  then `A_pinv` is the matrix such that `x_hat = A_pinv @ b`. It can be shown
  that if `U @ Sigma @ V.T = A` is the singular value decomposition of `A`,
  then `A_pinv = V @ inv(Sigma) U^T`. [(Strang, 1980)][1]

  This function is analogous to [`numpy.linalg.pinv`](
  https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.pinv.html).
  It differs only in default value of `rcond`. In `numpy.linalg.pinv`, the
  default `rcond` is `1e-15`. Here the default is
  `10. * max(num_rows, num_cols) * np.finfo(dtype).eps`.

  Args:
    a: (Batch of) `float`-like matrix-shaped `Tensor`(s) which are to be
      pseudo-inverted.
    rcond: `Tensor` of small singular value cutoffs.  Singular values smaller
      (in modulus) than `rcond` * largest_singular_value (again, in modulus)
      are set to zero. Must broadcast against `tf.shape(a)[:-2]`.
      Default value: `10. * max(num_rows, num_cols) * np.finfo(a.dtype).eps`.
    validate_args: When `True`, additional assertions might be embedded in the
      graph.
      Default value: `False` (i.e., no graph assertions are added).
    name: Python `str` prefixed to ops created by this function.
      Default value: 'pinv'.

  Returns:
    a_pinv: (Batch of) pseudo-inverse of input `a`. Has same shape as `a`
      except rightmost two dimensions are transposed.

  Raises:
    TypeError: if input `a` does not have `float`-like `dtype`.
    ValueError: if input `a` has fewer than 2 dimensions.

  #### Examples

  ```python
  import tensorflow as tf
  import tensorflow_probability as tfp

  a = tf.constant([[1.,  0.4,  0.5],
                   [0.4, 0.2,  0.25],
                   [0.5, 0.25, 0.35]])
  tf.matmul(tf.linalg.pinv(a), a)
  # ==> array([[1., 0., 0.],
               [0., 1., 0.],
               [0., 0., 1.]], dtype=float32)

  a = tf.constant([[1.,  0.4,  0.5,  1.],
                   [0.4, 0.2,  0.25, 2.],
                   [0.5, 0.25, 0.35, 3.]])
  tf.matmul(tf.linalg.pinv(a), a)
  # ==> array([[ 0.76,  0.37,  0.21, -0.02],
               [ 0.37,  0.43, -0.33,  0.02],
               [ 0.21, -0.33,  0.81,  0.01],
               [-0.02,  0.02,  0.01,  1.  ]], dtype=float32)
  ```

  #### References

  [1]: G. Strang. 'Linear Algebra and Its Applications, 2nd Ed.' Academic
       Press, Inc., 1980, pp. 139-142.
  """
  with ops.name_scope(name or 'pinv'):
    a = ops.convert_to_tensor(a, name='a')

    assertions = _maybe_validate_matrix(a, validate_args)
    if assertions:
      with ops.control_dependencies(assertions):
        a = array_ops.identity(a)

    dtype = a.dtype.as_numpy_dtype

    if rcond is None:

      def get_dim_size(dim):
        # Static dimension when known, dynamic shape op otherwise.
        dim_val = tensor_shape.dimension_value(a.shape[dim])
        if dim_val is not None:
          return dim_val
        return array_ops.shape(a)[dim]

      num_rows = get_dim_size(-2)
      num_cols = get_dim_size(-1)
      if isinstance(num_rows, int) and isinstance(num_cols, int):
        # Both dims statically known; the cutoff is a Python float.
        max_rows_cols = float(max(num_rows, num_cols))
      else:
        max_rows_cols = math_ops.cast(
            math_ops.maximum(num_rows, num_cols), dtype)
      rcond = 10. * max_rows_cols * np.finfo(dtype).eps

    rcond = ops.convert_to_tensor(rcond, dtype=dtype, name='rcond')

    # Calculate pseudo inverse via SVD.
    # Note: if a is Hermitian then u == v. (We might observe additional
    # performance by explicitly setting `v = u` in such cases.)
    [
        singular_values,  # Sigma
        left_singular_vectors,  # U
        right_singular_vectors,  # V
    ] = svd(a, full_matrices=False, compute_uv=True)

    # Saturate small singular values to inf. This has the effect of make
    # `1. / s = 0.` while not resulting in `NaN` gradients.
    cutoff = rcond * math_ops.reduce_max(singular_values, axis=-1)
    singular_values = array_ops.where_v2(
        singular_values > array_ops.expand_dims_v2(cutoff, -1),
        singular_values, np.array(np.inf, dtype))

    # By the definition of the SVD, `a == u @ s @ v^H`, and the pseudo-inverse
    # is defined as `pinv(a) == v @ inv(s) @ u^H`.
    a_pinv = math_ops.matmul(
        right_singular_vectors / array_ops.expand_dims_v2(singular_values, -2),
        left_singular_vectors,
        adjoint_b=True)

    if a.shape is not None and a.shape.rank is not None:
      # Output shape is the input's batch shape with the trailing matrix
      # dimensions transposed.
      a_pinv.set_shape(a.shape[:-2].concatenate([a.shape[-1], a.shape[-2]]))

    return a_pinv
def _call_cell(self,
               inputs,
               initial_cell_state=None,
               initial_output=None,
               dtype=None,
               sequence_length=None):
  """Run this LSTM on inputs, starting from the given state.

  Args:
    inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`
    initial_cell_state: initial value for cell state, shape `[batch_size,
      self._num_units]`
    initial_output: initial value of cell output, shape `[batch_size,
      self._num_units]`
    dtype: The data type for the initial state and expected output.
    sequence_length: Specifies the length of each sequence in inputs. An
      `int32` or `int64` vector (tensor) size `[batch_size]`, values in `[0,
      time_len)` or None.

  Returns:
    A pair containing:
    - Cell state (cs): A `3-D` tensor of shape `[time_len, batch_size,
      output_size]`
    - Output (h): A `3-D` tensor of shape `[time_len, batch_size,
      output_size]`
  """
  inputs_shape = inputs.get_shape().with_rank(3)
  # Prefer static time length; fall back to dynamic shape when unknown.
  time_len = inputs_shape.dims[0].value
  if time_len is None:
    time_len = array_ops.shape(inputs)[0]

  if self._use_peephole:
    # Peephole weights were created elsewhere (e.g. in build()); this method
    # only wires them into the fused kernel.
    wci = self._w_i_diag
    wco = self._w_o_diag
    wcf = self._w_f_diag
  else:
    # Zero tensors disable the peephole terms inside the fused kernel.
    wci = wcf = wco = array_ops.zeros([self._num_units], dtype=dtype)

  if sequence_length is None:
    max_seq_len = math_ops.to_int64(time_len)
  else:
    max_seq_len = math_ops.to_int64(math_ops.reduce_max(sequence_length))

  # The fused kernel returns several per-gate tensors; only the cell states
  # and outputs are needed here.
  _, cs, _, _, _, _, h = _lstm_ops_so.block_lstm_fused_our(
      seq_len_max=max_seq_len,
      x=inputs,
      cs_prev=initial_cell_state,
      h_prev=initial_output,
      w=self._kernel,
      wci=wci,
      wcf=wcf,
      wco=wco,
      b=self._bias,
      group_size_attr=self._group_size,
      forget_bias=self._forget_bias,
      cell_clip=self._cell_clip,
      use_peephole=self._use_peephole)
  return cs, h
def _embedding_lookup_with_distributed_aggregation(params,
                                                   ids,
                                                   partition_strategy="mod",
                                                   name=None,
                                                   max_norm=None,
                                                   weights=None,
                                                   idx=None,
                                                   segment_ids=None):
  """Lookup helper for embedding_lookup_sparse_with_distributed_aggregation.

  Gathers embeddings for `ids` from (possibly partitioned) `params` and
  performs the (optionally weighted) segment-sum aggregation on the devices
  holding each partition, so only per-segment partial sums cross devices.

  Args:
    params: A single variable/tensor, a list of them, or a
      `PartitionedVariable`, holding the embedding table shards.
    ids: Ids to look up in `params`.
    partition_strategy: `"mod"` or `"div"`; how ids map to partitions.
    name: Optional name for the op scope.
    max_norm: If not None, embeddings are l2-clipped to this norm before
      aggregation.
    weights: Optional per-id weights; None means unweighted aggregation.
    idx: Indices used by `sparse_segment_sum` in the unweighted case.
      (NOTE(review): semantics inferred from usage; presumably comes from
      `unique` on the original sparse ids — confirm against caller.)
    segment_ids: Segment ids (one per looked-up id) for the aggregation.

  Returns:
    The aggregated embeddings, shaped `[num_segments, embedding_dim]`.

  Raises:
    ValueError: If `params` is empty or `partition_strategy` is unknown.
  """
  if params is None or params == []:  # pylint: disable=g-explicit-bool-comparison
    raise ValueError("Need at least one param")
  if isinstance(params, variables.PartitionedVariable):
    params = list(params)  # Iterate to get the underlying Variables.
  if not isinstance(params, list):
    params = [params]

  def maybe_normalize(x):
    # Clip each embedding row to max_norm (no-op when max_norm is None).
    if max_norm is not None:
      if x.get_shape().ndims is not None:
        ndims = x.get_shape().ndims
      else:
        ndims = array_ops.size(array_ops.shape(x))
      return clip_ops.clip_by_norm(x, max_norm, axes=list(range(1, ndims)))
    return x

  with ops.name_scope(name, "embedding_lookup_with_distributed_aggregation",
                      params + [ids]) as name:
    np = len(params)  # Number of partitions
    # Preserve the resource variable status to avoid accidental dense reads.
    if not any(
        isinstance(p, resource_variable_ops.ResourceVariable)
        for p in params):
      params = ops.convert_n_to_tensor_or_indexed_slices(params, name="params")
    if np == 1:
      # Single-partition fast path: gather, weight, and aggregate in place.
      with ops.colocate_with(params[0]):
        ret = maybe_normalize(_do_gather(params[0], ids))
        ignore_weights = weights is None
        if not ignore_weights:
          if weights.dtype != ret.dtype:
            weights = math_ops.cast(weights, ret.dtype)
          # Reshape to allow broadcast
          ones = array_ops.fill(
              array_ops.expand_dims(array_ops.rank(ret) - 1, 0), 1)
          bcast_weights_shape = array_ops.concat(
              [array_ops.shape(weights), ones], 0)
          orig_weights_shape = weights.get_shape()
          weights = array_ops.reshape(weights, bcast_weights_shape)
          # Set weights shape after reshape
          if ret.get_shape().ndims is not None:
            weights.set_shape(
                orig_weights_shape.concatenate(
                    [1 for _ in range(ret.get_shape().ndims - 1)]))
          ret *= weights
          return math_ops.segment_sum(ret, segment_ids, name=name)
        else:
          return math_ops.sparse_segment_sum(ret, idx, segment_ids, name=name)
    else:
      ids = ops.convert_to_tensor(ids, name="ids")
      flat_ids = array_ops.reshape(ids, [-1])
      original_indices = math_ops.range(array_ops.size(flat_ids))

      # Create p_assignments and set new_ids depending on the strategy.
      if partition_strategy == "mod":
        p_assignments = flat_ids % np
        new_ids = flat_ids // np
      elif partition_strategy == "div":
        # Compute num_total_ids as the sum of dim-0 of params, then assign to
        # partitions based on a constant number of ids per partition. Optimize
        # if we already know the full shape statically.
        dim_0_size = params[0].get_shape()[0]
        for p in xrange(1, np):
          dim_0_size += params[p].get_shape()[0]
        if dim_0_size.value:
          num_total_ids = constant_op.constant(dim_0_size.value,
                                               flat_ids.dtype)
        else:
          # Some shard sizes are dynamic; read them on each shard's device.
          dim_0_sizes = []
          for p in xrange(np):
            if params[p].get_shape()[0].value is not None:
              dim_0_sizes.append(params[p].get_shape()[0].value)
            else:
              with ops.colocate_with(params[p]):
                dim_0_sizes.append(array_ops.shape(params[p])[0])
          num_total_ids = math_ops.reduce_sum(
              math_ops.cast(array_ops.stack(dim_0_sizes), flat_ids.dtype))
        ids_per_partition = num_total_ids // np
        extras = num_total_ids % np

        # The first `extras` partitions hold one extra id each.
        p_assignments = math_ops.maximum(flat_ids //
                                         (ids_per_partition + 1),
                                         (flat_ids - extras) //
                                         ids_per_partition)

        # Emulate a conditional using a boolean indicator tensor
        is_in_first_extras_partitions = math_ops.cast(p_assignments < extras,
                                                      flat_ids.dtype)
        new_ids = (is_in_first_extras_partitions * (flat_ids %
                                                    (ids_per_partition + 1)) +
                   (1 - is_in_first_extras_partitions) *
                   ((flat_ids - extras) % ids_per_partition))
      else:
        raise ValueError("Unrecognized partition strategy: " +
                         partition_strategy)

      # Cast partition assignments to int32 for use in dynamic_partition.
      # There really should not be more than 2^32 partitions.
      p_assignments = math_ops.cast(p_assignments, dtypes.int32)
      # Partition list of ids based on assignments into np separate lists
      gather_ids = data_flow_ops.dynamic_partition(new_ids, p_assignments, np)
      # Similarly, partition the original indices.
      pindices = data_flow_ops.dynamic_partition(original_indices,
                                                 p_assignments, np)
      # Do np separate lookups, finding embeddings for plist[p] in params[p]
      partitioned_result = []
      for p in xrange(np):
        with ops.colocate_with(params[p]):
          partitioned_result.append(_do_gather(params[p], gather_ids[p]))

      ignore_weights = weights is None
      if not ignore_weights:
        # Partition weights according to pindices.
        partitioned_weight = []
        for p in xrange(np):
          partitioned_weight.append(array_ops.gather(weights, pindices[p]))
      # Reshape each partition result.
      element_shape = params[0].get_shape()[1:]
      for p in params[1:]:
        element_shape = element_shape.merge_with(p.get_shape()[1:])
      if element_shape.is_fully_defined():
        for p in xrange(np):
          with ops.colocate_with(params[p]):
            partitioned_result[p] = array_ops.reshape(
                partitioned_result[p],
                array_ops.concat([array_ops.shape(pindices[p]), element_shape],
                                 0))
      else:
        # Embedding dims unknown statically: take them from params[0]'s
        # dynamic shape instead.
        with ops.colocate_with(params[0]):
          params_shape = array_ops.shape(params[0])
        for p in xrange(np):
          with ops.colocate_with(params[p]):
            partitioned_result[p] = array_ops.reshape(
                partitioned_result[p],
                array_ops.concat([
                    array_ops.shape(pindices[p]),
                    array_ops.slice(params_shape, [1], [-1])
                ], 0))
      # Normalize each partition result.
      for p in xrange(np):
        with ops.colocate_with(params[p]):
          partitioned_result[p] = maybe_normalize(partitioned_result[p])
      if not ignore_weights:
        # Multiply each partition result with partition weights.
        for p in xrange(np):
          with ops.colocate_with(params[p]):
            if partitioned_weight[p].dtype != partitioned_result[p].dtype:
              partitioned_weight[p] = math_ops.cast(partitioned_weight[p],
                                                    partitioned_result[p].dtype)
            # Reshape partition weights.
            ones = array_ops.fill(
                array_ops.expand_dims(
                    array_ops.rank(partitioned_result[p]) - 1, 0), 1)
            bcast_weights_shape = array_ops.concat(
                [array_ops.shape(partitioned_weight[p]), ones], 0)
            orig_weights_shape = partitioned_weight[p].get_shape()
            partitioned_weight[p] = array_ops.reshape(partitioned_weight[p],
                                                      bcast_weights_shape)
            if partitioned_result[p].get_shape().ndims is not None:
              partitioned_weight[p].set_shape(
                  orig_weights_shape.concatenate([
                      1
                      for _ in range(partitioned_result[p].get_shape().ndims -
                                     1)
                  ]))
            partitioned_result[p] *= partitioned_weight[p]
      partitioned_segment_ids = []
      for p in xrange(np):
        if not ignore_weights:
          # Partition segment_ids according to pindices.
          p_segment_ids = array_ops.gather(segment_ids, pindices[p])
          # Number the p_segment_ids to meet segment_sum's requirements. Note
          # that unique_p_segment_ids contains unique segment ids of this
          # partition and these ids' order is unchanged.
          unique_p_segment_ids, unique_p_segment_idx = array_ops.unique(
              p_segment_ids)
          partitioned_segment_ids.append(unique_p_segment_ids)
          # segment_sum this partition's result.
          with ops.colocate_with(params[p]):
            partitioned_result[p] = math_ops.segment_sum(
                partitioned_result[p], unique_p_segment_idx)
        else:
          # When ignore weights, we need to get indexs of elements in idx and
          # segment_ids.
          _, exclude_idx = array_ops.setdiff1d(idx, pindices[p])
          all_idx = math_ops.range(array_ops.shape(idx)[0])
          _, include_idx = array_ops.setdiff1d(all_idx, exclude_idx)
          # Gather segment_ids and idx according to indexs.
          p_segment_ids = array_ops.gather(segment_ids, include_idx)
          p_idx = array_ops.gather(idx, include_idx)
          # Number the p_segment_ids, same as ignore_weights case above.
          unique_p_segment_ids, unique_p_segment_idx = array_ops.unique(
              p_segment_ids)
          _, unique_p_idx_idx = array_ops.unique(p_idx)
          partitioned_segment_ids.append(unique_p_segment_ids)
          with ops.colocate_with(params[p]):
            partitioned_result[p] = math_ops.sparse_segment_sum(
                partitioned_result[p], unique_p_idx_idx, unique_p_segment_idx)
      # Concat each partition's segment_ids and result for final segment_sum.
      concat_segment_ids = array_ops.concat(partitioned_segment_ids, 0)
      concat_partitioned_result = array_ops.concat(partitioned_result, 0)
      return math_ops.unsorted_segment_sum(
          concat_partitioned_result,
          concat_segment_ids,
          math_ops.reduce_max(concat_segment_ids) + 1,
          name=name)
def categorical_hinge(y_true, y_pred):
  """Computes the categorical hinge loss between labels and predictions.

  `loss = maximum(neg - pos + 1, 0)` where `pos` is the score of the true
  class and `neg` is the largest score among the other classes.

  Args:
    y_true: One-hot ground-truth values.
    y_pred: Predicted scores.

  Returns:
    Categorical hinge loss values, reduced over the last axis.
  """
  predictions = ops.convert_to_tensor(y_pred)
  targets = math_ops.cast(y_true, predictions.dtype)
  # Score of the correct class for each example.
  positive = math_ops.reduce_sum(targets * predictions, axis=-1)
  # Best score among the incorrect classes.
  negative = math_ops.reduce_max((1. - targets) * predictions, axis=-1)
  return math_ops.maximum(0., negative - positive + 1.)
def testMaxGradient(self):
  """Numerically checks the gradient of reduce_max over a concat."""
  x = constant_op.constant([1.0], dtype=dtypes.float32)
  # Duplicating the input makes the max non-trivially tied across entries.
  y = math_ops.reduce_max(array_ops.concat([x, x], 0))
  with self.cached_session():
    err = gradient_checker.compute_gradient_error(x, [1], y, [])
    self.assertLess(err, 1e-4)
def tpu_fn(x, y):
  """Feeds transformed inputs through outside compilation, then reduces."""
  shifted = x + 7.0
  scaled = y * 2.0
  # outside_fn runs on the host; c, d, e come back to the TPU computation.
  c, d, e = tpu.outside_compilation(outside_fn, shifted, scaled)
  return (math_ops.reduce_max(c) + math_ops.reduce_min(d) +
          math_ops.reduce_sum(e))
def PPD_ADAGRAD(objective, stage_idx, T0, eta, W, W0, a, b, a0, b0, alpha,
                gamma, factor=1.2):
  """Builds one stage of a proximal primal-dual AdaGrad update.

  Creates AdaGrad-style accumulators for the primal variables `W`, `a`, `b`
  and the dual variable `alpha`, and returns the assign ops implementing one
  stage of the update together with the stage-size statistic.

  Args:
    objective: Scalar tensor to differentiate.
    stage_idx: 1-based stage index; scales the stage length `M_s`.
    T0: Base stage length.
    eta: Step size.
    W: List of primal weight variables.
    W0: Reference (proximal-center) values for `W`.
    a: Primal variable (scalar tensor/variable).
    b: Primal variable (scalar tensor/variable).
    a0: Reference value for `a`.
    b0: Reference value for `b`.
    alpha: Dual variable; updated by gradient ascent (its gradient is negated).
    gamma: Proximal regularization strength; the proximal term is
      `(1 / gamma) * (v - v0)`.
    factor: Per-stage growth factor for `M_s`.

  Returns:
    A tuple `(update_ops, return_value, accumulators, grad_accumulators,
    max_i, sum_gt, M_s)`.
  """
  update_ops = []
  grad_w = tf.gradients(objective, W)
  grad_a = tf.gradients(objective, a)
  grad_b = tf.gradients(objective, b)
  grad_alpha = tf.gradients(objective, alpha)[0]
  # Negate the dual gradient so a single descent-style loop ascends on alpha.
  grad_v = grad_w + grad_a + grad_b + [-grad_alpha]
  V = W + [a, b] + [alpha]
  V0 = W0 + [a0, b0] + [0]
  d = np.sum([np.prod(v.get_shape().as_list()) for v in W])
  epsilon = 0.5
  accumulators = [
      tf.Variable(tf.zeros(w.get_shape().as_list()),
                  dtype=tf.float32,
                  name='acc') for w in V
  ]
  grad_accumulators = [
      tf.Variable(tf.zeros(w.get_shape().as_list()),
                  dtype=tf.float32,
                  name='grad_acc') for w in V
  ]
  M_s = T0 * math_ops.sqrt(factor**(stage_idx - 1))
  max_i = tf.Variable(0, dtype=tf.float32, name='max_i')
  sum_gt = tf.Variable(0, dtype=tf.float32, name='sum_gt')
  update_ops_bn = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  with tf.control_dependencies(update_ops_bn):
    # Loop variables renamed (var/acc/g_acc/ref) so they no longer shadow the
    # function parameters `a` and `b`.
    for count, (var, g, acc, g_acc,
                ref) in enumerate(zip(V, grad_v, accumulators,
                                      grad_accumulators, V0)):
      # BUG FIX: an unconditional `new_g = g + (1 / gamma) * (p - p0)`
      # previously followed this conditional and overwrote it, so the dual
      # variable alpha (the last element of V) wrongly received a proximal
      # term. Only primal variables get the proximal correction.
      if count != len(V) - 1:
        new_g = g + (1 / gamma) * (var - ref)
      else:
        new_g = g
      new_a = acc + math_ops.square(new_g)
      update_ops.append(acc.assign(new_a))
      new_g_a = g_acc + new_g  # gradient accumlators
      update_ops.append(g_acc.assign(new_g_a))
      # Track the largest per-coordinate accumulator root and the sum of
      # roots; both feed the returned stage statistic.
      tmp_max_i = math_ops.maximum(max_i, K.sqrt(math_ops.reduce_max(new_a)))
      tmp_sum_gt = K.sum(K.sqrt(new_a)) + sum_gt
      update_ops.append(max_i.assign(tmp_max_i))
      update_ops.append(sum_gt.assign(tmp_sum_gt))
      # Dual-averaging style step anchored at the proximal center `ref`.
      new_p = -eta * (new_g_a) / (K.sqrt(new_a) + epsilon) + ref
      update_ops.append(var.assign(new_p))
  return_value = M_s * math_ops.sqrt((max_i + epsilon) * (sum_gt / (d + 3)))
  return (update_ops, return_value, accumulators, grad_accumulators, max_i,
          sum_gt, M_s)
def vectorized_map(fn, elems, fallback_to_while_loop=True):
  """Parallel map on the list of tensors unpacked from `elems` on dimension 0.

  This method works similar to `tf.map_fn` but is optimized to run much faster,
  possibly with a much larger memory footprint. The speedups are obtained by
  vectorization (see [Auto-Vectorizing TensorFlow Graphs: Jacobians,
  Auto-Batching and Beyond](https://arxiv.org/pdf/1903.04243.pdf)). The idea
  behind vectorization is to semantically launch all the invocations of `fn` in
  parallel and fuse corresponding operations across all these invocations. This
  fusion is done statically at graph generation time and the generated code is
  often similar in performance to a manually fused version.

  Because `tf.vectorized_map` fully parallelizes the batch, this method will
  generally be significantly faster than using `tf.map_fn`, especially in eager
  mode. However this is an experimental feature and currently has a lot of
  limitations:
    - There should be no data dependency between the different semantic
      invocations of `fn`, i.e. it should be safe to map the elements of the
      inputs in any order.
    - Stateful kernels may mostly not be supported since these often imply a
      data dependency. We do support a limited set of such stateful kernels
      though (like RandomFoo, Variable operations like reads, etc).
    - `fn` has limited support for control flow operations.
    - `fn` should return nested structure of Tensors or Operations. However
      if an Operation is returned, it should have zero outputs.
    - The shape and dtype of any intermediate or output tensors in the
      computation of `fn` should not depend on the input to `fn`.

  Examples:
  ```python
  def outer_product(a):
    return tf.tensordot(a, a, 0)

  batch_size = 100
  a = tf.ones((batch_size, 32, 32))
  c = tf.vectorized_map(outer_product, a)
  assert c.shape == (batch_size, 32, 32, 32, 32)
  ```

  ```python
  # Computing per-example gradients

  batch_size = 10
  num_features = 32
  layer = tf.keras.layers.Dense(1)

  def model_fn(arg):
    with tf.GradientTape() as g:
      inp, label = arg
      inp = tf.expand_dims(inp, 0)
      label = tf.expand_dims(label, 0)
      prediction = layer(inp)
      loss = tf.nn.l2_loss(label - prediction)
    return g.gradient(loss, (layer.kernel, layer.bias))

  inputs = tf.random.uniform([batch_size, num_features])
  labels = tf.random.uniform([batch_size, 1])
  per_example_gradients = tf.vectorized_map(model_fn, (inputs, labels))
  assert per_example_gradients[0].shape == (batch_size, num_features, 1)
  assert per_example_gradients[1].shape == (batch_size, 1)
  ```

  Args:
    fn: The callable to be performed. It accepts one argument, which will have
      the same (possibly nested) structure as `elems`, and returns a possibly
      nested structure of Tensors and Operations, which may be different than
      the structure of `elems`.
    elems: A tensor or (possibly nested) sequence of tensors, each of which
      will be unpacked along their first dimension. The nested sequence of the
      resulting slices will be mapped over by `fn`. The first dimensions of all
      elements must broadcast to a consistent value; equivalently, each element
      tensor must have first dimension of either `B` or `1`, for some common
      batch size `B >= 1`.
    fallback_to_while_loop: If true, on failing to vectorize an operation, the
      unsupported op is wrapped in a tf.while_loop to execute the map
      iterations. Note that this fallback only happens for unsupported ops and
      other parts of `fn` are still vectorized. If false, on encountering an
      unsupported op, a ValueError is thrown. Note that the fallbacks can
      result in slowdowns since vectorization often yields speedup of one to
      two orders of magnitude.

  Returns:
    A tensor or (possibly nested) sequence of tensors. Each tensor packs the
    results of applying fn to tensors unpacked from elems along the first
    dimension, from first to last.

    Although they are less common as user-visible inputs and outputs, note that
    tensors of type `tf.variant` which represent tensor lists (for example from
    `tf.raw_ops.TensorListFromTensor`) are vectorized by stacking the list
    contents rather than the variant itself, and so the container tensor will
    have a scalar shape when returned rather than the usual stacked shape. This
    improves the performance of control flow gradient vectorization.

  Raises:
    ValueError: If vectorization fails and fallback_to_while_loop is False.
  """
  elems = nest.map_structure(ops.convert_to_tensor,
                             elems,
                             expand_composites=True)

  def loop_fn(i):
    # The "loop body" pfor vectorizes: fn applied to the i-th slice of every
    # element of the structure.
    gathered_elems = nest.map_structure(
        lambda x: _gather_from_tensor_or_composite(x, i), elems)
    return fn(gathered_elems)

  # Extract batch size from the maximum first dimension of any element.
  flat_elems = nest.flatten(
      nest.map_structure(
          functools.partial(_composite_to_tensors, is_batched=True), elems))

  def _get_shape(x):
    # Static first-dimension size, or None when unknown.
    if x.shape.rank is None:
      return None
    return x.shape.as_list()[0]

  static_first_dims = [_get_shape(elem) for elem in flat_elems]
  if any(s is None for s in static_first_dims):
    # At least one first dimension is dynamic: compute the batch size in the
    # graph as the max over all dynamic first dimensions.
    batch_size = math_ops.reduce_max(
        [array_ops.shape(elem)[0] for elem in flat_elems])
  else:
    batch_size = max(static_first_dims)

  return pfor(loop_fn,
              batch_size,
              fallback_to_while_loop=fallback_to_while_loop)
def static_rnn(cell,
               inputs,
               initial_state=None,
               dtype=None,
               sequence_length=None,
               scope=None):
  """Creates a recurrent neural network specified by RNNCell `cell`.

  The simplest form of RNN network generated is:

  ```python
    state = cell.zero_state(...)
    outputs = []
    for input_ in inputs:
      output, state = cell(input_, state)
      outputs.append(output)
    return (outputs, state)
  ```
  However, a few other options are available:

  An initial state can be provided.
  If the sequence_length vector is provided, dynamic calculation is performed.
  This method of calculation does not compute the RNN steps past the maximum
  sequence length of the minibatch (thus saving computational time),
  and properly propagates the state at an example's sequence length
  to the final state output.

  The dynamic calculation performed is, at time `t` for batch row `b`,

  ```python
    (output, state)(b, t) =
      (t >= sequence_length(b))
        ? (zeros(cell.output_size), states(b, sequence_length(b) - 1))
        : cell(input(b, t), state(b, t - 1))
  ```

  Args:
    cell: An instance of RNNCell.
    inputs: A length T list of inputs, each a `Tensor` of shape `[batch_size,
      input_size]`, or a nested tuple of such elements.
    initial_state: (optional) An initial state for the RNN. If `cell.state_size`
      is an integer, this must be a `Tensor` of appropriate type and shape
      `[batch_size, cell.state_size]`. If `cell.state_size` is a tuple, this
      should be a tuple of tensors having shapes `[batch_size, s] for s in
      cell.state_size`.
    dtype: (optional) The data type for the initial state and expected output.
      Required if initial_state is not provided or RNN state has a
      heterogeneous dtype.
    sequence_length: Specifies the length of each sequence in inputs. An int32
      or int64 vector (tensor) size `[batch_size]`, values in `[0, T)`.
    scope: VariableScope for the created subgraph; defaults to "rnn".

  Returns:
    A pair (outputs, state) where:

    - outputs is a length T list of outputs (one for each input), or a nested
      tuple of such elements.
    - state is the final state

  Raises:
    TypeError: If `cell` is not an instance of RNNCell.
    ValueError: If `inputs` is `None` or an empty list, or if the input depth
      (column size) cannot be inferred from inputs via shape inference.
  """
  if not isinstance(cell, core_rnn_cell.RNNCell):
    raise TypeError("cell must be an instance of RNNCell")
  if not nest.is_sequence(inputs):
    raise TypeError("inputs must be a sequence")
  if not inputs:
    raise ValueError("inputs must not be empty")

  outputs = []
  # Create a new scope in which the caching device is either
  # determined by the parent scope, or is set to place the cached
  # Variable using the same placement as for the rest of the RNN.
  with vs.variable_scope(scope or "rnn") as varscope:
    if varscope.caching_device is None:
      varscope.set_caching_device(lambda op: op.device)

    # Obtain the first sequence of the input
    first_input = inputs
    while nest.is_sequence(first_input):
      first_input = first_input[0]

    # Temporarily avoid EmbeddingWrapper and seq2seq badness
    # TODO(lukaszkaiser): remove EmbeddingWrapper
    if first_input.get_shape().ndims != 1:

      input_shape = first_input.get_shape().with_rank_at_least(2)
      fixed_batch_size = input_shape[0]

      flat_inputs = nest.flatten(inputs)
      for flat_input in flat_inputs:
        # Every flattened input must agree on batch size and have a fully
        # defined feature depth.
        input_shape = flat_input.get_shape().with_rank_at_least(2)
        batch_size, input_size = input_shape[0], input_shape[1:]
        fixed_batch_size.merge_with(batch_size)
        for i, size in enumerate(input_size):
          if size.value is None:
            raise ValueError(
                "Input size (dimension %d of inputs) must be accessible via "
                "shape inference, but saw value None." % i)
    else:
      # Rank-1 input (e.g. token ids): only the batch dimension exists.
      fixed_batch_size = first_input.get_shape().with_rank_at_least(1)[0]

    if fixed_batch_size.value:
      batch_size = fixed_batch_size.value
    else:
      batch_size = array_ops.shape(first_input)[0]
    if initial_state is not None:
      state = initial_state
    else:
      if not dtype:
        raise ValueError("If no initial_state is provided, "
                         "dtype must be specified")
      state = cell.zero_state(batch_size, dtype)

    if sequence_length is not None:  # Prepare variables
      sequence_length = ops.convert_to_tensor(sequence_length,
                                              name="sequence_length")
      if sequence_length.get_shape().ndims not in (None, 1):
        raise ValueError(
            "sequence_length must be a vector of length batch_size")

      def _create_zero_output(output_size):
        # Zero output emitted for steps beyond an example's sequence length.
        # convert int to TensorShape if necessary
        size = _state_size_with_prefix(output_size, prefix=[batch_size])
        output = array_ops.zeros(array_ops.stack(size),
                                 _infer_state_dtype(dtype, state))
        shape = _state_size_with_prefix(output_size,
                                        prefix=[fixed_batch_size.value])
        output.set_shape(tensor_shape.TensorShape(shape))
        return output

      output_size = cell.output_size
      flat_output_size = nest.flatten(output_size)
      flat_zero_output = tuple(
          _create_zero_output(size) for size in flat_output_size)
      zero_output = nest.pack_sequence_as(structure=output_size,
                                          flat_sequence=flat_zero_output)

      sequence_length = math_ops.to_int32(sequence_length)
      min_sequence_length = math_ops.reduce_min(sequence_length)
      max_sequence_length = math_ops.reduce_max(sequence_length)

    for time, input_ in enumerate(inputs):
      # Share the cell's variables across all timesteps.
      if time > 0:
        varscope.reuse_variables()
      # pylint: disable=cell-var-from-loop
      call_cell = lambda: cell(input_, state)
      # pylint: enable=cell-var-from-loop
      if sequence_length is not None:
        # _rnn_step short-circuits steps past max_sequence_length and
        # freezes each example's state once its sequence ends.
        (output, state) = _rnn_step(time=time,
                                    sequence_length=sequence_length,
                                    min_sequence_length=min_sequence_length,
                                    max_sequence_length=max_sequence_length,
                                    zero_output=zero_output,
                                    state=state,
                                    call_cell=call_cell,
                                    state_size=cell.state_size)
      else:
        (output, state) = call_cell()

      outputs.append(output)

    return (outputs, state)
def normalize(x):
  """Linearly rescale `x` so its values span the range [0, 1]."""
  x_min = math_ops.reduce_min(x)
  x_max = math_ops.reduce_max(x)
  # Min-max normalization; a constant-valued `x` would divide by zero here.
  return (x - x_min) / (x_max - x_min)
def all_gather(self, input_tensor, axis, communication_hint='AUTO', timeout=0):
  """All-gather a dense tensor.

  This method must be called inside a tf.function.

  Args:
    input_tensor: a dense tensor. It must have the same rank on all replicas,
      and dimensions other than `axis` need to be the same as well.
    axis: 0-D int32 Tensor. Dimension along which to gather. Must be in the
      range [0, rank(value)).
    communication_hint: string providing hint to runtime for choosing
      collective implementation. Available options are `AUTO`, `NCCL`, and
      `RING`.
    timeout: a float. The timeout in seconds.

  Returns:
    The gathered Tensor.

  Raises:
    RuntimeError: if called in eager mode.
  """
  if context.executing_eagerly():
    raise RuntimeError('all_gather in eager mode is not supported')

  # The identity() control dependency forces `input_tensor` to be fully
  # produced before the collective ops below are launched.
  with ops.device(self._device), \
       ops.control_dependencies([array_ops.identity(input_tensor)]):
    # 1. Transpose
    # E.g. Given an input_tensor with shape [2,2,5,1] and axis to gather is 3,
    # we use perm_pre=[3 0 1 2] to reshape it to [1,2,2,5], which
    # brings the 3rd dim first; afterwards we use perm_after=[1,2,3,0] to
    # place it back.
    perm_pre = array_ops.concat(
        ([axis], math_ops.range(axis),
         math_ops.range(axis + 1, array_ops.rank(input_tensor))),
        axis=0)
    input_tensor_t = array_ops.transpose(input_tensor, perm=perm_pre)
    # 2. Pad
    # Gather every replica's shape of the transposed tensor; column 0 holds
    # each replica's size along the gather axis (now the leading dim).
    gathered_shape = self._all_gather(
        array_ops.expand_dims_v2(array_ops.shape_v2(input_tensor_t), axis=0),
        communication_hint,
        timeout=timeout)
    first_dims = gathered_shape[:, 0]
    full_axis_dim = math_ops.reduce_max(first_dims)
    # Pad each replica's tensor up to the largest leading dim so the gather
    # inputs have a uniform shape.
    padded_input_tensor = _pad_util(input_tensor_t, full_axis_dim)
    # 3. Gather
    gather_padded_out_tensor = self._all_gather(
        padded_input_tensor, communication_hint, timeout=timeout)
    # 4. Unpad
    # Slice out each replica's real (unpadded) rows and re-concatenate.
    split_tensors = []
    for i in range(self._group_size):
      start_pos = i * full_axis_dim
      split_tensors.append(gather_padded_out_tensor[start_pos:start_pos +
                                                    first_dims[i]])
    out_tensor_t = array_ops.concat(split_tensors, 0)

    # 5. Transpose back
    # perm_after is the inverse permutation of perm_pre: it returns the
    # gathered axis to its original position.
    perm_after = array_ops.concat(
        (math_ops.range(1, axis + 1), [0],
         math_ops.range(axis + 1, array_ops.rank(input_tensor_t))),
        axis=0)
    return array_ops.transpose(out_tensor_t, perm=perm_after)
def indicators_to_sparse_ids(indicators, ignore_value=None, dtype=dtypes.int64):
  """Convert a dense indicator tensor to sparse IDs.

  This is commonly used for converting a dense classification label to sparse.
  In the following example, we have an input of shape (2, 2, num_classes),
  where num_classes=4.

  ```python
  indicators = [
    [
      [0, 0, 1, 0],
      [0, 0, 0, 0]
    ], [
      [1, 0, 1, 1],
      [0, 0, 1, 0]
    ]
  ]
  sparse_ids = indicator_to_sparse_ids(indicators)
  ```

  `sparse_ids` in "jagged" format:
  [
    [
      [2],
      []
    ], [
      [0, 2, 3],
      [2]
    ]
  ]

  `sparse_ids` in `SparseTensor` format:
  ```python
  {
    indices: [[0, 0, 1], [1, 0, 0], [1, 0, 1], [1, 0, 2], [1, 1, 0]],
    values: [2, 0, 2, 3, 2],
    dense_shape: [2, 2, 3]
  }
  ```

  Args:
    indicators: Dense `Tensor` of shape `(d0, ..., dn, num_classes)`.
      `ignore_value` values are ignored. For other values (typically, ones),
      the index along the last dimension is returned.
    ignore_value: Entries in `indicators` equal to this value will be absent
      from the returned `SparseTensor`. If `None`, default value of
      `indicators` dtype will be used (e.g. '' for `str`, 0 for `int`).
    dtype: Type of result, must be integer type.

  Returns:
    `SparseTensor` of type `dtype` and shape `(d0, ..., dn, max_num_labels)`,
      where `max_num_labels` is the maximum number of non-zero values in any
      row (in the example above, row (1, 1) has 3 non-zero values, so the
      result shape is (2, 2, 3)). The values of this `SparseTensor` are in the
      range `[0, num_classes)` and correspond to the index of non-ignore
      values along the last dimension of `indicators`.

  Raises:
    ValueError: if `dtype` is not integer.
  """
  if not dtype.is_integer:
    raise ValueError("Invalid dtype {} not integer.".format(dtype))
  with ops.name_scope(
      None, "indicators_to_sparse_ids", (indicators, ignore_value)):
    # Convert indicators to binary ones and zeros. We use int64 since
    # SparseTensor requires int64 indices.
    indicators = ops.convert_to_tensor(indicators, name="indicators")
    # Entries equal to the ignore value are treated as "missing".
    missing_indicators = math_ops.equal(
        indicators, _ignore_value_tensor(indicators.dtype, ignore_value),
        name="missing")
    zeros_like_indicators = array_ops.zeros_like(
        indicators, dtype=dtypes.int64, name="zeros")
    # 1 where present, 0 where missing.
    binary_indicators = array_ops.where(
        missing_indicators, zeros_like_indicators,
        array_ops.ones_like(indicators, dtype=dtypes.int64, name="ones"),
        name="binary_indicators")

    # Use cumsum along the last dimension to generate per-row indexes.
    # Note that these are 1-based (since 0 indicates missing values), so they're
    # off-by-1 from the actual indices. We'll subtract 1 below. Since they're
    # off-by-one, the max value is the size of the last dimension (i.e.,
    # last_index + 1).
    row_index_indicators = array_ops.where(
        missing_indicators, zeros_like_indicators,
        math_ops.cumsum(binary_indicators, axis=-1), "row_index_indicators")
    # Max per-row count = size of the result's last dimension.
    result_last_dim = array_ops.reshape(
        math_ops.reduce_max(row_index_indicators), shape=(1, ),
        name="result_last_dim")

    # Convert to a SparseTensor. The values of this SparseTensor are the last
    # indices of our result, and the last indices of this SparseTensor (i.e.,
    # the class IDs indicated by `indicators`) are the values of our result, so
    # we use tensor slicing and concat to swap them.
    sparse_row_index_indicators = dense_to_sparse_tensor(
        row_index_indicators, ignore_value=0)
    return sparse_tensor.SparseTensor(
        # Swap: the sparse values (1-based row positions, minus 1) become the
        # last index column; the old last index column becomes the values.
        indices=array_ops.concat(
            (sparse_row_index_indicators.indices[:, :-1],
             array_ops.reshape(sparse_row_index_indicators.values - 1,
                               (-1, 1))),
            axis=1, name="indices"),
        values=math_ops.cast(
            sparse_row_index_indicators.indices[:, -1], dtype=dtype,
            name="values"),
        dense_shape=array_ops.concat(
            (sparse_row_index_indicators.dense_shape[0:-1], result_last_dim),
            axis=0, name="dense_shape"))
def build(self, input_shape):
  """Builds the layer.

  Creates the variables for the network modeling the densities, creates the
  auxiliary loss estimating the median and tail quantiles of the densities,
  and then uses that to create the probability mass functions and the update
  op that produces the discrete cumulative density functions used by the
  range coder.

  Args:
    input_shape: Shape of the input tensor, used to get the number of
      channels.

  Raises:
    ValueError: if `input_shape` doesn't specify the length of the channel
      dimension.
  """
  input_shape = tensor_shape.TensorShape(input_shape)
  channel_axis = self._channel_axis(input_shape.ndims)
  channels = input_shape[channel_axis].value
  if channels is None:
    raise ValueError(
        "The channel dimension of the inputs must be defined.")
  self.input_spec = engine.InputSpec(
      ndim=input_shape.ndims, axes={channel_axis: channels})
  # Layer widths of the per-channel density network, bracketed by 1-wide
  # input/output layers.
  filters = (1, ) + self.filters + (1, )
  scale = self.init_scale**(1 / (len(self.filters) + 1))

  # Create variables. Each layer i of the density network has a softplus'd
  # matrix, a bias, and (except for the last layer) a tanh'd factor.
  self._matrices = []
  self._biases = []
  self._factors = []
  for i in range(len(self.filters) + 1):
    # Initialization chosen so each layer initially scales by roughly `scale`.
    init = np.log(np.expm1(1 / scale / filters[i + 1]))
    matrix = self.add_variable(
        "matrix_{}".format(i), dtype=self.dtype,
        shape=(channels, filters[i + 1], filters[i]),
        initializer=init_ops.Constant(init))
    # softplus keeps the matrix entries positive (monotone cumulative).
    matrix = nn.softplus(matrix)
    self._matrices.append(matrix)

    bias = self.add_variable(
        "bias_{}".format(i), dtype=self.dtype,
        shape=(channels, filters[i + 1], 1),
        initializer=init_ops.RandomUniform(-.5, .5))
    self._biases.append(bias)

    if i < len(self.filters):
      factor = self.add_variable(
          "factor_{}".format(i), dtype=self.dtype,
          shape=(channels, filters[i + 1], 1),
          initializer=init_ops.Zeros())
      # tanh bounds the factor to (-1, 1).
      factor = math_ops.tanh(factor)
      self._factors.append(factor)

  # To figure out what range of the densities to sample, we need to compute
  # the quantiles given by `tail_mass / 2` and `1 - tail_mass / 2`. Since we
  # can't take inverses of the cumulative directly, we make it an optimization
  # problem:
  # `quantiles = argmin(|logit(cumulative) - target|)`
  # where `target` is `logit(tail_mass / 2)` or `logit(1 - tail_mass / 2)`.
  # Taking the logit (inverse of sigmoid) of the cumulative makes the
  # representation of the right target more numerically stable.

  # Numerically stable way of computing logits of `tail_mass / 2`
  # and `1 - tail_mass / 2`.
  target = np.log(2 / self.tail_mass - 1)
  # Compute lower and upper tail quantile as well as median.
  target = constant_op.constant([-target, 0, target], dtype=self.dtype)

  def quantiles_initializer(shape, dtype=None, partition_info=None):
    # Initializes the 3 quantiles (lower tail, median, upper tail) per channel
    # at (-init_scale, 0, init_scale).
    del partition_info  # unused
    assert tuple(shape[1:]) == (1, 3)
    init = constant_op.constant(
        [[[-self.init_scale, 0, self.init_scale]]], dtype=dtype)
    return array_ops.tile(init, (shape[0], 1, 1))

  quantiles = self.add_variable(
      "quantiles", shape=(channels, 1, 3), dtype=self.dtype,
      initializer=quantiles_initializer)
  logits = self._logits_cumulative(quantiles, stop_gradient=True)
  # Auxiliary L1 loss pulling the quantile variables towards the targets.
  loss = math_ops.reduce_sum(abs(logits - target))
  self.add_loss(loss, inputs=None)

  # Save medians for `call`, `compress`, and `decompress`.
  self._medians = quantiles[:, :, 1:2]
  if not self.optimize_integer_offset:
    self._medians = math_ops.round(self._medians)

  # Largest distance observed between lower tail quantile and median,
  # or between median and upper tail quantile.
  minima = math_ops.reduce_max(self._medians - quantiles[:, :, 0:1])
  maxima = math_ops.reduce_max(quantiles[:, :, 2:3] - self._medians)
  minmax = math_ops.maximum(minima, maxima)
  minmax = math_ops.ceil(minmax)
  minmax = math_ops.maximum(minmax, 1)

  # Sample the density up to `minmax` around the median.
  samples = math_ops.range(-minmax, minmax + 1, dtype=self.dtype)
  samples += self._medians

  half = constant_op.constant(.5, dtype=self.dtype)
  # We strip the sigmoid from the end here, so we can use the special rule
  # below to only compute differences in the left tail of the sigmoid.
  # This increases numerical stability (see explanation in `call`).
  lower = self._logits_cumulative(samples - half, stop_gradient=True)
  upper = self._logits_cumulative(samples + half, stop_gradient=True)
  # Flip signs if we can move more towards the left tail of the sigmoid.
  sign = -math_ops.sign(math_ops.add_n([lower, upper]))
  pmf = abs(
      math_ops.sigmoid(sign * upper) - math_ops.sigmoid(sign * lower))
  # Add tail masses to first and last bin of pmf, as we clip values for
  # compression, meaning that out-of-range values get mapped to these bins.
  pmf = array_ops.concat([
      math_ops.add_n([pmf[:, 0, :1], math_ops.sigmoid(lower[:, 0, :1])]),
      pmf[:, 0, 1:-1],
      math_ops.add_n(
          [pmf[:, 0, -1:], math_ops.sigmoid(-upper[:, 0, -1:])]),
  ], axis=-1)
  self._pmf = pmf

  # Quantized CDF consumed by the range coder.
  cdf = coder_ops.pmf_to_quantized_cdf(
      pmf, precision=self.range_coder_precision)

  def cdf_getter(*args, **kwargs):
    # Custom getter so the variable is created directly from `cdf` with an
    # unvalidated shape (the CDF length depends on `minmax`).
    del args, kwargs  # ignored
    return variable_scope.get_variable(
        "quantized_cdf", dtype=dtypes.int32, initializer=cdf,
        trainable=False, validate_shape=False, collections=())

  # Need to provide a fake shape here since add_variable insists on it.
  self._quantized_cdf = self.add_variable(
      "quantized_cdf", shape=(channels, 1), dtype=dtypes.int32,
      getter=cdf_getter, trainable=False)

  # Update op refreshing the quantized CDF from the current pmf.
  update_op = state_ops.assign(
      self._quantized_cdf, cdf, validate_shape=False)
  self.add_update(update_op, inputs=None)

  super(EntropyBottleneck, self).build(input_shape)
def lifted_struct_loss(labels, embeddings, margin=1.0): """Computes the lifted structured loss. The loss encourages the positive distances (between a pair of embeddings with the same labels) to be smaller than any negative distances (between a pair of embeddings with different labels) in the mini-batch in a way that is differentiable with respect to the embedding vectors. See: https://arxiv.org/abs/1511.06452. Args: labels: 1-D tf.int32 `Tensor` with shape [batch_size] of multiclass integer labels. embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should not be l2 normalized. margin: Float, margin term in the loss definition. Returns: lifted_loss: tf.float32 scalar. """ # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor. lshape = array_ops.shape(labels) assert lshape.shape == 1 labels = array_ops.reshape(labels, [lshape[0], 1]) # Build pairwise squared distance matrix. pairwise_distances = pairwise_distance(embeddings) # Build pairwise binary adjacency matrix. adjacency = math_ops.equal(labels, array_ops.transpose(labels)) # Invert so we can select negatives only. adjacency_not = math_ops.logical_not(adjacency) batch_size = array_ops.size(labels) diff = margin - pairwise_distances mask = math_ops.cast(adjacency_not, dtype=dtypes.float32) # Safe maximum: Temporarily shift negative distances # above zero before taking max. # this is to take the max only among negatives. row_minimums = math_ops.reduce_min(diff, 1, keep_dims=True) row_negative_maximums = math_ops.reduce_max( math_ops.multiply(diff - row_minimums, mask), 1, keep_dims=True) + row_minimums # Compute the loss. # Keep track of matrix of maximums where M_ij = max(m_i, m_j) # where m_i is the max of alpha - negative D_i's. 
# This matches the Caffe loss layer implementation at: # https://github.com/rksltnl/Caffe-Deep-Metric-Learning-CVPR16/blob/0efd7544a9846f58df923c8b992198ba5c355454/src/caffe/layers/lifted_struct_similarity_softmax_layer.cpp # pylint: disable=line-too-long max_elements = math_ops.maximum(row_negative_maximums, array_ops.transpose(row_negative_maximums)) diff_tiled = array_ops.tile(diff, [batch_size, 1]) mask_tiled = array_ops.tile(mask, [batch_size, 1]) max_elements_vect = array_ops.reshape(array_ops.transpose(max_elements), [-1, 1]) loss_exp_left = array_ops.reshape( math_ops.reduce_sum(math_ops.multiply( math_ops.exp(diff_tiled - max_elements_vect), mask_tiled), 1, keep_dims=True), [batch_size, batch_size]) loss_mat = max_elements + math_ops.log(loss_exp_left + array_ops.transpose(loss_exp_left)) # Add the positive distance. loss_mat += pairwise_distances mask_positives = math_ops.cast(adjacency, dtype=dtypes.float32) - array_ops.diag( array_ops.ones([batch_size])) # *0.5 for upper triangular, and another *0.5 for 1/2 factor for loss^2. num_positives = math_ops.reduce_sum(mask_positives) / 2.0 lifted_loss = math_ops.truediv(0.25 * math_ops.reduce_sum( math_ops.square( math_ops.maximum(math_ops.multiply(loss_mat, mask_positives), 0.0))), num_positives, name='liftedstruct_loss') return lifted_loss
def categorical_hinge(y_true, y_pred):
  """Computes the categorical hinge loss between `y_true` and `y_pred`.

  loss = max(0, 1 + max((1 - y_true) * y_pred) - sum(y_true * y_pred)),
  reduced over the last axis.
  """
  positive_score = math_ops.reduce_sum(y_true * y_pred, axis=-1)
  negative_score = math_ops.reduce_max((1. - y_true) * y_pred, axis=-1)
  hinge = negative_score - positive_score + 1.
  return math_ops.maximum(0., hinge)
def all_reduce_indexed_slices(self,
                              input_slices,
                              communication_hint='AUTO',
                              timeout=0):
  """All-reduce an IndexedSlices.

  This method must be called inside a tf.function.

  Args:
    input_slices: an IndexedSlices.
    communication_hint: string providing hint to runtime for choosing
      collective implementation.
    timeout: a float. The timeout in seconds.

  Returns:
    The reduced IndexedSlices.

  Raises:
    RuntimeError: if called in eager mode.
  """
  if context.executing_eagerly():
    raise RuntimeError(
        'all_reduce_indexed_slices in eager mode is not supported')

  # Current CollectiveAllGather implementations require input IndexedSlices to
  # have consistent length across the board, we handle the reduction of
  # IndexedSlices as follows:
  #   1. Gather the lengths of IndexedSlices from all participants.
  #   2. If they have consistent length, apply all_gather.
  #   3. Otherwise convert IndexedSlices to dense tensors and apply
  #      all_reduce.
  with ops.device(self._device):

    def all_gather():
      """Use all_gather to aggregate `IndexedSlices`."""
      all_values = self._all_gather(
          input_slices.values, communication_hint, timeout=timeout)
      # Add control dependency to order the all-gather.
      # NOTE(review): only needed for NCCL, which presumably requires
      # identical op launch order across replicas — confirm.
      control = [all_values] if communication_hint == 'NCCL' else []
      with ops.control_dependencies(control):
        all_indices = self._all_gather(
            input_slices.indices, communication_hint, timeout=timeout)
      return ops.IndexedSlices(
          values=all_values,
          indices=all_indices,
          dense_shape=input_slices.dense_shape)

    def densify_and_all_reduce():
      """Use all_reduce to aggregate `IndexedSlices`."""
      densified = ops.convert_to_tensor(input_slices)
      reduced = self.all_reduce(
          densified, communication_hint=communication_hint, timeout=timeout)
      # We have to convert dense grad to IndexedSlice because all_reduce()
      # and all_gather() must have the same return type as required by
      # control_flow_ops.cond.
      return ops.IndexedSlices(
          values=reduced,
          indices=math_ops.range(array_ops.shape(reduced)[0]),
          dense_shape=input_slices.dense_shape)

    # Exchange slice lengths; if all replicas have the same length we can use
    # the cheaper all_gather path, otherwise densify and all_reduce.
    length = array_ops.shape(input_slices.indices)
    all_lengths = self._all_gather(
        length, communication_hint, timeout=timeout)
    return control_flow_ops.cond(
        math_ops.equal(
            math_ops.reduce_max(all_lengths),
            math_ops.reduce_min(all_lengths)), all_gather,
        densify_and_all_reduce)
def step_fn(data):
  """Returns `data` squared, after asserting all elements are <= 100."""
  within_bounds = math_ops.less_equal(math_ops.reduce_max(data), 100.)
  check_op = control_flow_ops.Assert(within_bounds, [data])
  # The control dependency guarantees the assertion runs before squaring.
  with ops.control_dependencies([check_op]):
    return math_ops.square(data)