def linear_decay_fn(global_step):
  """Returns a multiplier that decays linearly with `global_step`.

  The multiplier is `(decay_steps - min(global_step, decay_steps)) /
  decay_steps`, floored at 0. `decay_steps` is read from the enclosing scope.

  Args:
    global_step: The current step counter; must not be None.

  Returns:
    A float tensor holding the decay multiplier.

  Raises:
    ValueError: If `global_step` is None.
  """
  if global_step is None:
    raise ValueError("global_step is required for linear_decay.")
  # Clip so that steps past the end of the schedule do not go negative.
  clipped_step = math_ops.minimum(global_step, decay_steps)
  total_steps_int = math_ops.to_int32(decay_steps)
  clipped_step_int = math_ops.to_int32(clipped_step)
  steps_left = total_steps_int - clipped_step_int
  fraction_left = (
      math_ops.to_float(steps_left) / math_ops.to_float(decay_steps))
  return math_ops.maximum(0.0, fraction_left)
def _compute_accuracy(logits, targets, weights=None):
  """Builds a streaming accuracy metric from raw logits.

  For more than two classes the prediction is the top-1 index of `logits`.
  Otherwise the flattened logits are thresholded at zero and `targets` is
  flattened to match.

  Args:
    logits: Logits tensor produced by the model.
    targets: Ground-truth labels.
    weights: Optional weights forwarded to `streaming_accuracy`.

  Returns:
    Whatever `metrics_lib.streaming_accuracy` returns for the int32-cast
    predictions and targets.
  """
  if self._n_classes <= 2:
    # Binary case: a positive logit means the positive class.
    flat_logits = array_ops.reshape(logits, [-1])
    predictions = math_ops.greater(flat_logits,
                                   array_ops.zeros_like(flat_logits))
    targets = array_ops.reshape(targets, [-1])
  else:
    _, predictions = nn.top_k(logits, 1)
  int_predictions = math_ops.to_int32(predictions)
  int_targets = math_ops.to_int32(targets)
  return metrics_lib.streaming_accuracy(int_predictions, int_targets, weights)
def update_medoid_per_cluster(pairwise_distances, pairwise_distances_subset,
                              labels, chosen_ids, cluster_member_ids,
                              cluster_idx, margin_multiplier, margin_type):
  """Updates the cluster medoid per cluster.

  Every member of the cluster is tried as the medoid. The member with the
  highest score `-sum(distances to the other members) + margin_multiplier *
  (1 - clustering_score)` replaces the current medoid in `chosen_ids`.

  Args:
    pairwise_distances: 2-D Tensor of pairwise distances.
    pairwise_distances_subset: 2-D Tensor of pairwise distances for one cluster.
    labels: 1-D Tensor of ground truth cluster assignment.
    chosen_ids: 1-D Tensor of cluster centroid indices.
    cluster_member_ids: 1-D Tensor of cluster member indices for one cluster.
    cluster_idx: Index of this one cluster.
    margin_multiplier: multiplication constant.
    margin_type: Type of structured margin to use; forwarded unchanged to
      `compute_clustering_score`.

  Returns:
    chosen_ids: Updated 1-D Tensor of cluster centroid indices.
  """

  def func_cond(iteration, scores_margin):
    del scores_margin  # Unused variable scores_margin.
    return iteration < num_candidates

  def func_body(iteration, scores_margin):
    # swap the current medoid with the candidate cluster member
    candidate_medoid = math_ops.to_int32(cluster_member_ids[iteration])
    tmp_chosen_ids = update_1d_tensor(chosen_ids, cluster_idx, candidate_medoid)
    predictions = get_cluster_assignment(pairwise_distances, tmp_chosen_ids)
    metric_score = compute_clustering_score(labels, predictions, margin_type)
    # Write the margin into slot `iteration` by adding a one-hot style vector.
    pad_before = array_ops.zeros([iteration])
    pad_after = array_ops.zeros([num_candidates - 1 - iteration])
    return iteration + 1, scores_margin + array_ops.concat(
        [pad_before, [1.0 - metric_score], pad_after], 0)

  # pairwise_distances_subset is of size [p, 1, 1, p],
  #   the intermediate dummy dimensions at
  #   [1, 2] makes this code work in the edge case where p=1.
  #   this happens if the cluster size is one.
  scores_fac = -1.0 * math_ops.reduce_sum(
      array_ops.squeeze(pairwise_distances_subset, [1, 2]), axis=0)

  iteration = array_ops.constant(0)
  num_candidates = array_ops.size(cluster_member_ids)
  scores_margin = array_ops.zeros([num_candidates])

  _, scores_margin = control_flow_ops.while_loop(func_cond, func_body,
                                                 [iteration, scores_margin])
  candidate_scores = math_ops.add(scores_fac, margin_multiplier * scores_margin)

  # `axis` replaces the deprecated `dimension` alias of argmax.
  argmax_index = math_ops.to_int32(
      math_ops.argmax(candidate_scores, axis=0))

  best_medoid = math_ops.to_int32(cluster_member_ids[argmax_index])
  chosen_ids = update_1d_tensor(chosen_ids, cluster_idx, best_medoid)
  return chosen_ids
def testListOfScalarTensors(self):
  """Splits an 11x11 array using sizes given as a list of scalar tensors."""
  first_size = math_ops.to_int32(5)
  second_size = math_ops.to_int32(6)
  value = np.random.rand(11, 11)

  with self.test_session(use_gpu=False) as sess:
    head, tail = sess.run(
        array_ops.split(value, [first_size, second_size]))

  # The first five rows land in the first piece, the remaining six in the
  # second.
  self.assertAllEqual(head, value[0:5, :])
  self.assertAllEqual(tail, value[5:, :])
def _class_predictions_streaming_mean(
    predictions, labels, weights=None, class_id=None):
  """Streaming mean of the indicator `predictions == class_id`.

  Args:
    predictions: Predicted class ids.
    labels: Unused; accepted only for signature compatibility.
    weights: Optional weights forwarded to `streaming_mean`.
    class_id: The class whose prediction frequency is tracked.

  Returns:
    Whatever `metrics_lib.streaming_mean` returns for the indicator tensor.
  """
  del labels
  matches_class = math_ops.equal(
      math_ops.to_int32(class_id), math_ops.to_int32(predictions))
  hit = array_ops.ones_like(predictions)
  miss = array_ops.zeros_like(predictions)
  indicator = array_ops.where(matches_class, hit, miss)
  return metrics_lib.streaming_mean(indicator, weights=weights)
def testListOfScalarTensors(self):
  """Splits an 11x11 array using sizes given as a list of scalar tensors."""
  first_size = math_ops.to_int32(5)
  second_size = math_ops.to_int32(6)
  value = np.random.rand(11, 11)

  with test_util.device(use_gpu=True):
    head, tail = self.evaluate(
        array_ops.split(value, [first_size, second_size]))

  # The first five rows land in the first piece, the remaining six in the
  # second.
  self.assertAllEqual(head, value[0:5, :])
  self.assertAllEqual(tail, value[5:, :])
def _randomize(coeffs, radixes, seed=None):
  """Applies the Owen randomization to the coefficients.

  Each coefficient is replaced by the entry it selects from a flattened table
  of permutations obtained from `_get_permutations`.

  Args:
    coeffs: Tensor of coefficients; its last dimension indexes coefficients.
    radixes: Tensor of radixes; flattened to 1-D before use.
    seed: Optional seed forwarded to `_get_permutations`.

  Returns:
    The permuted coefficients, cast back to the dtype `coeffs` came in with.
  """
  original_dtype = coeffs.dtype
  int_coeffs = math_ops.to_int32(coeffs)
  num_coeffs = array_ops.shape(int_coeffs)[-1]
  flat_radixes = array_ops.reshape(math_ops.to_int32(radixes), [-1])

  permutation_table = array_ops.reshape(
      _get_permutations(num_coeffs, flat_radixes, seed=seed), [-1])

  # Position of a coefficient inside the flattened table: the start of its
  # radix block plus one full stride of `radix_total` per coefficient index.
  radix_total = math_ops.reduce_sum(flat_radixes)
  block_starts = array_ops.reshape(
      math_ops.cumsum(flat_radixes, exclusive=True), [-1, 1])
  table_offsets = block_starts + math_ops.range(num_coeffs) * radix_total

  permuted = array_ops.gather(permutation_table, int_coeffs + table_offsets)
  return math_ops.cast(permuted, dtype=original_dtype)
def _loss(probs, targets):
  """Applies `loss_fn` to `probs` and the one-hot encoding of `targets`.

  `num_classes` and `loss_fn` are read from the enclosing scope.

  Args:
    probs: Model outputs passed through to `loss_fn` unchanged.
    targets: Class ids; cast to int32 before one-hot encoding.

  Returns:
    The result of `loss_fn(probs, one_hot_labels)`.
  """
  class_ids = math_ops.to_int32(targets)
  one_hot_labels = array_ops.one_hot(
      class_ids,
      num_classes,
      on_value=1.,
      off_value=0.,
      dtype=dtypes.float32)
  return loss_fn(probs, one_hot_labels)
def _compute_power_svd(self, var, mat_g, mat_g_size, alpha, mat_h_slot_name):
  """Computes mat_h = mat_g^alpha using svd.

  mat_g is a symmetric PSD matrix.

  Args:
    var: the variable we are updating.
    mat_g: the symmetric PSD matrix whose power is to be computed.
    mat_g_size: size of mat_g.
    alpha: a real number.
    mat_h_slot_name: name of slot to store the power, if needed.

  Returns:
    mat_h = mat_g^alpha. When `mat_h_slot_name` is not None, the result of
    assigning mat_h to that slot is returned instead.

  Note that mat_g is PSD. So we could use linalg_ops.self_adjoint_eig.
  """
  if mat_g_size != 1:
    # Dampen the diagonal before decomposing.
    identity = linalg_ops.eye(math_ops.to_int32(mat_g_size))
    damping = self._epsilon * identity
    diag_d, mat_u, mat_v = linalg_ops.svd(mat_g + damping, full_matrices=True)
    # Singular values are floored at epsilon before being raised to alpha.
    powered_values = math_ops.pow(
        math_ops.maximum(diag_d, self._epsilon), alpha)
    scaled_v = mat_v * powered_values
    mat_h = math_ops.matmul(scaled_v, array_ops.transpose(mat_u))
  else:
    # 1x1 case: a plain elementwise power is enough.
    mat_h = math_ops.pow(mat_g + self._epsilon, alpha)

  if mat_h_slot_name is None:
    return mat_h
  return state_ops.assign(self.get_slot(var, mat_h_slot_name), mat_h)
def has_zero():
  """Inserts a 0 into `nonzero_consecutive_ids` at the zero id's position.

  `id_is_zero` and `nonzero_consecutive_ids` are read from the enclosing
  scope; `id_is_zero` has length 1.

  Returns:
    The consecutive ids with a 0 spliced in at index `id_is_zero[0]`.
  """
  split_at = math_ops.to_int32(id_is_zero[0])
  leading_ids = nonzero_consecutive_ids[:split_at]
  trailing_ids = nonzero_consecutive_ids[split_at:]
  pieces = [leading_ids, [0], trailing_ids]
  return array_ops.concat(pieces, axis=0)
def finalize(self, outputs, final_state, sequence_lengths):
  """Finalize and return the predicted_ids.

  Args:
    outputs: An instance of BeamSearchDecoderOutput.
    final_state: An instance of BeamSearchDecoderState. Passed through to the
      output.
    sequence_lengths: An `int64` tensor shaped `[batch_size, beam_width]`.
      **NOTE** This argument is ignored; the lengths used here are read from
      `final_state.lengths`.

  Returns:
    outputs: An instance of `FinalBeamSearchDecoderOutput` whose
      predicted_ids come from `beam_search_ops.gather_tree`.
    final_state: The same input instance of `BeamSearchDecoderState`.
  """
  del sequence_lengths
  # Longest sequence over the beams of each batch entry.
  longest_per_batch = math_ops.reduce_max(final_state.lengths, axis=1)
  max_sequence_lengths = math_ops.to_int32(longest_per_batch)
  traced_ids = beam_search_ops.gather_tree(
      outputs.predicted_ids,
      outputs.parent_ids,
      max_sequence_lengths=max_sequence_lengths,
      end_token=self._end_token)
  final_outputs = FinalBeamSearchDecoderOutput(
      beam_search_decoder_output=outputs, predicted_ids=traced_ids)
  return final_outputs, final_state
def _compute_zeroone_score(labels, predictions):
  """Returns 1.0 when every prediction equals its label, otherwise 0.0.

  Args:
    labels: Tensor of ground-truth values.
    predictions: Tensor of predicted values, compared elementwise to `labels`.

  Returns:
    A float scalar tensor: 1.0 if the number of matching elements equals
    `shape(labels)[0]`, else 0.0.
  """
  elementwise_match = math_ops.equal(labels, predictions)
  num_matches = math_ops.reduce_sum(math_ops.to_int32(elementwise_match))
  num_labels = array_ops.shape(labels)[0]
  all_match = math_ops.equal(num_matches, num_labels)
  return math_ops.to_float(all_match)
def testGradientWithIntegerPath(self):
  """Gradient of x * float(int(x)) w.r.t. x ignores the integer path."""
  x = constant_op.constant([3.9, 4.1])
  truncated = math_ops.to_int32(x)
  k = math_ops.to_float(truncated)
  y = x * k
  [dy_dx] = gradients_impl.gradients(y, x)
  with self.test_session() as sess:
    actual = sess.run(dy_dx)
    # Only the direct dependence on x contributes, so dy/dx equals k.
    self.assertAllClose([3., 4.], actual)
def average_impurity(self):
  """Constructs a TF graph for evaluating the average leaf impurity of a tree.

  The impurity is `self._weighted_gini` applied to the `node_sums` rows of
  the leaf nodes. When the tree has no leaves, a tensor filled with 1e7 is
  returned instead.

  Returns:
    The last op in the graph.
  """
  first_column = array_ops.slice(self.variables.tree, [0, 0], [-1, 1])
  children = array_ops.squeeze(first_column, squeeze_dims=[1])
  is_leaf = math_ops.equal(constants.LEAF_NODE, children)
  leaf_positions = array_ops.squeeze(
      array_ops.where(is_leaf), squeeze_dims=[1])
  leaves = math_ops.to_int32(leaf_positions)
  counts = array_ops.gather(self.variables.node_sums, leaves)
  gini = self._weighted_gini(counts)

  def _leaf_impurity():
    return gini

  def _large_constant():
    # Average impurity can be used for loss, so with no data return a big
    # number so that loss always decreases.
    return array_ops.ones_like(gini, dtype=dtypes.float32) * 10000000.

  # Guard against step 1, when there often are no leaves yet.
  has_leaves = math_ops.greater(array_ops.shape(leaves)[0], 0)
  return control_flow_ops.cond(has_leaves, _leaf_impurity, _large_constant)
def gather_tree_from_array(t, parent_ids, sequence_length):
  """Calculates the full beams for `TensorArray`s.

  Args:
    t: A stacked `TensorArray` of size `max_time` that contains `Tensor`s of
      shape `[batch_size, beam_width, s]` or `[batch_size * beam_width, s]`
      where `s` is the depth shape.
    parent_ids: The parent ids of shape `[max_time, batch_size, beam_width]`.
    sequence_length: The sequence length of shape `[batch_size, beam_width]`.

  Returns:
    A `Tensor` which is a stacked `TensorArray` of the same size and type as
    `t` and where beams are sorted in each `Tensor` according to `parent_ids`.
  """

  def _dim(axis):
    # Use the static size when known, otherwise the dynamic one.
    return (parent_ids.shape.dims[axis].value or
            array_ops.shape(parent_ids)[axis])

  max_time = _dim(0)
  batch_size = _dim(1)
  beam_width = _dim(2)

  # Generate beam ids that will be reordered by gather_tree.
  beam_range = math_ops.range(beam_width)
  beam_ids = array_ops.expand_dims(array_ops.expand_dims(beam_range, 0), 0)
  beam_ids = array_ops.tile(beam_ids, [max_time, batch_size, 1])

  longest_per_batch = math_ops.reduce_max(sequence_length, axis=1)
  max_sequence_lengths = math_ops.to_int32(longest_per_batch)
  sorted_beam_ids = beam_search_ops.gather_tree(
      step_ids=beam_ids,
      parent_ids=parent_ids,
      max_sequence_lengths=max_sequence_lengths,
      end_token=beam_width + 1)

  # For out of range steps, simply copy the same beam.
  step_mask = array_ops.sequence_mask(sequence_length, maxlen=max_time)
  in_bound_steps = array_ops.transpose(step_mask, perm=[2, 0, 1])
  sorted_beam_ids = array_ops.where(
      in_bound_steps, x=sorted_beam_ids, y=beam_ids)

  # Generate indices for gather_nd.
  time_column = array_ops.reshape(math_ops.range(max_time), [-1, 1, 1])
  time_ind = array_ops.tile(time_column, [1, batch_size, beam_width])
  batch_column = array_ops.reshape(math_ops.range(batch_size), [-1, 1, 1])
  batch_ind = array_ops.tile(batch_column, [1, max_time, beam_width])
  batch_ind = array_ops.transpose(batch_ind, perm=[1, 0, 2])
  indices = array_ops.stack([time_ind, batch_ind, sorted_beam_ids], -1)

  # Gather from a tensor with collapsed additional dimensions.
  final_shape = array_ops.shape(t)
  collapsed = array_ops.reshape(t, [max_time, batch_size, beam_width, -1])
  ordered = array_ops.gather_nd(collapsed, indices)
  return array_ops.reshape(ordered, final_shape)
def one_hot_mask(labels, num_classes, scope=None):
  """Compute 1-hot encodings for masks.

  Given a label image, this computes the one hot encoding at each pixel.

  Args:
    labels: (height, width, 1) tensor containing labels. The shape is
      unpacked into exactly three dimensions, so a leading batch dimension is
      not supported.
    num_classes: number of classes
    scope: optional scope name

  Returns:
    Tensor of shape (height, width, num_classes) with a 1-hot encoding.
  """
  with ops.name_scope(scope, "OneHotMask", [labels]):
    height, width, depth = _shape(labels)
    assert depth == 1
    # One row per pixel: [pixel_index, label] addresses the 1.0 entry.
    sparse_labels = math_ops.to_int32(array_ops.reshape(labels, [-1, 1]))
    sparse_size, _ = _shape(sparse_labels)
    indices = array_ops.reshape(math_ops.range(0, sparse_size, 1), [-1, 1])
    concated = array_ops.concat([indices, sparse_labels], 1)
    dense_result = sparse_ops.sparse_to_dense(concated,
                                              [sparse_size, num_classes], 1.0,
                                              0.0)
    result = array_ops.reshape(dense_result, [height, width, num_classes])
    return result
def confusion_matrix(predictions, labels, num_classes=None, name=None):
  """Computes the confusion matrix from predictions and labels.

  Calculate the Confusion Matrix for a pair of prediction and
  label 1-D int arrays.

  Considering a prediction array such as: `[1, 2, 3]`
  And a label array such as: `[2, 2, 3]`

  The confusion matrix returned would be the following one:
      [[0, 0, 0, 0]
       [0, 0, 1, 0]
       [0, 0, 1, 0]
       [0, 0, 0, 1]]

  Where the matrix rows represent the prediction labels and the columns
  represent the real labels. The confusion matrix is always a 2-D array
  of shape [n, n], where n is the number of valid labels for a given
  classification task. Both prediction and labels must be 1-D arrays of
  the same shape in order for this function to work.

  Args:
    predictions: A 1-D array representing the predictions for a given
                 classification.
    labels: A 1-D array representing the real labels for the classification
            task.
    num_classes: The possible number of labels the classification task can
                 have. If this value is not provided, it will be calculated
                 as one plus the largest value found in either array.
    name: Scope name.

  Returns:
    An n X n int32 matrix representing the confusion matrix, where n is the
    number of possible labels in the classification task.

  Raises:
    ValueError: If both predictions and labels are not 1-D vectors and do not
                have the same size.
  """
  with ops.op_scope([predictions, labels, num_classes], name,
                    'confusion_matrix') as name:
    predictions = ops.convert_to_tensor(
        predictions, name='predictions', dtype=dtypes.int64)
    labels = ops.convert_to_tensor(labels, name='labels', dtype=dtypes.int64)

    if num_classes is None:
      num_classes = math_ops.maximum(math_ops.reduce_max(predictions),
                                     math_ops.reduce_max(labels)) + 1

    shape = array_ops.pack([num_classes, num_classes])
    # Each (prediction, label) pair addresses one cell: row = prediction,
    # column = label.
    indices = array_ops.transpose(array_ops.pack([predictions, labels]))
    values = array_ops.ones_like(predictions, dtype=dtypes.int32)
    cm_sparse = ops.SparseTensor(indices=indices, values=values, shape=shape)
    zero_matrix = array_ops.zeros(math_ops.to_int32(shape), dtypes.int32)

    return sparse_ops.sparse_add(zero_matrix, cm_sparse)
def one_hot_mask(labels, num_classes, scope=None):
  """Compute 1-hot encodings for masks.

  Given a label image, this computes the one hot encoding at each pixel.

  Args:
    labels: (height, width, 1) tensor containing labels. The shape is
      unpacked into exactly three dimensions, so a leading batch dimension is
      not supported.
    num_classes: number of classes
    scope: optional scope name

  Returns:
    Tensor of shape (height, width, num_classes) with a 1-hot encoding.
  """
  with ops.name_scope(scope, "OneHotMask", [labels]):
    height, width, depth = _shape(labels)
    assert depth == 1
    # One row per pixel: [pixel_index, label] addresses the 1.0 entry.
    sparse_labels = math_ops.to_int32(array_ops.reshape(labels, [-1, 1]))
    sparse_size, _ = _shape(sparse_labels)
    indices = array_ops.reshape(math_ops.range(0, sparse_size, 1), [-1, 1])
    concated = array_ops.concat_v2([indices, sparse_labels], 1)
    dense_result = sparse_ops.sparse_to_dense(concated,
                                              [sparse_size, num_classes], 1.0,
                                              0.0)
    result = array_ops.reshape(dense_result, [height, width, num_classes])
    return result
def body(i, newe, score):
  """Processes example `i`: attention scores followed by a gated update.

  Reads the i-th entries of the enclosing-scope tensor arrays, computes
  softmax attention over the first `l` positions (zero elsewhere), and blends
  the previous vector with a candidate using sigmoid gates. `h`, `weights`,
  `biases` and `updates` come from the enclosing scope.

  Args:
    i: Loop counter.
    newe: TensorArray receiving the updated vector at index `i`.
    score: TensorArray receiving the padded attention scores at index `i`.

  Returns:
    The tuple `(i + 1, newe, score)` with both arrays written at index `i`.
  """
  memory = memories_iter.read(i)
  prev_e = e_iter.read(i)
  tiled_prev_e = tf.tile(
      tf.expand_dims(prev_e, 0), [self.max_sentence_len, 1])
  aspect = aspect_inputs_iter.read(i)
  sentence_len = math_ops.to_int32(sentence_lens_iter.read(i))

  attention_input = tf.transpose(
      tf.concat([memory, tiled_prev_e, aspect], 1), perm=[1, 0])
  raw_scores = (
      tf.matmul(weights['attention'][h], attention_input) +
      biases['attention'][h])

  # Softmax only over the real tokens; padded positions get weight zero.
  valid_scores = tf.nn.softmax(tf.slice(raw_scores, [0, 0], [1, sentence_len]))
  padding = tf.zeros([1, self.max_sentence_len - sentence_len])
  padded_scores = tf.concat([valid_scores, padding], 1)
  score = score.write(i, padded_scores)

  attended = tf.reshape(tf.matmul(padded_scores, memory), [-1, 1])
  prev_e = tf.reshape(prev_e, [-1, 1])

  reset_gate = tf.nn.sigmoid(
      tf.matmul(weights['gru_r'], attended) +
      tf.matmul(updates['gru_r'], prev_e))
  update_gate = tf.nn.sigmoid(
      tf.matmul(weights['gru_z'], attended) +
      tf.matmul(updates['gru_z'], prev_e))
  candidate = tf.nn.tanh(
      tf.matmul(weights['gru_x'], attended) +
      tf.matmul(weights['gru_g'], tf.multiply(reset_gate, prev_e)))

  blended = (
      tf.multiply(1 - update_gate, prev_e) +
      tf.multiply(update_gate, candidate))
  newe = newe.write(i, blended)
  return (i + 1, newe, score)
def _inplace_helper(x, i, v, op):
  """Applies an inplace op on (x, i, v).

  op is one of gen_array_ops.alias_inplace_update,
  gen_array_ops.alias_inplace_add, or gen_array_ops.alias_inplace_sub.

  If i is None, x and v must be the same shape. Computes
    x op v;
  If i is a scalar, x has a rank 1 higher than v's. Computes
    x[i, :] op v;
  Otherwise, x and v must have the same rank. Computes
    x[i, :] op v;

  Args:
    x: A Tensor.
    i: None, a scalar or a vector.
    v: A Tensor.
    op: alias_inplace_update, alias_inplace_add, or alias_inplace_sub.

  Returns:
    Returns x.
  """
  x = ops.convert_to_tensor(x)
  v = ops.convert_to_tensor(v, x.dtype)

  if i is not None:
    index = math_ops.to_int32(i)
    if index.get_shape().ndims != 0:
      return op(x, index, v)
    # Single 0-dim update: lift the index and the value to rank + 1.
    return op(x, array_ops.reshape(index, [1]), array_ops.expand_dims(v, 0))

  # Full tensor: treat both operands as a single row and update row 0.
  row_x = array_ops.reshape(x, [1, -1])
  row_v = array_ops.reshape(v, [1, -1])
  updated_row = op(row_x, [0], row_v)
  return array_ops.reshape(updated_row, array_ops.shape(x))
def get_best(self, n):
  """Return the indices and values of the n highest scores in the TopN.

  Args:
    n: Number of top entries requested.

  Returns:
    A pair `(gathered_indices, topk_values)`: the `sl_ids` entries selected
    by a top-k over `sl_scores`, and the corresponding scores. At most
    `sl_ids[0]` entries are returned.
  """

  def refresh_shortlist():
    """Update the shortlist with the highest scores in id_to_score."""
    new_scores, new_ids = nn_ops.top_k(self.id_to_score, self.shortlist_size)
    smallest_new_score = math_ops.reduce_min(new_scores)
    # Count the entries whose score is above the float32 minimum.
    new_length = math_ops.reduce_sum(
        math_ops.to_int32(math_ops.greater(new_scores, dtypes.float32.min)))
    # Slot 0 of each shortlist variable holds metadata: the length in
    # sl_ids and the smallest score in sl_scores.
    u1 = self.sl_ids.assign(
        math_ops.to_int64(array_ops.concat([[new_length], new_ids], 0)))
    u2 = self.sl_scores.assign(
        array_ops.concat([[smallest_new_score], new_scores], 0))
    # Recorded so that later graph construction can depend on these updates.
    self.last_ops = [u1, u2]
    return control_flow_ops.group(u1, u2)

  # We only need to refresh the shortlist if n is greater than the
  # current shortlist size (which is stored in sl_ids[0]).
  with ops.control_dependencies(self.last_ops):
    cond_op = control_flow_ops.cond(n > self.sl_ids[0], refresh_shortlist,
                                    control_flow_ops.no_op)
    # Everything below must observe the (possibly refreshed) shortlist.
    with ops.control_dependencies([cond_op]):
      topk_values, topk_indices = nn_ops.top_k(
          self.sl_scores,
          math_ops.minimum(n, math_ops.to_int32(self.sl_ids[0])))
      # topk_indices are the indices into the shortlist, we want to return
      # the indices into id_to_score
      gathered_indices = array_ops.gather(self.sl_ids, topk_indices)
      return gathered_indices, topk_values
def defun_fn(x):
  """Returns 11000 plus the nested Defun applied to int32(x).

  The nested Defun itself adds 1000 to its int32-cast argument.
  """

  @function.Defun(dtypes.int32)
  def defun_fn_deep(x):
    inner_offset = constant_op.constant(1000)
    return inner_offset + math_ops.to_int32(x)

  outer_offset = constant_op.constant(11000)
  deep_result = defun_fn_deep(math_ops.to_int32(x))
  return outer_offset + deep_result
def _compute_power_svd(self, var, mat_g, mat_g_size, alpha, mat_h_slot_name):
  """Computes mat_h = mat_g^alpha using svd.

  mat_g is a symmetric PSD matrix.

  Args:
    var: the variable we are updating.
    mat_g: the symmetric PSD matrix whose power is to be computed.
    mat_g_size: size of mat_g.
    alpha: a real number.
    mat_h_slot_name: name of slot to store the power, if needed.

  Returns:
    mat_h = mat_g^alpha. When `mat_h_slot_name` is not None, the result of
    assigning mat_h to that slot is returned instead.

  Note that mat_g is PSD. So we could use linalg_ops.self_adjoint_eig.
  """
  if mat_g_size != 1:
    # Dampen the diagonal before decomposing.
    identity = linalg_ops.eye(math_ops.to_int32(mat_g_size))
    damping = self._epsilon * identity
    diag_d, mat_u, mat_v = linalg_ops.svd(mat_g + damping, full_matrices=True)
    # Singular values are floored at epsilon before being raised to alpha.
    powered_values = math_ops.pow(
        math_ops.maximum(diag_d, self._epsilon), alpha)
    scaled_v = mat_v * powered_values
    mat_h = math_ops.matmul(scaled_v, array_ops.transpose(mat_u))
  else:
    # 1x1 case: a plain elementwise power is enough.
    mat_h = math_ops.pow(mat_g + self._epsilon, alpha)

  if mat_h_slot_name is None:
    return mat_h
  return state_ops.assign(self.get_slot(var, mat_h_slot_name), mat_h)
def Forward(*args):
  """Forward pass of the recurrent net.

  Unpacks `args` according to `forward_sig`, runs `ForwardLoopBody` through
  `functional_ops.For`, and returns the flattened accumulated states, the
  final state and the accumulated extras.
  """
  theta, state0, inputs, max_input_length, extras = _Pack(args, forward_sig)

  slen_dim = _SeqLenDim(inputs)

  # Creates accumulators for state0 and extras.
  acc_state = _EmptyAcc(slen_dim, state0)
  acc_extras = _EmptyAcc(slen_dim, extras)

  # With aligned ends the loop covers the last `max_input_length` steps;
  # otherwise it covers the first `max_input_length` steps.
  if self._aligned_end:
    start = slen_dim - max_input_length
    limit = slen_dim
  else:
    start = 0
    limit = max_input_length

  if use_tpu:
    dev_t = math_ops.to_int32(start)
  else:
    dev_t = math_ops.to_int64(start)

  loop_inputs = [dev_t] + _Flatten(
      [theta, state0, inputs, acc_state, acc_extras])
  loop_outputs = functional_ops.For(
      start=start,
      limit=limit,
      delta=1,
      inputs=loop_inputs,
      body=ForwardLoopBody,
      rewrite_with_while=compiled)

  # Element 0 is dropped before repacking the remaining loop outputs.
  _, state1, _, acc_state, acc_extras = _Pack(
      loop_outputs[1:],
      [self._theta, self._state, self._inputs, self._state, self._extras])

  return _Flatten([acc_state, state1, acc_extras])
def testGradientWithIntegerPath(self):
  """Gradient of x * float(int(x)) w.r.t. x ignores the integer path."""
  x = constant_op.constant([3.9, 4.1])
  truncated = math_ops.to_int32(x)
  k = math_ops.to_float(truncated)
  y = x * k
  [dy_dx] = gradients_impl.gradients(y, x)
  with self.cached_session() as sess:
    actual = sess.run(dy_dx)
    # Only the direct dependence on x contributes, so dy/dx equals k.
    self.assertAllClose([3., 4.], actual)
def Backward(*args):
  """Backward pass for the recurrent net.

  `args`, unpacked according to `backward_sig`, holds:
    theta, state0, inputs, max_input_length: Forward's inputs.
    acc_state: the accumulated 1st output of Forward.
    acc_extras: the accumulated 2nd output of Forward.
    d_acc_state: the gradient for acc_state.
    d_state1: the gradient for the final state computed by Forward.

  Returns:
    The flattened list `[d_theta, d_state0, d_inputs, d_max_input_length,
    acc_extras]`, where `d_max_input_length` is a zero constant.
  """
  (theta, state0, inputs, max_input_length, acc_state, acc_extras,
   d_acc_state, d_state1) = _Pack(args, backward_sig)

  # Accumulators for gradients.
  d_theta = _EmptyLike(theta)
  d_inputs = _EmptyLike(inputs)

  # Loop backwards. Note the loop's limit is open-ended, so goes through
  # t=0.
  last_step = max_input_length - 1
  if use_tpu:
    dev_t = math_ops.to_int32(last_step)
  else:
    dev_t = math_ops.to_int64(last_step)

  loop_inputs = [dev_t] + _Flatten([
      theta, state0, inputs, acc_state, acc_extras, d_theta, d_state1,
      d_inputs, d_acc_state
  ])
  loop_outputs = functional_ops.For(
      start=last_step,
      limit=-1,
      delta=-1,
      inputs=loop_inputs,
      body=BackwardLoopBody,
      rewrite_with_while=compiled)

  # Only the extras and the accumulated gradients are needed from here on.
  (_, _, _, _, acc_extras, d_theta, d_state0, d_inputs, _) = _Pack(
      loop_outputs[1:], bakloop_sig)

  d_max_input_length = array_ops.constant(0, dtype=max_input_length.dtype)
  return _Flatten(
      [d_theta, d_state0, d_inputs, d_max_input_length, acc_extras])
def average_impurity(self):
  """Constructs a TF graph for evaluating the average leaf impurity of a tree.

  The impurity is `self._weighted_gini` applied to the `node_sums` rows of
  the leaf nodes. When the tree has no leaves, a tensor filled with 1e7 is
  returned instead.

  Returns:
    The last op in the graph.
  """
  first_column = array_ops.slice(self.variables.tree, [0, 0], [-1, 1])
  children = array_ops.squeeze(first_column, squeeze_dims=[1])
  is_leaf = math_ops.equal(constants.LEAF_NODE, children)
  leaf_positions = array_ops.squeeze(
      array_ops.where(is_leaf), squeeze_dims=[1])
  leaves = math_ops.to_int32(leaf_positions)
  counts = array_ops.gather(self.variables.node_sums, leaves)
  gini = self._weighted_gini(counts)

  def _leaf_impurity():
    return gini

  def _large_constant():
    # Average impurity can be used for loss, so with no data return a big
    # number so that loss always decreases.
    return array_ops.ones_like(gini, dtype=dtypes.float32) * 10000000.

  # Guard against step 1, when there often are no leaves yet.
  has_leaves = math_ops.greater(array_ops.shape(leaves)[0], 0)
  return control_flow_ops.cond(has_leaves, _leaf_impurity, _large_constant)
def _Update(struct_acc, struct_x, t):
  """Updates t-th row in accumulators.

  Args:
    struct_acc: The accumulators. A structure of tensors.
    struct_x: The new values. A structure of tensors congruent to `struct_acc`.
    t: A scalar integer. Performance is better if `t` is on the device memory.

  Returns:
    A structure of tensors. Say, ret is a returned dictionary. Then, for
    each key, we have:
      ret[key] = struct_acc[key];
      ret[key][t, :] = struct_x[key]
  """
  acc_lst = nest.flatten(struct_acc)
  x_lst = nest.flatten(struct_x)
  t = math_ops.to_int32([t])  # tf.to_int32 casts on-device tensors.
  # Every accumulator is updated. The former skip-list was always empty, so
  # its branch never ran and the membership test only hashed each tensor.
  lst = [
      alias_inplace_update(acc, t, array_ops.expand_dims(x, 0))
      for acc, x in zip(acc_lst, x_lst)
  ]
  return nest.pack_sequence_as(struct_acc, lst)
def defun_fn(x):
  """Returns 11000 plus the nested Defun applied to int32(x).

  The nested Defun itself adds 1000 to its int32-cast argument.
  """

  @function.Defun(dtypes.int32)
  def defun_fn_deep(x):
    inner_offset = constant_op.constant(1000)
    return inner_offset + math_ops.to_int32(x)

  outer_offset = constant_op.constant(11000)
  deep_result = defun_fn_deep(math_ops.to_int32(x))
  return outer_offset + deep_result
def body(i, prev_c, prev_h, actions, log_probs):
  """Runs one decoding step: LSTM, attention, logits, and action choice.

  Closure state (`self`, `lstm`, `w_lstm`, `forget_bias`, `attn_w_2`,
  `attn_mem`, `attn_v`, `device_softmax`, `device_embeddings`,
  `device_go_embedding`, `mode`, `y`) comes from the enclosing scope.

  Args:
    i: Loop counter.
    prev_c: Previous LSTM cell state.
    prev_h: Previous LSTM hidden state.
    actions: TensorArray of the actions chosen so far; written at index `i`.
    log_probs: Running sum to which this step's sparse softmax cross-entropy
      of the chosen action is added.

  Returns:
    The tuple `(i + 1, next_c, next_h, actions, log_probs)`.

  Raises:
    NotImplementedError: If `mode` is not "sample", "greedy" or "target".
  """
  # pylint: disable=g-long-lambda
  # Step 0 is fed the tiled "go" embedding; later steps embed the previous
  # action.
  signal = control_flow_ops.cond(
      math_ops.equal(i, 0),
      lambda: array_ops.tile(
          device_go_embedding, [self.hparams.num_children, 1]),
      lambda: embedding_ops.embedding_lookup(device_embeddings,
                                             actions.read(i - 1)))
  if self.hparams.keep_prob is not None:
    signal = nn_ops.dropout(signal, self.hparams.keep_prob)
  next_c, next_h = lstm(signal, prev_c, prev_h, w_lstm, forget_bias)

  # Attention: score each of the `num_groups` memory slots, softmax the
  # scores, and take the weighted sum of `attn_mem`.
  query = math_ops.matmul(next_h, attn_w_2)
  query = array_ops.reshape(
      query, [self.hparams.num_children, 1, self.hparams.hidden_size])
  query = math_ops.tanh(query + attn_mem)
  query = array_ops.reshape(query, [
      self.hparams.num_children * self.num_groups, self.hparams.hidden_size
  ])
  query = math_ops.matmul(query, attn_v)
  query = array_ops.reshape(
      query, [self.hparams.num_children, self.num_groups])
  query = nn_ops.softmax(query)
  query = array_ops.reshape(
      query, [self.hparams.num_children, self.num_groups, 1])
  query = math_ops.reduce_sum(attn_mem * query, axis=1)
  query = array_ops.concat([next_h, query], axis=1)

  logits = math_ops.matmul(query, device_softmax)
  logits /= self.hparams.temperature
  if self.hparams.tanh_constant > 0:
    logits = math_ops.tanh(logits) * self.hparams.tanh_constant
  if self.hparams.logits_std_noise > 0:
    # Noise scale is relative to the RMS magnitude of the logits.
    num_in_logits = math_ops.cast(array_ops.size(logits),
                                  dtype=dtypes.float32)
    avg_norm = math_ops.divide(linalg_ops.norm(logits),
                               math_ops.sqrt(num_in_logits))
    logits_noise = random_ops.random_normal(
        array_ops.shape(logits),
        stddev=self.hparams.logits_std_noise * avg_norm)
    # Noise is only added while global_step <= stop_noise_step.
    logits = control_flow_ops.cond(
        self.global_step > self.hparams.stop_noise_step,
        lambda: logits,
        lambda: logits + logits_noise)

  if mode == "sample":
    next_y = random_ops.multinomial(logits, 1, seed=self.hparams.seed)
  elif mode == "greedy":
    next_y = math_ops.argmax(logits, 1)
  elif mode == "target":
    # Use column `i` of the provided actions `y`.
    next_y = array_ops.slice(y, [0, i], [-1, 1])
  else:
    raise NotImplementedError
  next_y = math_ops.to_int32(next_y)
  next_y = array_ops.reshape(next_y, [self.hparams.num_children])
  actions = actions.write(i, next_y)
  # NOTE(review): this accumulates cross-entropy, i.e. presumably the
  # negative log-probability of the chosen action -- confirm the sign
  # convention with the caller.
  log_probs += nn_ops.sparse_softmax_cross_entropy_with_logits(
      logits=logits, labels=next_y)
  return i + 1, next_c, next_h, actions, log_probs
def confusion_matrix(predictions, labels, num_classes=None,
                     dtype=dtypes.int32, name=None):
  """Computes the confusion matrix from predictions and labels.

  Calculate the Confusion Matrix for a pair of prediction and
  label 1-D int arrays.

  Considering a prediction array such as: `[1, 2, 3]`
  And a label array such as: `[2, 2, 3]`

  The confusion matrix returned would be the following one:
      [[0, 0, 0, 0]
       [0, 0, 1, 0]
       [0, 0, 1, 0]
       [0, 0, 0, 1]]

  Where the matrix rows represent the prediction labels and the columns
  represent the real labels. The confusion matrix is always a 2-D array
  of shape [n, n], where n is the number of valid labels for a given
  classification task. Both prediction and labels must be 1-D arrays of
  the same shape in order for this function to work.

  Args:
    predictions: A 1-D array representing the predictions for a given
                 classification.
    labels: A 1-D array representing the real labels for the classification
            task.
    num_classes: The possible number of labels the classification task can
                 have. If this value is not provided, it will be calculated
                 as one plus the largest value found in either array.
    dtype: Data type of the confusion matrix.
    name: Scope name.

  Returns:
    A k X k matrix representing the confusion matrix, where k is the number
    of possible labels in the classification task.

  Raises:
    ValueError: If both predictions and labels are not 1-D vectors and do not
                have the same size.
  """
  with ops.name_scope(name, 'confusion_matrix',
                      [predictions, labels, num_classes]) as name:
    predictions, labels = metric_ops_util.remove_squeezable_dimensions(
        ops.convert_to_tensor(
            predictions, name='predictions', dtype=dtypes.int64),
        ops.convert_to_tensor(labels, name='labels', dtype=dtypes.int64))

    if num_classes is None:
      num_classes = math_ops.maximum(math_ops.reduce_max(predictions),
                                     math_ops.reduce_max(labels)) + 1

    shape = array_ops.pack([num_classes, num_classes])
    # Each (prediction, label) pair addresses one cell: row = prediction,
    # column = label.
    indices = array_ops.transpose(array_ops.pack([predictions, labels]))
    values = array_ops.ones_like(predictions, dtype)
    cm_sparse = ops.SparseTensor(
        indices=indices, values=values, shape=shape)
    zero_matrix = array_ops.zeros(math_ops.to_int32(shape), dtype)

    return sparse_ops.sparse_add(zero_matrix, cm_sparse)
def gather_tree_from_array(t, parent_ids, sequence_length):
  """Calculates the full beams for `TensorArray`s.

  Args:
    t: A stacked `TensorArray` of size `max_time` that contains `Tensor`s of
      shape `[batch_size, beam_width, s]` or `[batch_size * beam_width, s]`
      where `s` is the depth shape.
    parent_ids: The parent ids of shape `[max_time, batch_size, beam_width]`.
    sequence_length: The sequence length of shape `[batch_size, beam_width]`.

  Returns:
    A `Tensor` which is a stacked `TensorArray` of the same size and type as
    `t` and where beams are sorted in each `Tensor` according to `parent_ids`.
  """

  def _dim(axis):
    # Use the static size when known, otherwise the dynamic one.
    return parent_ids.shape[axis].value or array_ops.shape(parent_ids)[axis]

  max_time = _dim(0)
  batch_size = _dim(1)
  beam_width = _dim(2)

  # Generate beam ids that will be reordered by gather_tree.
  beam_range = math_ops.range(beam_width)
  beam_ids = array_ops.expand_dims(array_ops.expand_dims(beam_range, 0), 0)
  beam_ids = array_ops.tile(beam_ids, [max_time, batch_size, 1])

  step_mask = array_ops.sequence_mask(
      sequence_length, maxlen=max_time, dtype=dtypes.int32)
  mask = array_ops.transpose(step_mask, perm=[2, 0, 1])

  # Use beam_width + 1 to mark the end of beam.
  masked_beam_ids = (beam_ids * mask) + (1 - mask) * (beam_width + 1)

  longest_per_batch = math_ops.reduce_max(sequence_length, axis=1)
  max_sequence_lengths = math_ops.to_int32(longest_per_batch)
  sorted_beam_ids = beam_search_ops.gather_tree(
      step_ids=masked_beam_ids,
      parent_ids=parent_ids,
      max_sequence_lengths=max_sequence_lengths,
      end_token=beam_width + 1)

  # For out of range steps, simply copy the same beam.
  in_bound_steps = math_ops.cast(mask, dtypes.bool)
  sorted_beam_ids = array_ops.where(
      in_bound_steps, x=sorted_beam_ids, y=beam_ids)

  # Generate indices for gather_nd.
  time_column = array_ops.reshape(math_ops.range(max_time), [-1, 1, 1])
  time_ind = array_ops.tile(time_column, [1, batch_size, beam_width])
  batch_column = array_ops.reshape(math_ops.range(batch_size), [-1, 1, 1])
  batch_ind = array_ops.tile(batch_column, [1, max_time, beam_width])
  batch_ind = array_ops.transpose(batch_ind, perm=[1, 0, 2])
  indices = array_ops.stack([time_ind, batch_ind, sorted_beam_ids], -1)

  # Gather from a tensor with collapsed additional dimensions.
  final_shape = array_ops.shape(t)
  collapsed = array_ops.reshape(t, [max_time, batch_size, beam_width, -1])
  ordered = array_ops.gather_nd(collapsed, indices)
  return array_ops.reshape(ordered, final_shape)
def __call__(self, inputs, state, scope=None): batch_size = array_ops.shape(inputs)[0] ### Unpack state # last_betas: bs x V # last_index : bs last_beta, last_index = array_ops.split( state, [self._transitions.num_tags, 1], axis=1) last_index = math_ops.cast(last_index, dtypes.int32) ### Unpack inputs # unary: bs x V # last_beta bs x V shape = [ self._transitions.num_tags, self._transitions.num_tags, self._transitions._total_nr_parameters ] unary, beta, pairwise_flat = array_ops.split(inputs, shape, axis=1) ### Construct logits of this timestep according to (6) in the paper batch_indices = array_ops.reshape(math_ops.range(batch_size), [-1, 1]) pairwise = self._transitions.get_pairwise_given_start( pairwise_flat, last_index) # bs x V last_beta = array_ops.gather_nd( last_beta, array_ops.concat([batch_indices, last_index], axis=1)) last_beta = array_ops.reshape(last_beta, [-1, 1]) logits = pairwise + unary + beta - last_beta # NOTE this is only valid for the index we are gathering from # bs x V log_probs = nn_ops.log_softmax(logits) # bs x 1 entropy = -math_ops.reduce_sum( log_probs * math_ops.exp(log_probs), axis=1, keepdims=True) ### Sample the next symbol # bs x 1 new_indices = random_ops.multinomial(logits, 1) new_indices = math_ops.to_int32(new_indices) ### Gather the logits of the new symbol to return the sequence probability gather_indices = array_ops.concat( [batch_indices, array_ops.reshape(new_indices, [-1, 1])], axis=1) # bs x 1 output_logits = array_ops.gather_nd(logits, gather_indices) output_logits = array_ops.reshape(output_logits, [-1, 1]) ### Pack the new state new_state = array_ops.concat( [beta, math_ops.to_float(new_indices)], axis=1) output = array_ops.concat( [math_ops.to_float(new_indices), output_logits, entropy, logits], axis=1) return output, new_state
def _GatherV2Grad(op, grad):
    """Gradient for GatherV2 op.

    Args:
        op: The GatherV2 op; its inputs are read as (params, indices, axis).
        grad: Gradient with respect to the op's output.

    Returns:
        A three-element list. The first element is the gradient for `params`:
        an `IndexedSlices` when the axis is statically known to be 0,
        otherwise a dense tensor. The entries for `indices` and `axis` are
        `None`.
    """
    params = op.inputs[0]
    gather_ids = op.inputs[1]
    axis = op.inputs[2]

    # params can be large, so colocate the shape calculation with it.
    #
    # params can be very large for sparse model, array_ops.shape raises
    # exception on the Windows platform when any dimension is larger than
    # int32. The shape is not used in optimizer apply_sparse gradients, so
    # it's fine to convert it back to int32 regardless of truncation.
    with ops.colocate_with(params):
        full_shape = array_ops.shape(params, out_type=ops.dtypes.int64)
        full_shape = math_ops.to_int32(full_shape)

    num_ids = array_ops.expand_dims(array_ops.size(gather_ids), 0)
    static_axis = tensor_util.constant_value(axis)

    # For axis 0 gathers, build an appropriately shaped IndexedSlices.
    if static_axis == 0:
        slices_shape = array_ops.concat([num_ids, full_shape[1:]], 0)
        slices = array_ops.reshape(grad, slices_shape)
        flat_ids = array_ops.reshape(gather_ids, num_ids)
        return [ops.IndexedSlices(slices, flat_ids, full_shape), None, None]

    # Split the params shape around the gather axis.
    leading_shape = full_shape[:axis]
    leading_rank = array_ops.size(leading_shape)
    trailing_shape = full_shape[axis:][1:]
    trailing_rank = array_ops.size(trailing_shape)

    leading_axes = math_ops.range(leading_rank)
    trailing_axes = math_ops.range(leading_rank + 1,
                                   leading_rank + 1 + trailing_rank)

    grad_shape = array_ops.concat([leading_shape, num_ids, trailing_shape], 0)
    shaped_grad = array_ops.reshape(grad, grad_shape)
    flat_ids = array_ops.reshape(gather_ids, num_ids)

    # We need to sum up every slice `values[..., i, ....]` corresponding to
    # `params[..., indices[i], ...]`. Since `unsorted_segment_sum` does not
    # support an axis parameter, we transpose the gather dimension to the
    # front, then use `unsorted_segment_sum` to build a
    # [gather_axis, outer_axes, inner_axes] tensor with all the gradients
    # affecting each index in `gather_axis` summed up.
    to_front = array_ops.concat(
        [[leading_rank], leading_axes, trailing_axes], 0)
    grad_front = array_ops.transpose(shaped_grad, to_front)
    segment_count = full_shape[axis]
    summed = math_ops.unsorted_segment_sum(grad_front, flat_ids, segment_count)

    # Invert the above transpose by moving dimension 0 back to its original
    # position.
    to_original = array_ops.concat([leading_axes + 1, [0], trailing_axes], 0)
    return [array_ops.transpose(summed, to_original), None, None]
def _loss(probs, targets):
    """Apply `loss_fn` to `probs` and a float32 one-hot encoding of `targets`.

    Args:
        probs: Predictions, passed to `loss_fn` unchanged.
        targets: Class ids; cast to int32 before one-hot encoding with depth
            `num_classes`.

    Returns:
        Whatever `loss_fn(probs, one_hot_labels)` returns.
    """
    class_ids = math_ops.to_int32(targets)
    one_hot_labels = array_ops.one_hot(
        class_ids,
        num_classes,
        on_value=1.,
        off_value=0.,
        dtype=dtypes.float32)
    return loss_fn(probs, one_hot_labels)
def _GatherV2Grad(op, grad):
    """Gradient for GatherV2 op.

    Args:
        op: The GatherV2 op; its inputs are read as (params, indices, axis).
        grad: Gradient with respect to the op's output.

    Returns:
        `[params_grad, None, None]`. `params_grad` is an `IndexedSlices` when
        the axis is statically known to be 0 and a dense tensor otherwise.
    """
    params = op.inputs[0]
    # params can be large, so colocate the shape calculation with it.
    #
    # params can be very large for sparse model, array_ops.shape raises
    # exception on the Windows platform when any dimension is larger than
    # int32. The shape is not used in optimizer apply_sparse gradients, so
    # it's fine to convert it back to int32 regardless of truncation.
    with ops.colocate_with(params):
        shape_of_params = array_ops.shape(params, out_type=ops.dtypes.int64)
        shape_of_params = math_ops.to_int32(shape_of_params)

    raw_indices = op.inputs[1]
    flat_len = array_ops.expand_dims(array_ops.size(raw_indices), 0)
    axis = op.inputs[2]

    # For axis 0 gathers, build an appropriately shaped IndexedSlices.
    if tensor_util.constant_value(axis) == 0:
        sparse_shape = array_ops.concat([flat_len, shape_of_params[1:]], 0)
        sparse_values = array_ops.reshape(grad, sparse_shape)
        sparse_indices = array_ops.reshape(raw_indices, flat_len)
        sparse_grad = ops.IndexedSlices(
            sparse_values, sparse_indices, shape_of_params)
        return [sparse_grad, None, None]

    # Dimensions before and after the gather axis.
    before_shape = shape_of_params[:axis]
    before_rank = array_ops.size(before_shape)
    after_shape = shape_of_params[axis:][1:]
    after_rank = array_ops.size(after_shape)

    before_axes = math_ops.range(before_rank)
    after_axes = math_ops.range(before_rank + 1,
                                before_rank + 1 + after_rank)

    dense_shape = array_ops.concat([before_shape, flat_len, after_shape], 0)
    dense_values = array_ops.reshape(grad, dense_shape)
    flat_indices = array_ops.reshape(raw_indices, flat_len)

    # We need to sum up every slice `values[..., i, ....]` corresponding to
    # `params[..., indices[i], ...]`. Since `unsorted_segment_sum` does not
    # support an axis parameter, we transpose the gather dimension to the
    # front, then use `unsorted_segment_sum` to build a
    # [gather_axis, outer_axes, inner_axes] tensor with all the gradients
    # affecting each index in `gather_axis` summed up.
    gather_axis_first = array_ops.concat(
        [[before_rank], before_axes, after_axes], 0)
    values_transpose = array_ops.transpose(dense_values, gather_axis_first)
    num_segments = shape_of_params[axis]
    params_grad = math_ops.unsorted_segment_sum(
        values_transpose, flat_indices, num_segments)

    # Invert the above transpose by moving dimension 0 back to its original
    # position.
    restore_order = array_ops.concat([before_axes + 1, [0], after_axes], 0)
    params_grad = array_ops.transpose(params_grad, restore_order)
    return [params_grad, None, None]
def input_fn():
    """Build random binary sequences and a majority-vote label per sequence.

    Reads `batch_size`, `sequence_length` and `seed` from the enclosing scope.

    Returns:
        A tuple `({'inputs': inputs}, labels)`. `inputs` is a float tensor of
        shape [batch_size, sequence_length, 1] holding zeros and ones.
        `labels` is an int32 tensor of shape [batch_size] that is 1 where the
        sequence sums to more than `sequence_length / 2.0`, else 0.
    """
    random_sequence = random_ops.random_uniform(
        [batch_size, sequence_length], 0, 2, dtype=dtypes.int32, seed=seed)
    inputs = array_ops.expand_dims(math_ops.to_float(random_sequence), 2)
    # Summing over the time axis leaves shape [batch_size, 1].
    mostly_ones = math_ops.reduce_sum(inputs, axis=[1]) > (
        sequence_length / 2.0)
    # Squeeze only the trailing unit axis. An axis-less squeeze would also
    # drop the batch dimension when batch_size == 1 and yield a scalar label.
    labels = math_ops.to_int32(array_ops.squeeze(mostly_ones, axis=[1]))
    return {'inputs': inputs}, labels
def _loss(probs, targets):
    """Apply `loss_fn` to `probs` and a float32 one-hot encoding of `targets`.

    Args:
        probs: Predictions, passed to `loss_fn` unchanged.
        targets: Class ids. When the static rank is known and greater than 1,
            dimension 1 is squeezed away before encoding.

    Returns:
        Whatever `loss_fn(probs, one_hot_labels)` returns.
    """
    target_rank = targets.get_shape().ndims
    # `ndims` is None when the static rank is unknown. Comparing None with an
    # int raises TypeError on Python 3, so only squeeze for a known rank > 1.
    if target_rank is not None and target_rank > 1:
        targets = array_ops.squeeze(targets, squeeze_dims=[1])
    one_hot_labels = array_ops.one_hot(
        math_ops.to_int32(targets),
        num_classes,
        on_value=1.,
        off_value=0.,
        dtype=dtypes.float32)
    return loss_fn(probs, one_hot_labels)
def body(i, prev_c, prev_h, actions, log_probs):
    """One decoding step: choose an action per child and accumulate its loss.

    Closes over `self`, `mode`, `y`, `lstm` and the attention / embedding
    variables of the enclosing scope.

    Args:
        i: Current step index.
        prev_c: Previous LSTM cell state.
        prev_h: Previous LSTM hidden state.
        actions: TensorArray of per-step actions; step `i` is written here.
        log_probs: Running sum of the per-step sparse softmax cross entropy.

    Returns:
        `(i + 1, next_c, next_h, actions, log_probs)` for the next iteration.

    Raises:
        NotImplementedError: If `mode` is not "sample", "greedy" or "target".
    """
    hparams = self.hparams
    num_children = hparams.num_children
    hidden_size = hparams.hidden_size

    # The first step feeds the "go" embedding; later steps feed the embedding
    # of the previously chosen action.
    # pylint: disable=g-long-lambda
    signal = control_flow_ops.cond(
        math_ops.equal(i, 0),
        lambda: array_ops.tile(device_go_embedding, [num_children, 1]),
        lambda: embedding_ops.embedding_lookup(device_embeddings,
                                               actions.read(i - 1)))
    if hparams.keep_prob is not None:
        signal = nn_ops.dropout(signal, hparams.keep_prob)
    next_c, next_h = lstm(signal, prev_c, prev_h, w_lstm, forget_bias)

    # Attention over `attn_mem`, keyed by the new hidden state.
    projected_h = math_ops.matmul(next_h, attn_w_2)
    projected_h = array_ops.reshape(projected_h,
                                    [num_children, 1, hidden_size])
    attn_hidden = math_ops.tanh(projected_h + attn_mem)
    attn_hidden = array_ops.reshape(
        attn_hidden, [num_children * self.num_groups, hidden_size])
    attn_scores = math_ops.matmul(attn_hidden, attn_v)
    attn_scores = array_ops.reshape(attn_scores,
                                    [num_children, self.num_groups])
    attn_weights = nn_ops.softmax(attn_scores)
    attn_weights = array_ops.reshape(attn_weights,
                                     [num_children, self.num_groups, 1])
    attn_context = math_ops.reduce_sum(attn_mem * attn_weights, axis=1)
    features = array_ops.concat([next_h, attn_context], axis=1)

    logits = math_ops.matmul(features, device_softmax)
    logits = logits / hparams.temperature
    if hparams.tanh_constant > 0:
        logits = math_ops.tanh(logits) * hparams.tanh_constant
    if hparams.logits_std_noise > 0:
        # Noise stddev is scaled by norm(logits) / sqrt(number of logits).
        logits_count = math_ops.cast(
            array_ops.size(logits), dtype=dtypes.float32)
        scaled_norm = math_ops.divide(
            linalg_ops.norm(logits), math_ops.sqrt(logits_count))
        noise = random_ops.random_normal(
            array_ops.shape(logits),
            stddev=hparams.logits_std_noise * scaled_norm)
        # Noise is only added while global_step <= stop_noise_step.
        logits = control_flow_ops.cond(
            self.global_step > hparams.stop_noise_step,
            lambda: logits,
            lambda: logits + noise)

    if mode == "sample":
        next_y = random_ops.multinomial(logits, 1, seed=hparams.seed)
    elif mode == "greedy":
        next_y = math_ops.argmax(logits, 1)
    elif mode == "target":
        next_y = array_ops.slice(y, [0, i], [-1, 1])
    else:
        raise NotImplementedError
    next_y = math_ops.to_int32(next_y)
    next_y = array_ops.reshape(next_y, [num_children])

    actions = actions.write(i, next_y)
    step_loss = nn_ops.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=next_y)
    log_probs = log_probs + step_loss
    return i + 1, next_c, next_h, actions, log_probs
def body(i, context_rep, context_att):
    """Compute attention weights and the attended representation for item `i`.

    Closes over the `*_iter` TensorArrays, `weights`, `biases` and `self`.

    Args:
        i: Index of the item being processed.
        context_rep: TensorArray receiving the attended representation.
        context_att: TensorArray receiving the padded attention weights.

    Returns:
        `(i + 1, context_rep, context_att)` for the next loop iteration.
    """
    context_outputs = context_outputs_iter.read(i)
    aspect_avg = aspect_avg_iter.read(i)
    length = math_ops.to_int32(context_lens_iter.read(i))

    # Score = tanh(outputs . W . aspect_avg + bias), laid out as one row.
    projected = tf.matmul(context_outputs, weights['context_score'])
    aspect_column = tf.reshape(aspect_avg, [-1, 1])
    raw_score = tf.matmul(projected, aspect_column) + biases['context_score']
    context_score = tf.reshape(tf.nn.tanh(raw_score), [1, -1])

    # Softmax over the first `length` scores only; the remaining positions up
    # to max_context_len get zero attention.
    valid_att = tf.nn.softmax(tf.slice(context_score, [0, 0], [1, length]))
    padding = tf.zeros([1, self.max_context_len - length])
    context_att_temp = tf.concat([valid_att, padding], 1)

    context_att = context_att.write(i, context_att_temp)
    attended = tf.matmul(context_att_temp, context_outputs)
    context_rep = context_rep.write(i, attended)
    return (i + 1, context_rep, context_att)
def _get_class_id(self, predictions_dict):
    """Return the class id the next tree is built for, or -1.

    Args:
        predictions_dict: Mapping that holds the `NUM_TREES_ATTEMPTED` entry.

    Returns:
        For the TREE_PER_CLASS strategy with a logits dimension other than 1,
        `NUM_TREES_ATTEMPTED % logits_dimension` cast to int32 (one vs rest).
        Otherwise an int32 constant of -1.
    """
    # Handle different multiclass strategies.
    tree_per_class = (
        self._learner_config.multi_class_strategy ==
        learner_pb2.LearnerConfig.TREE_PER_CLASS)
    if not tree_per_class or self._logits_dimension == 1:
        return constant_op.constant(-1, dtype=dtypes.int32)
    # Choose the class for which the tree is built (one vs rest).
    trees_attempted = predictions_dict[NUM_TREES_ATTEMPTED]
    return math_ops.to_int32(trees_attempted % self._logits_dimension)
def input_fn():
    """Build random binary sequences and a majority-vote label per sequence.

    Reads `batch_size`, `sequence_length` and `seed` from the enclosing scope.

    Returns:
        A tuple `({'inputs': inputs}, labels)`. `inputs` is a float tensor of
        shape [batch_size, sequence_length, 1] holding zeros and ones.
        `labels` is an int32 tensor of shape [batch_size] that is 1 where the
        sequence sums to more than `sequence_length / 2.0`, else 0.
    """
    random_sequence = random_ops.random_uniform(
        [batch_size, sequence_length], 0, 2, dtype=dtypes.int32, seed=seed)
    inputs = array_ops.expand_dims(math_ops.to_float(random_sequence), 2)
    # Summing over the time axis leaves shape [batch_size, 1].
    ones_per_sequence = math_ops.reduce_sum(inputs, reduction_indices=[1])
    mostly_ones = ones_per_sequence > (sequence_length / 2.0)
    # Squeeze only the trailing unit axis (passed positionally so the call
    # does not depend on the keyword name). An axis-less squeeze would also
    # drop the batch dimension when batch_size == 1 and yield a scalar label.
    labels = math_ops.to_int32(array_ops.squeeze(mostly_ones, [1]))
    return {'inputs': inputs}, labels
def _loss(probs, targets):
    """Apply `loss_fn` to `probs` and a float32 one-hot encoding of `targets`.

    Args:
        probs: Predictions, passed to `loss_fn` unchanged.
        targets: Class ids. When the static rank is known and greater than 1,
            dimension 1 is squeezed away before encoding.

    Returns:
        Whatever `loss_fn(probs, one_hot_labels)` returns.
    """
    static_rank = targets.get_shape().ndims
    # An unknown static rank is reported as None. `None > 1` raises TypeError
    # on Python 3, so the squeeze is skipped unless the rank is known.
    if static_rank is not None and static_rank > 1:
        targets = array_ops.squeeze(targets, squeeze_dims=[1])
    one_hot_labels = array_ops.one_hot(
        math_ops.to_int32(targets),
        num_classes,
        on_value=1.,
        off_value=0.,
        dtype=dtypes.float32)
    return loss_fn(probs, one_hot_labels)
def func_body(iteration, scores_margin):
    """Score one candidate medoid and record it at position `iteration`.

    Closes over `cluster_member_ids`, `chosen_ids`, `cluster_idx`,
    `pairwise_distances`, `labels`, `margin_type` and `num_candidates`.

    Args:
        iteration: Index of the candidate cluster member to try.
        scores_margin: 1-D accumulator of `1.0 - score` per candidate.

    Returns:
        `(iteration + 1, updated scores_margin)`.
    """
    # Swap the current medoid with the candidate cluster member.
    candidate = math_ops.to_int32(cluster_member_ids[iteration])
    trial_ids = update_1d_tensor(chosen_ids, cluster_idx, candidate)
    trial_assignment = get_cluster_assignment(pairwise_distances, trial_ids)
    score = compute_clustering_score(labels, trial_assignment, margin_type)

    # Place the margin at index `iteration` with zeros everywhere else, so
    # adding it to the accumulator only touches this candidate's slot.
    zeros_left = array_ops.zeros([iteration])
    zeros_right = array_ops.zeros([num_candidates - 1 - iteration])
    next_iteration = iteration + 1
    margin_slot = array_ops.concat(
        [zeros_left, [1.0 - score], zeros_right], 0)
    return next_iteration, scores_margin + margin_slot
def my_rnn(alphabetEnc, cell, inputs, initial_state=None, dtype=None,
           sequence_length=None, scope=None):
    """Statically unroll `cell`, feeding it `[inputs[t], alphabetEnc[t]]`.

    Args:
        alphabetEnc: Sequence indexed by time step; element `t` is passed to
            the cell together with `inputs[t]`.
        cell: An instance of `RNNCell`.
        inputs: Non-empty Python list of input tensors, one per time step.
        initial_state: Optional initial state. When omitted,
            `cell.zero_state(batch_size, dtype)` is used.
        dtype: Required when `initial_state` is not given.
        sequence_length: Optional per-example lengths; cast to int32. When
            given, each step goes through `_rnn_step`.
        scope: Variable scope name; defaults to "RNN".

    Returns:
        A tuple `(outputs, state)`: the list of per-step outputs and the
        final state.

    Raises:
        TypeError: If `cell` is not an `RNNCell` or `inputs` is not a list.
        ValueError: If `inputs` is empty, or if neither `initial_state` nor
            `dtype` is provided.
    """
    if not isinstance(cell, rnn_cell.RNNCell):
        raise TypeError("cell must be an instance of RNNCell")
    if not isinstance(inputs, list):
        raise TypeError("inputs must be a list")
    if not inputs:
        raise ValueError("inputs must not be empty")

    outputs = []
    with vs.variable_scope(scope or "RNN"):
        # Prefer the static batch size; fall back to the dynamic one.
        fixed_batch_size = inputs[0].get_shape().with_rank_at_least(1)[0]
        if fixed_batch_size.value:
            batch_size = fixed_batch_size.value
        else:
            batch_size = array_ops.shape(inputs[0])[0]

        if initial_state is not None:
            state = initial_state
        else:
            if not dtype:
                raise ValueError(
                    "If no initial_state is provided, dtype must be.")
            state = cell.zero_state(batch_size, dtype)

        # Compare against None explicitly. After the cast `sequence_length`
        # is a Tensor, and truth-testing a Tensor (`if sequence_length:`)
        # raises TypeError in TensorFlow versions that define
        # `Tensor.__bool__`.
        has_sequence_length = sequence_length is not None
        if has_sequence_length:
            sequence_length = math_ops.to_int32(sequence_length)
            # Prepare variables.
            zero_output = array_ops.zeros(
                array_ops.pack([batch_size, cell.output_size]),
                inputs[0].dtype)
            zero_output.set_shape(
                tensor_shape.TensorShape(
                    [fixed_batch_size.value, cell.output_size]))
            min_sequence_length = math_ops.reduce_min(sequence_length)
            max_sequence_length = math_ops.reduce_max(sequence_length)

        for time, input_ in enumerate(inputs):
            if time > 0:
                vs.get_variable_scope().reuse_variables()
            # The lambda is invoked within this same iteration, so the
            # loop-variable closure is safe.
            # pylint: disable=cell-var-from-loop
            call_cell = lambda: cell([input_, alphabetEnc[time]], state)
            # pylint: enable=cell-var-from-loop
            if has_sequence_length:
                (output, state) = _rnn_step(
                    time, sequence_length, min_sequence_length,
                    max_sequence_length, zero_output, state, call_cell)
            else:
                (output, state) = call_cell()
            outputs.append(output)

        return (outputs, state)
def ctc_lambda_func(self, args):
    """Compute the CTC loss for a batch.

    Args:
        args: A 4-tuple `(y_pred, labels, input_length, label_length)`. The
            last axis of both length tensors is squeezed away.

    Returns:
        The result of `ctc.ctc_loss` with an extra dimension added at axis 1.
    """
    predictions, labels, input_length, label_length = args
    predictions = predictions[:, :, :]

    label_lengths = math_ops.to_int32(
        array_ops.squeeze(label_length, axis=-1))
    input_lengths = math_ops.to_int32(
        array_ops.squeeze(input_length, axis=-1))
    sparse_labels = math_ops.to_int32(
        ctc_label_dense_to_sparse(labels, label_lengths))

    # Swap the first two axes and take the log; epsilon() is added first so
    # the log never sees an exact zero.
    swapped = array_ops.transpose(predictions, perm=[1, 0, 2])
    log_predictions = math_ops.log(swapped + epsilon())

    loss = ctc.ctc_loss(
        inputs=log_predictions,
        labels=sparse_labels,
        sequence_length=input_lengths,
        ignore_longer_outputs_than_inputs=True)
    return array_ops.expand_dims(loss, 1)
def generate_sequence_output(encoder_outputs, encoder_state,
                             num_decoder_symbols, sequence_length,
                             num_heads=1, dtype=dtypes.float32,
                             use_attention=True, loop_function=None,
                             scope=None, DNN_at_output=False,
                             forward_only=False):
    """Produce one logit tensor per encoder output.

    Args:
        encoder_outputs: List of encoder output tensors, one per time step.
        encoder_state: Not used by this function.
        num_decoder_symbols: Number of output symbols (logit width).
        sequence_length: Optional per-example lengths; cast to int32. When
            given, each step goes through `_step`.
        num_heads: Not used by this function.
        dtype: Not used by this function.
        use_attention: Not used by this function.
        loop_function: Not used by this function.
        scope: Variable scope name; defaults to "non-attention_RNN".
        DNN_at_output: If True, logits come from `multilayer_perceptron`;
            otherwise from `linear_transformation`.
        forward_only: Forwarded to `multilayer_perceptron`.

    Returns:
        A tuple `(logits, sequence_attention_weights, regularizers)`.
        `logits` is a list with one entry per encoder output,
        `sequence_attention_weights` is always an empty list, and
        `regularizers` comes from the getter matching the chosen output
        layer.
    """
    with variable_scope.variable_scope(scope or "non-attention_RNN"):
        attention_encoder_outputs = list()
        sequence_attention_weights = list()

        # copy over logits once out of sequence_length
        first_shape = encoder_outputs[0].get_shape()
        if first_shape.ndims != 1:
            (fixed_batch_size, output_size) = first_shape.with_rank(2)
        else:
            # NOTE(review): `output_size` is never assigned on this path, so
            # generating logits for rank-1 inputs fails. Confirm whether
            # rank-1 inputs are actually expected.
            fixed_batch_size = first_shape.with_rank_at_least(1)[0]

        if fixed_batch_size.value:
            batch_size = fixed_batch_size.value
        else:
            batch_size = array_ops.shape(encoder_outputs[0])[0]

        has_sequence_length = sequence_length is not None
        if has_sequence_length:
            sequence_length = math_ops.to_int32(sequence_length)
            # Prepare variables.
            zero_logit = array_ops.zeros(
                array_ops.pack([batch_size, num_decoder_symbols]),
                encoder_outputs[0].dtype)
            zero_logit.set_shape(
                tensor_shape.TensorShape(
                    [fixed_batch_size.value, num_decoder_symbols]))
            min_sequence_length = math_ops.reduce_min(sequence_length)
            max_sequence_length = math_ops.reduce_max(sequence_length)

        for time, input_ in enumerate(encoder_outputs):
            if time > 0:
                variable_scope.get_variable_scope().reuse_variables()

            # The lambdas are invoked within this same iteration.
            # pylint: disable=cell-var-from-loop
            if not DNN_at_output:
                generate_logit = lambda: linear_transformation(
                    encoder_outputs[time], output_size, num_decoder_symbols)
            else:
                generate_logit = lambda: multilayer_perceptron(
                    encoder_outputs[time], output_size, 200,
                    num_decoder_symbols, forward_only=forward_only)
            # pylint: enable=cell-var-from-loop

            if has_sequence_length:
                logit = _step(
                    time, sequence_length, min_sequence_length,
                    max_sequence_length, zero_logit, generate_logit)
            else:
                # Call the generator. The previous code appended the lambda
                # itself, so callers received functions instead of tensors.
                logit = generate_logit()
            attention_encoder_outputs.append(logit)

        if DNN_at_output:
            regularizers = get_multilayer_perceptron_regularizers()
        else:
            regularizers = get_linear_transformation_regularizers()

    return attention_encoder_outputs, sequence_attention_weights, regularizers
def my_ctc_decode(y_pred, input_length, greedy=False, beam_width=50,
                  top_paths=1):
    """Decodes the output of a softmax.

    Uses either greedy search (also known as best path) or the beam search
    implemented by `my_ctc_beam_search_decoder`.

    Arguments:
        y_pred: tensor `(samples, time_steps, num_categories)` containing the
            prediction, or output of the softmax.
        input_length: tensor `(samples, )` containing the sequence length for
            each batch item in `y_pred`.
        greedy: perform much faster best-path search if `true`.
        beam_width: if `greedy` is `false`: a beam search decoder will be
            used with a beam of this width.
        top_paths: if `greedy` is `false`, how many of the most probable
            paths will be returned.

    Returns:
        Tuple:
            List: one dense tensor per decoded path (a single element when
                `greedy` is `true`, otherwise `top_paths` elements).
                Important: blank labels are returned as `-1`.
            Tensor with the log probability of each decoded sequence.
    """
    # Swap the first two axes and move to log space; epsilon() keeps the log
    # away from an exact zero.
    swapped = array_ops.transpose(y_pred, perm=[1, 0, 2])
    log_inputs = math_ops.log(swapped + epsilon())
    lengths = math_ops.to_int32(input_length)

    if greedy:
        decoded, log_prob = ctc.ctc_greedy_decoder(
            inputs=log_inputs, sequence_length=lengths)
    else:
        decoded, log_prob = my_ctc_beam_search_decoder(
            inputs=log_inputs,
            sequence_length=lengths,
            beam_width=beam_width,
            top_paths=top_paths)

    # Densify each sparse path, filling missing entries with -1.
    decoded_dense = []
    for sparse_path in decoded:
        decoded_dense.append(
            sparse_ops.sparse_to_dense(
                sparse_path.indices,
                sparse_path.dense_shape,
                sparse_path.values,
                default_value=-1))
    return (decoded_dense, log_prob)
def matrix_square_root(mat_a, mat_a_size, iter_count=100, ridge_epsilon=1e-4):
    """Iterative method to get matrix square root.

    Stable iterations for the matrix square root, Nicholas J. Higham
    Page 231, Eq 2.6b
    http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.6.8799&rep=rep1&type=pdf

    Args:
        mat_a: the symmetric PSD matrix whose matrix square root be computed
        mat_a_size: size of mat_a.
        iter_count: Maximum number of iterations.
        ridge_epsilon: Ridge epsilon added to make the matrix positive
            definite.

    Returns:
        mat_a^0.5. The value comes from the loop slot that holds the iterate
        from before the last executed step.
    """
    identity = linalg_ops.eye(math_ops.to_int32(mat_a_size))
    # Rebinding `mat_a` is deliberate: the loop body below reads this
    # ridge-regularized matrix when it computes the residual.
    mat_a = mat_a + ridge_epsilon * identity
    norm = math_ops.sqrt(math_ops.reduce_sum(mat_a * mat_a))

    def _keep_iterating(step, unused_y, unused_prev_y, unused_z,
                        unused_prev_z, err, prev_err):
        # This method requires that we check for divergence every step.
        return math_ops.logical_and(step < iter_count, err < prev_err)

    def _advance(step, y, unused_prev_y, z, unused_prev_z, err,
                 unused_prev_err):
        update = 0.5 * (3.0 * identity - math_ops.matmul(z, y))
        next_y = math_ops.matmul(y, update)
        next_z = math_ops.matmul(update, z)
        # Compute the error in approximation.
        sqrt_estimate = next_y * math_ops.sqrt(norm)
        squared_estimate = math_ops.matmul(sqrt_estimate, sqrt_estimate)
        residual = mat_a - squared_estimate
        next_err = math_ops.sqrt(
            math_ops.reduce_sum(residual * residual)) / norm
        # Each current value moves into its "previous" slot.
        return step + 1, next_y, y, next_z, z, next_err, err

    start_y = mat_a / norm
    start_z = identity
    start_err = norm
    # The initial "previous error" is start_err + 1.0 so the first
    # divergence check passes.
    loop_state = control_flow_ops.while_loop(
        _keep_iterating, _advance, [
            0, start_y, start_y, start_z, start_z, start_err,
            start_err + 1.0
        ])
    previous_y = loop_state[2]
    return previous_y * math_ops.sqrt(norm)
def body(i, entities_rep, entities_att):
    """Compute attention weights and the attended representation for item `i`.

    Closes over the `*_iter` TensorArrays, `weights`, `biases` and `self`.

    Args:
        i: Index of the item being processed.
        entities_rep: TensorArray receiving the attended representation.
        entities_att: TensorArray receiving the padded attention weights.

    Returns:
        `(i + 1, entities_rep, entities_att)` for the next loop iteration.
    """
    a1 = entities1_outputs_iter.read(i)
    a2 = entities2_outputs_iter.read(i)
    b = context_avg_iter.read(i)
    l1 = math_ops.to_int32(entities1_lens_iter.read(i))

    e1 = tf.matmul(a1, weights['entities1_score'])
    e2 = tf.matmul(a2, weights['entities2_score'])
    e12 = tf.matmul(e1, e2)
    # NOTE(review): the score uses biases['context_score'], not the
    # per-entity biases -- confirm this is intended.
    entities_score = tf.reshape(
        tf.nn.tanh(
            tf.matmul(e12, tf.reshape(b, [-1, 1])) + biases['context_score']),
        [1, -1])

    # Softmax over the first `l1` scores only; the remaining positions up to
    # max_entities_len get zero attention.
    entities_att_temp = tf.concat([
        tf.nn.softmax(tf.slice(entities_score, [0, 0], [1, l1])),
        tf.zeros([1, self.max_entities_len - l1])
    ], 1)
    entities_att = entities_att.write(i, entities_att_temp)
    entities_rep = entities_rep.write(
        i, tf.matmul(tf.matmul(a1, a2), entities_att_temp))
    return (i + 1, entities_rep, entities_att)
def refresh_shortlist():
    """Update the shortlist with the highest scores in id_to_score.

    Element 0 of `sl_ids` receives the count of top scores greater than
    `float32.min`, and element 0 of `sl_scores` receives the smallest of the
    top scores; the ids and scores follow. Both assign ops are also stored in
    `self.last_ops`.

    Returns:
        An op that groups the two assignments.
    """
    top_scores, top_ids = nn_ops.top_k(self.id_to_score, self.shortlist_size)
    lowest_top_score = math_ops.reduce_min(top_scores)

    # Count the entries whose score is above float32.min.
    is_populated = math_ops.greater(top_scores, dtypes.float32.min)
    populated_count = math_ops.reduce_sum(math_ops.to_int32(is_populated))

    packed_ids = array_ops.concat([[populated_count], top_ids], 0)
    ids_update = self.sl_ids.assign(math_ops.to_int64(packed_ids))
    packed_scores = array_ops.concat([[lowest_top_score], top_scores], 0)
    scores_update = self.sl_scores.assign(packed_scores)

    self.last_ops = [ids_update, scores_update]
    return control_flow_ops.group(ids_update, scores_update)
def make_grouping_predictions(self, input_layer, reuse=None):
    """model that predicts grouping (grouping_actions).

    Args:
        input_layer: group_input_layer
        reuse: Accepted but not used; the variable scope is always entered
            with `reuse=True`.

    Returns:
        grouping_actions: int32 actions reshaped to [batch_size, num_ops].
        grouping_log_probs: per-batch sum of the sparse softmax cross entropy
            between the logits and the sampled actions.
    """
    hparams = self.hparams
    with variable_scope.variable_scope(hparams.name, reuse=True):
        # input_layer: tensor of size [1, num_ops, hidden_size]
        w_ff = variable_scope.get_variable("w_grouping_ff")
        w_softmax = variable_scope.get_variable("w_grouping_softmax")

        batch_size = array_ops.shape(input_layer)[0]
        embedding_dim = array_ops.shape(input_layer)[2]

        flat_input = array_ops.reshape(
            input_layer, [batch_size * self.num_ops, embedding_dim])
        hidden = math_ops.matmul(flat_input, w_ff)
        logits = math_ops.matmul(hidden, w_softmax)

        if hparams.logits_std_noise > 0:
            # Noise stddev is scaled by norm(logits) / sqrt(number of logits).
            logits_count = math_ops.cast(
                array_ops.size(logits), dtype=dtypes.float32)
            scaled_norm = math_ops.divide(
                linalg_ops.norm(logits), math_ops.sqrt(logits_count))
            noise = random_ops.random_normal(
                array_ops.shape(logits),
                stddev=hparams.logits_std_noise * scaled_norm)
            # Noise is only added while global_step <= stop_noise_step.
            logits = control_flow_ops.cond(
                self.global_step > hparams.stop_noise_step,
                lambda: logits,
                lambda: logits + noise)

        logits = array_ops.reshape(
            logits, [batch_size * self.num_ops, self.num_groups])

        sampled = random_ops.multinomial(logits, 1, seed=hparams.seed)
        sampled = math_ops.to_int32(sampled)
        grouping_actions = array_ops.reshape(
            sampled, [batch_size, self.num_ops])

        flat_actions = array_ops.reshape(grouping_actions, [-1])
        per_op_loss = nn_ops.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=flat_actions)
        per_op_loss = array_ops.reshape(per_op_loss, [batch_size, -1])
        grouping_log_probs = math_ops.reduce_sum(per_op_loss, 1)

        return grouping_actions, grouping_log_probs
def _form_group_indices_nd(is_valid, group_size, shuffle=True):
    """Forms the indices for groups for gather_nd or scatter_nd.

    Args:
        is_valid: A boolean `Tensor` for entry validity with shape
            [batch_size, list_size].
        group_size: A scalar int `Tensor` for the number of examples in a
            group.
        shuffle: A boolean that indicates whether valid indices should be
            shuffled when forming group indices.

    Returns:
        A tuple of Tensors (indices, mask). The first has shape
        [batch_size, num_groups, group_size, 2] and it can be used in
        gather_nd or scatter_nd for group features. The second has the shape
        of [batch_size, num_groups] with value True for valid groups.
    """
    with ops.name_scope(None, 'form_group_indices', (is_valid, group_size)):
        is_valid = ops.convert_to_tensor(is_valid)
        batch_size, list_size = array_ops.unstack(array_ops.shape(is_valid))
        valid_counts = math_ops.reduce_sum(
            math_ops.to_int32(is_valid), axis=1)

        # A tuple of Tensors (batch_rw_indices, batch_indices_mask). The
        # first has shape [batch_size, size, rw_size] and the second has
        # shape [batch_size, size].
        window_indices, mask = _rolling_window_indices(
            list_size, group_size, valid_counts)

        # Valid indices of the tensor are shuffled and put on the top.
        # [batch_size, list_size, 2]. A deterministic op-level seed is set
        # mainly for unittest purpose. We can find a better way to avoid
        # setting this seed explicitly.
        # The returned tensor can be used with `tf.gather_nd` and
        # `tf.scatter_nd` to compose a new [batch_size, list_size] tensor.
        # The values in the last dimension are the indices for an element in
        # the input tensor.
        shuffled_indices = utils.organize_valid_indices(
            is_valid, shuffle=shuffle, seed=87124)

        # Construct indices for gather_nd.
        # [batch_size, num_groups, group_size, 2, 1]
        window_indices = array_ops.expand_dims(window_indices, axis=3)
        # Pair every window position with the index of the batch row it
        # belongs to.
        batch_ids = array_ops.reshape(
            math_ops.range(batch_size), [-1, 1, 1, 1])
        batch_ids = batch_ids * array_ops.ones_like(window_indices)
        gather_coords = array_ops.concat([batch_ids, window_indices], 3)

        indices = array_ops.gather_nd(shuffled_indices, gather_coords)
        return indices, mask
def aspect_body(i, aspect_rep, aspect_att, weights, biases):
    """Compute attention weights and the attended representation for item `i`.

    Closes over `aspect_outputs_iter`, `context_avg_iter`, `aspect_lens_iter`
    and `self`.

    Args:
        i: Index of the item being processed.
        aspect_rep: TensorArray receiving the attended representation.
        aspect_att: TensorArray receiving the padded attention weights.
        weights: Mapping holding the 'aspect_score' weight matrix.
        biases: Mapping holding the 'aspect_score' bias.

    Returns:
        `(i + 1, aspect_rep, aspect_att, weights, biases)`; `weights` and
        `biases` are passed through unchanged.
    """
    aspect_outputs = aspect_outputs_iter.read(i)
    context_avg = context_avg_iter.read(i)
    length = math_ops.to_int32(aspect_lens_iter.read(i))

    # Score = tanh(outputs . W . context_avg + bias), laid out as one row.
    projected = tf.matmul(aspect_outputs, weights['aspect_score'])
    context_column = tf.reshape(context_avg, [-1, 1])
    raw_score = tf.matmul(projected, context_column) + biases['aspect_score']
    aspect_score = tf.reshape(tf.nn.tanh(raw_score), [1, -1])

    # Softmax over the first `length` scores only; the remaining positions up
    # to MaxAspectLength get zero attention.
    valid_att = tf.nn.softmax(tf.slice(aspect_score, [0, 0], [1, length]))
    padding = tf.zeros([1, self.cfg.MaxAspectLength - length])
    aspect_att_temp = tf.concat([valid_att, padding], 1)

    aspect_att = aspect_att.write(i, aspect_att_temp)
    attended = tf.matmul(aspect_att_temp, aspect_outputs)
    aspect_rep = aspect_rep.write(i, attended)
    return (i + 1, aspect_rep, aspect_att, weights, biases)
def dynamic_rnn_decoder(decoder_inputs, cell, initial_state=None,
                        dtype=dtypes.float32, sequence_length=None,
                        loop_function=None, parallel_iterations=None,
                        swap_memory=False, time_major=False, scope=None):
    """RNN decoder for seq2seq model.

    Performs fully dynamic unrolling of the inputs. The input is not a Python
    list of `Tensors` but a single `Tensor` where the maximum time is either
    the first or second dimension (see the parameter `time_major`). The
    corresponding output is a single `Tensor` in the same layout as the
    input.

    Args:
        decoder_inputs: the RNN decoder inputs.
            If time_major == False (default), this must be a tensor of shape:
            `[batch_size, max_time, cell_input_size]`
            If time_major == True, this must be a tensor of shape:
            `[max_time, batch_size, cell.input_size]`
        cell: An instance of `RNNCell`.
        initial_state: Forwarded to `_dynamic_rnn_decoder_loop`.
        dtype: Forwarded to `_dynamic_rnn_decoder_loop`.
        sequence_length: Per-example lengths. When not None it is cast to
            int32 before being forwarded.
            NOTE(review): the previous docstring called this required, but
            the default is None -- confirm what the loop expects.
        loop_function: Forwarded to `_dynamic_rnn_decoder_loop`.
        parallel_iterations: Forwarded to the loop; defaults to 32 when
            falsy.
        swap_memory: Forwarded to `_dynamic_rnn_decoder_loop`.
        time_major: Layout of `decoder_inputs` and of the returned outputs.
        scope: Variable scope name; defaults to "dynamic_rnn_decoder".

    Returns:
        A tuple `(outputs, state)` as produced by
        `_dynamic_rnn_decoder_loop`, with `outputs` transposed back to
        batch-major when `time_major` is False.

    Raises:
        TypeError: If `cell` is not an instance of `RNNCell`.
    """
    if not isinstance(cell, rnn_cell.RNNCell):
        raise TypeError("cell must be an instance of RNNCell")

    # The loop receives time-major inputs. Previously `inputs` was only
    # assigned in the batch-major branch, so time_major=True failed with an
    # unbound-local error.
    if time_major:
        inputs = decoder_inputs
    else:
        inputs = array_ops.transpose(decoder_inputs, [1, 0, 2])

    parallel_iterations = parallel_iterations or 32
    if sequence_length is not None:
        sequence_length = math_ops.to_int32(sequence_length)
        sequence_length = array_ops.identity(
            sequence_length, name="sequence_length")

    with variable_scope.variable_scope(
            scope or "dynamic_rnn_decoder") as varscope:
        # Default to caching each variable on the device of the op that
        # reads it.
        if varscope.caching_device is None:
            varscope.set_caching_device(lambda op: op.device)

        outputs, state = _dynamic_rnn_decoder_loop(
            inputs, initial_state, cell, sequence_length, loop_function,
            parallel_iterations, swap_memory, dtype)

        # Return the outputs in the same layout the caller supplied.
        if not time_major:
            outputs = array_ops.transpose(outputs, [1, 0, 2])
        return outputs, state