def rnn_decoder(decoder_inputs, initial_state, cell, scope=None):
  """RNN Decoder that creates training and sampling sub-graphs.

  Args:
    decoder_inputs: Inputs for decoder, list of tensors.
      This is used only in the training sub-graph.
    initial_state: Initial state for the decoder.
    cell: RNN cell to use for decoder.
    scope: Scope to use; if None, a new one will be created.

  Returns:
    Lists of tensors for outputs and states of the training and sampling
    sub-graphs.
  """
  with tf.variable_scope(scope or "dnn_decoder"):
    states, sampling_states = [initial_state], [initial_state]
    outputs, sampling_outputs = [], []
    with tf.op_scope([decoder_inputs, initial_state], "training"):
      for i, inp in enumerate(decoder_inputs):
        if i > 0:
          tf.get_variable_scope().reuse_variables()
        output, new_state = cell(inp, states[-1])
        outputs.append(output)
        states.append(new_state)
    with tf.op_scope([initial_state], "sampling"):
      for i, _ in enumerate(decoder_inputs):
        if i == 0:
          sampling_outputs.append(outputs[i])
          sampling_states.append(states[i])
        else:
          sampling_output, sampling_state = cell(sampling_outputs[-1],
                                                 sampling_states[-1])
          sampling_outputs.append(sampling_output)
          sampling_states.append(sampling_state)
  return outputs, states, sampling_outputs, sampling_states
def batch_sample_with_temperature(a, temperature=1.0):
    '''Like sample_with_temperature, except it can handle batched input a
    of shape [batch_size x logits].

    Takes logits as input and samples an index from each row. This all runs on
    the GPU because it is implemented with TensorFlow ops. As you increase the
    temperature, you get more diversified output but with more errors (usually
    grammatical, if you are generating text).

    Equation can be found here: https://en.wikipedia.org/wiki/Softmax_function
    (under reinforcement learning). Karpathy did it here as well:
    https://github.com/karpathy/char-rnn/blob/4297a9bf69726823d944ad971555e91204f12ca8/sample.lua

    Args:
        a: logits, a 2D array of shape [batch_size x logits].
        temperature: how much variance you want in the output.

    Returns:
        The index selected from each row's distribution.
    '''
    with tf.op_scope([a, temperature], "batch_sample_with_temperature"):
        # Divide by the temperature, then exponentiate to remove negative values.
        exponent_raised = tf.exp(tf.div(a, temperature))
        # Normalize to obtain probabilities (keep_dims so the division
        # broadcasts per row).
        matrix_X = tf.div(exponent_raised,
                          tf.reduce_sum(exponent_raised, reduction_indices=1,
                                        keep_dims=True))
        matrix_U = tf.random_uniform(tf.shape(a), minval=0, maxval=1)
        # dimension=1 because we argmax across rows.
        final_number = tf.argmax(tf.sub(matrix_X, matrix_U), dimension=1)
    return final_number
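# Minimal usage sketch for batch_sample_with_temperature, assuming a TF 0.x
# environment (import tensorflow as tf) where tf.op_scope / tf.sub / tf.div
# still exist. The logits and temperature values below are illustrative only.
def _demo_batch_sample_with_temperature():
    logits = tf.constant([[1.0, 2.0, 5.0, 0.5],
                          [4.0, 0.1, 0.1, 0.1]])  # [batch_size=2 x logits=4]
    sampled = batch_sample_with_temperature(logits, temperature=0.7)
    with tf.Session() as sess:
        # Returns one sampled column index per row, e.g. array([2, 0]).
        print(sess.run(sampled))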
def sequence_loss(logits, targets, weights, num_decoder_symbols,
                  average_across_timesteps=True, average_across_batch=True,
                  softmax_loss_function=None, name=None):
  """Weighted cross-entropy loss for a sequence of logits, batch-collapsed.

  Args:
    logits: list of 2D Tensors of shape [batch_size x num_decoder_symbols].
    targets: list of 1D batch-sized int32-Tensors of the same length as logits.
    weights: list of 1D batch-sized float-Tensors of the same length as logits.
    num_decoder_symbols: integer, number of decoder symbols (output classes).
    average_across_timesteps: If set, divide the returned cost by the total
      label weight.
    average_across_batch: If set, divide the returned cost by the batch size.
    softmax_loss_function: function (inputs-batch, labels-batch) -> loss-batch
      to be used instead of the standard softmax (the default if this is None).
    name: optional name for this operation, defaults to "sequence_loss".

  Returns:
    A scalar float Tensor: the average log-perplexity per symbol (weighted).

  Raises:
    ValueError: if len(logits) is different from len(targets) or len(weights).
  """
  with tf.op_scope(logits + targets + weights, name, "sequence_loss"):
    cost = tf.reduce_sum(sequence_loss_by_example(
        logits, targets, weights, num_decoder_symbols,
        average_across_timesteps=average_across_timesteps,
        softmax_loss_function=softmax_loss_function))
    if average_across_batch:
      batch_size = tf.shape(targets[0])[0]
      return cost / tf.cast(batch_size, tf.float32)
    else:
      return cost
def distort_color(image, thread_id=0, scope=None):
  """Distort the color of the image.

  Each color distortion is non-commutative and thus ordering of the color ops
  matters. Ideally we would randomly permute the ordering of the color ops.
  Rather than adding that level of complication, we select a distinct ordering
  of color ops for each preprocessing thread.

  Args:
    image: Tensor containing single image.
    thread_id: preprocessing thread ID.
    scope: Optional scope for op_scope.
  Returns:
    color-distorted image
  """
  with tf.op_scope([image], scope, 'distort_color'):
    color_ordering = thread_id % 2
    if color_ordering == 0:
      image = tf.image.random_brightness(image, max_delta=32. / 255.)
      image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
      image = tf.image.random_hue(image, max_delta=0.2)
      image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
    elif color_ordering == 1:
      image = tf.image.random_brightness(image, max_delta=32. / 255.)
      image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
      image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
      image = tf.image.random_hue(image, max_delta=0.2)
    # The random_* ops do not necessarily clamp.
    image = tf.clip_by_value(image, 0.0, 1.0)
    return image
def avg_pool(inputs, kernel_size, stride=2, padding='VALID', scope=None):
  """Adds an average pooling layer.

  It is assumed by the wrapper that the pooling is only done per image and not
  in depth or batch.

  Args:
    inputs: a tensor of size [batch_size, height, width, depth].
    kernel_size: a list of length 2: [kernel_height, kernel_width] of the
      pooling kernel over which the op is computed. Can be an int if both
      values are the same.
    stride: a list of length 2: [stride_height, stride_width]. Can be an int
      if both strides are the same. Note that presently both strides must have
      the same value.
    padding: the padding method, either 'VALID' or 'SAME'.
    scope: Optional scope for op_scope.

  Returns:
    a tensor representing the results of the pooling operation.
  """
  with tf.op_scope([inputs], scope, 'AvgPool'):
    kernel_h, kernel_w = _two_element_tuple(kernel_size)
    stride_h, stride_w = _two_element_tuple(stride)
    return tf.nn.avg_pool(inputs,
                          ksize=[1, kernel_h, kernel_w, 1],
                          strides=[1, stride_h, stride_w, 1],
                          padding=padding)
def l1_orthogonal_regularizer(logits_to_normalize, l1_alpha_loss_factor=10, name=None):
    '''Adds an L1 orthogonality loss linearly to the softmax cost function.

    Motivation for this loss function comes from: https://redd.it/3wx4sr
    Specifically, thanks to spurious_recollectio and harponen on reddit for
    suggesting this to me.

    This is different from the unitary approach because it is an orthogonal
    matrix approximation -- it will suffer on timesteps longer than 500 and
    takes more computation, on the order of O(n^3).

    The L1 equation is:
        alpha * T.abs(T.dot(W, W.T) - (1.05) ** 2 * T.identity_like(W))

    Returns:
        final_l1_loss: one scalar value representing the loss averaged across the batch.
    '''
    with tf.op_scope(logits_to_normalize, name, "rnn_l1_loss"):
        Weights_for_l1_loss = tf.get_variable("linear")
        matrix_dot_product = tf.matmul(Weights_for_l1_loss, Weights_for_l1_loss,
                                       transpose_a=True)
        # We need to check here that we have the right dimension -- should it be
        # the 0 or the 1 dim?
        identity_matrix = lfe.identity_like(Weights_for_l1_loss)
        matrix_minus_identity = matrix_dot_product - 2 * 1.05 * identity_matrix
        absolute_cost = tf.abs(matrix_minus_identity)
        # Note: batch_size is assumed to be defined in the enclosing scope.
        final_l1_loss = l1_alpha_loss_factor * (tf.reduce_sum(absolute_cost) / batch_size)
        return final_l1_loss
def avg_pool(inputs, kernel_size, stride=2, padding='VALID', scope=None):
  """Adds an average pooling layer.

  It is assumed by the wrapper that the pooling is only done per image and not
  in depth or batch.

  Args:
    inputs: a tensor of size [batch_size, height, width, depth].
    kernel_size: the size of the pooling kernel over which the op is computed.
    stride: the stride in height and width of the convolution.
    padding: the padding method, either 'VALID' or 'SAME'.
    scope: Optional scope for op_scope.

  Returns:
    a tensor representing the results of the pooling operation.
  Raises:
    ValueError: if `kernel_size` is not a list of length 2.
  """
  if len(kernel_size) != 2:
    raise ValueError('kernel_size must be a list of length 2.')
  with tf.op_scope([inputs], scope, 'AvgPool'):
    return tf.nn.avg_pool(inputs,
                          ksize=[1, kernel_size[0], kernel_size[1], 1],
                          strides=[1, stride, stride, 1],
                          padding=padding)
def std_forward(a, weights, bias_weights, name=None):
    with tf.op_scope([a, weights, bias_weights], name, 'std_forward') as scope:
        a = tf.convert_to_tensor(a, dtype=tf.float32, name='input')
        weights = tf.convert_to_tensor(weights, dtype=tf.float32, name='weights')
        bias_weights = tf.convert_to_tensor(bias_weights, dtype=tf.float32,
                                            name='bias_weights')
        biased = tf.concat(1, (weights, bias_weights), name='biased')
        return tf.matmul(biased, a, name=scope)
def unzip(x, split_dim, current_length, num_splits=2, name=None): """Splits a tensor by unzipping along the split_dim. For example the following array split into 2 would be: [1, 2, 3, 4, 5, 6] -> [1, 3, 5], [2, 4, 6] and by 3: [1, 2, 3, 4] -> [1, 4], [2], [3] Args: x: The tensor to split. split_dim: The dimension to split along. current_length: Current length along the split_dim. num_splits: The number of splits. name: Optional name for this op. Returns: A length num_splits sequence. """ with tf.op_scope([x], name, 'unzip') as scope: x = tf.convert_to_tensor(x, name='x') # There is probably a more efficient way to do this. all_splits = tf.split(split_dim, current_length, x, name=scope) splits = [[] for _ in xrange(num_splits)] for i in xrange(current_length): splits[i % num_splits].append(all_splits[i]) return [tf.concat(split_dim, s) for s in splits]
def SoftThreshold(t, threshold_ratio, name=None):
  """Soft-threshold a tensor by the mean value.

  Soft-threshold each dimension-0 vector (for a matrix, each column) by the
  mean of its absolute values multiplied by the threshold_ratio factor. Here
  we soft-threshold each column as it corresponds to each unit in a layer.

  Args:
    t: the input tensor.
    threshold_ratio: the threshold ratio.
    name: the optional name for the returned tensor.
  Returns:
    the thresholded tensor, where each entry is soft-thresholded by
    threshold_ratio times the mean of the absolute value of each column.
  """
  assert threshold_ratio >= 0
  with tf.op_scope([t, threshold_ratio], name, "soft_thresholding") as name:
    saved_shape = tf.shape(t)
    t2 = tf.reshape(t, tf.concat(0, [tf.slice(saved_shape, [0], [1]), -1]))
    t_abs = tf.abs(t2)
    t_x = tf.sign(t2) * tf.nn.relu(t_abs -
                                   (tf.reduce_mean(t_abs, [0], keep_dims=True) *
                                    threshold_ratio))
    return tf.reshape(t_x, saved_shape, name=name)
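# Minimal usage sketch for SoftThreshold (TF 0.x style ops assumed). With
# threshold_ratio=0.5, each column is shrunk toward zero by half the mean of
# its absolute values: the column thresholds below are 0.375 and 0.075.
def _demo_soft_threshold():
    t = tf.constant([[1.0, -0.2],
                     [0.5,  0.1]])
    thresholded = SoftThreshold(t, threshold_ratio=0.5)
    with tf.Session() as sess:
        # Expected result: [[0.625, -0.125], [0.125, 0.025]]
        print(sess.run(thresholded))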
def inference(data, num_classes, scope): with tf.op_scope([data], scope): with scopes.arg_scope([ops.conv2d, ops.fc, ops.dropout], is_training=True): with tf.variable_scope('fc1'): fc1 = ops.fc( data, num_units_out=2048, activation=tf.nn.sigmoid) with tf.variable_scope('fc2'): fc2 = ops.fc( fc1, num_units_out=2048, activation=tf.nn.sigmoid) with tf.variable_scope('fc3'): fc3 = ops.fc( fc2, num_units_out=2048, activation=tf.nn.sigmoid) with tf.variable_scope('fc4'): fc4 = ops.fc( fc3, num_units_out=2048, activation=tf.nn.sigmoid) with tf.variable_scope('fc5'): fc5 = ops.fc( fc4, num_units_out=num_classes, activation=None) return fc5
def loss(logits, one_hot_labels, batch_size, scope): with tf.op_scope([logits, one_hot_labels], scope, 'CrossEntropyLoss'): cross_entropy = tf.nn.softmax_cross_entropy_with_logits( logits, one_hot_labels, name='xentropy') return cross_entropy
def U_t_variance(timestep_outputs_matrix, total_timesteps, gamma=5):
    with tf.op_scope([timestep_outputs_matrix], "U_t_variance"):
        G_i_matrix = G_i_piecewise_variance(timestep_outputs_matrix, total_timesteps)
        # Weight each timestep output by its g(i) value, then take the product
        # over timesteps.
        timestep_outputs_matrix_with_g = tf.mul(timestep_outputs_matrix, G_i_matrix)
        return tf.reduce_prod(timestep_outputs_matrix_with_g)
def sequence_loss_by_example(inputs, targets, weights, loss_function,
                             average_across_timesteps=True, name=None):
  """Sampled softmax loss for a sequence of inputs (per example).

  Args:
    inputs: List of 2D Tensors of shape [batch_size x hid_dim].
    targets: List of 1D batch-sized int32 Tensors of the same length as inputs.
    weights: List of 1D batch-sized float-Tensors of the same length as inputs.
    loss_function: Sampled softmax function (inputs, labels) -> loss
    average_across_timesteps: If set, divide the returned cost by the total
      label weight.
    name: Optional name for this operation, default: 'sequence_loss_by_example'.

  Returns:
    1D batch-sized float Tensor: The log-perplexity for each sequence.

  Raises:
    ValueError: If len(inputs) is different from len(targets) or len(weights).
  """
  if len(targets) != len(inputs) or len(weights) != len(inputs):
    raise ValueError('Lengths of inputs, weights, and targets must be the same '
                     '%d, %d, %d.' % (len(inputs), len(weights), len(targets)))
  with tf.op_scope(inputs + targets + weights, name,
                   'sequence_loss_by_example'):
    log_perp_list = []
    for inp, target, weight in zip(inputs, targets, weights):
      crossent = loss_function(inp, target)
      log_perp_list.append(crossent * weight)
    log_perps = tf.add_n(log_perp_list)
    if average_across_timesteps:
      total_size = tf.add_n(weights)
      total_size += 1e-12  # Just to avoid division by 0 for all-0 weights.
      log_perps /= total_size
  return log_perps
def BatchClipByL2norm(t, upper_bound, name=None): """Clip an array of tensors by L2 norm. Shrink each dimension-0 slice of tensor (for matrix it is each row) such that the l2 norm is at most upper_bound. Here we clip each row as it corresponds to each example in the batch. Args: t: the input tensor. upper_bound: the upperbound of the L2 norm. name: optional name. Returns: the clipped tensor. """ assert upper_bound > 0 with tf.op_scope([t, upper_bound], name, "batch_clip_by_l2norm") as name: saved_shape = tf.shape(t) batch_size = tf.slice(saved_shape, [0], [1]) t2 = tf.reshape(t, tf.concat(0, [batch_size, [-1]])) upper_bound_inv = tf.fill(tf.slice(saved_shape, [0], [1]), tf.constant(1.0/upper_bound)) # Add a small number to avoid divide by 0 l2norm_inv = tf.rsqrt(tf.reduce_sum(t2 * t2, [1]) + 0.000001) scale = tf.minimum(l2norm_inv, upper_bound_inv) * upper_bound clipped_t = tf.matmul(tf.diag(scale), t2) clipped_t = tf.reshape(clipped_t, saved_shape, name=name) return clipped_t
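# Minimal usage sketch for BatchClipByL2norm (TF 0.x style ops assumed). Rows
# with L2 norm above upper_bound are rescaled onto the bound; the rest pass
# through unchanged.
def _demo_batch_clip_by_l2norm():
    t = tf.constant([[3.0, 4.0],    # norm 5.0 -> scaled to [0.6, 0.8]
                     [0.3, 0.4]])   # norm 0.5 -> unchanged
    clipped = BatchClipByL2norm(t, upper_bound=1.0)
    with tf.Session() as sess:
        print(sess.run(clipped))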
def norm_stabilizer_loss(logits_to_normalize, norm_regularizer_factor=50, name=None):
    '''Adds a norm stabilizer loss.

    Args:
        logits_to_normalize: Output logits or hidden states -- the state of each
            decoder cell in each time-step. This is a list with length
            len(decoder_inputs) -- one item for each time-step. Each item is a 2D
            Tensor of shape [batch_size x cell.state_size] (or
            [batch_size x output_logits]).
        norm_regularizer_factor: The factor required to apply norm stabilization.
            Keep in mind that a larger factor will allow you to achieve a lower
            loss, but it will take many more epochs to do so!

    Returns:
        final_reg_loss: One scalar value representing the loss averaged across the batch.
    '''
    with tf.op_scope(logits_to_normalize, name, "norm_stabilizer_loss"):
        batch_size = tf.shape(logits_to_normalize[0])[0]
        squared_sum = tf.zeros((batch_size), tf.float32)
        # The summation from t to T: take the difference of the Euclidean
        # (Frobenius) norms of consecutive time-steps. Each item is
        # [batch_size x state_size], so we reduce along axis 1 and the
        # difference has shape [batch_size].
        for q in xrange(len(logits_to_normalize) - 1):
            difference = tf.sub(
                lfe.frobenius_norm(logits_to_normalize[q + 1], reduction_indicies=1),
                lfe.frobenius_norm(logits_to_normalize[q], reduction_indicies=1))
            squared_sum = tf.add(squared_sum, tf.square(difference))
        # Average across the batch and divide by T.
        final_reg_loss = norm_regularizer_factor * (
            tf.reduce_sum(squared_sum) /
            (len(logits_to_normalize) * tf.cast(batch_size, tf.float32)))
        return final_reg_loss
def mean_squared_error_regressor(tensor_in, labels, weights, biases, name=None): """Returns prediction and loss for mean squared error regression.""" with tf.op_scope([tensor_in, labels], name, "mean_squared_error_regressor"): predictions = tf.nn.xw_plus_b(tensor_in, weights, biases) diff = predictions - labels loss = tf.reduce_mean(tf.mul(diff, diff)) return predictions, loss
def l2_orthogonal_regularizer(logits_to_normalize, l2_alpha_loss_factor=10, name=None):
    '''Adds an L2 orthogonality loss linearly to the softmax cost function.

    Motivation for this loss function comes from:
    https://www.reddit.com/r/MachineLearning/comments/3uk2q5/151106464_unitary_evolution_recurrent_neural/
    Specifically, thanks to spurious_recollectio on reddit for suggesting this to me.

    This is different from the unitary approach because it is an orthogonal
    matrix approximation -- it will suffer on timesteps longer than 500 and
    takes more computation, on the order of O(n^3).

    The equation of the cost is:
        loss += alpha * T.sum((T.dot(W, W.T) - (1.05) ** 2 * T.identity_like(W)) ** 2)

    Returns:
        final_l2_loss: one scalar value representing the loss averaged across the batch.
    '''
    with tf.op_scope(logits_to_normalize, name, "rnn_l2_loss"):
        # Somehow we need to get the weights from the RNN right here.
        Weights_for_l2_loss = tf.get_variable("linear")
        matrix_dot_product = tf.matmul(Weights_for_l2_loss, Weights_for_l2_loss,
                                       transpose_a=True)
        # We need to check here that we have the right dimension -- should it be
        # the 0 or the 1 dim?
        identity_matrix = lfe.identity_like(Weights_for_l2_loss)
        matrix_minus_identity = matrix_dot_product - 2 * 1.05 * identity_matrix
        square_the_loss = tf.square(matrix_minus_identity)
        # Note: batch_size is assumed to be defined in the enclosing scope.
        final_l2_loss = l2_alpha_loss_factor * (
            tf.reduce_sum(square_the_loss) / batch_size)
        return final_l2_loss
def embedding_lookup(params, ids, name="embedding_lookup"):
  """Provides an N-dimensional version of tf.embedding_lookup.

  Ids are flattened to a 1d tensor before being passed to embedding_lookup,
  then they are unflattened to match the original ids shape plus an extra
  leading dimension of the size of the embeddings.

  Args:
    params: List of tensors of size D0 x D1 x ... x Dn-2 x Dn-1.
    ids: N-dimensional tensor of B0 x B1 x .. x Bn-2 x Bn-1.
      Must contain indices into params.
    name: Optional name for the op.

  Returns:
    A tensor of size B0 x B1 x .. x Bn-2 x Bn-1 x D1 x ... x Dn-2 x Dn-1
    containing the values from the params tensor(s) for indices in ids.

  Raises:
    ValueError: if some parameters are invalid.
  """
  with tf.op_scope([params, ids], name, "embedding_lookup"):
    params = tf.convert_to_tensor(params)
    ids = tf.convert_to_tensor(ids)
    shape = tf.shape(ids)
    ids_flat = tf.reshape(ids, tf.reduce_prod(shape, keep_dims=True))
    embeds_flat = tf.nn.embedding_lookup(params, ids_flat, name)
    embed_shape = tf.concat(0, [shape, [-1]])
    embeds = tf.reshape(embeds_flat, embed_shape)
    embeds.set_shape(ids.get_shape().concatenate(params.get_shape()[1:]))
    return embeds
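# Minimal usage sketch for the N-dimensional embedding_lookup above (TF 0.x
# style ops assumed). The params/ids values are illustrative only.
def _demo_nd_embedding_lookup():
    params = tf.constant([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0],
                          [3.0, 3.0], [4.0, 4.0]])        # vocab 5, embed dim 2
    ids = tf.constant([[0, 1, 2],
                       [3, 4, 0]])                        # shape [2, 3]
    embeds = embedding_lookup(params, ids)
    with tf.Session() as sess:
        # Result has shape [2, 3, 2]: the ids shape plus the embedding dim.
        print(sess.run(embeds))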
def my_model_with_buckets(encoder_inputs, decoder_inputs, targets, weights,
                          buckets, seq2seq, softmax_loss_function=None,
                          per_example_loss=False, name=None):
  """Improved version of model_with_buckets, which also returns the states."""
  if len(encoder_inputs) < buckets[-1][0]:
    raise ValueError("Length of encoder_inputs (%d) must be at least that of la"
                     "st bucket (%d)." % (len(encoder_inputs), buckets[-1][0]))
  if len(targets) < buckets[-1][1]:
    raise ValueError("Length of targets (%d) must be at least that of last "
                     "bucket (%d)." % (len(targets), buckets[-1][1]))
  if len(weights) < buckets[-1][1]:
    raise ValueError("Length of weights (%d) must be at least that of last "
                     "bucket (%d)." % (len(weights), buckets[-1][1]))

  all_inputs = encoder_inputs + decoder_inputs + targets + weights
  losses = []
  outputs = []
  states = []
  with tf.op_scope(all_inputs, name, "my_model_with_buckets"):
    for j, bucket in enumerate(buckets):
      with tf.variable_scope(tf.get_variable_scope(),
                             reuse=True if j > 0 else None):
        bucket_outputs, _, bucket_enc_state = seq2seq(encoder_inputs[:bucket[0]],
                                                      decoder_inputs[:bucket[1]])
        outputs.append(bucket_outputs)
        states.append(bucket_enc_state)
        if per_example_loss:
          losses.append(tf.nn.seq2seq.sequence_loss_by_example(
              outputs[-1], targets[:bucket[1]], weights[:bucket[1]],
              softmax_loss_function=softmax_loss_function))
        else:
          losses.append(tf.nn.seq2seq.sequence_loss(
              outputs[-1], targets[:bucket[1]], weights[:bucket[1]],
              softmax_loss_function=softmax_loss_function))
  return outputs, losses, states
def dot(a, b):
    with tf.op_scope([a, b], 'dot'):
        # TODO: implement an N-dimensional dot product that is consistent with NumPy.
        a_shape = a.get_shape().as_list()
        a_dims = len(a_shape)
        b_shape = b.get_shape().as_list()
        b_dims = len(b_shape)

        # Scalar dot scalar, scalar dot tensor or tensor dot scalar:
        # just do element-wise multiply.
        if a_dims == 0 or b_dims == 0:
            return a * b

        # Vector dot vector: perform element-wise product and sum them all.
        if a_dims == 1 and b_dims == 1:
            return tf.reduce_sum(a * b)

        # Vector dot matrix or matrix dot vector: expand the vector to a matrix,
        # then squeeze the result.
        if a_dims <= 2 and b_dims <= 2:
            if a_dims == 1:
                a = tf.expand_dims(a, dim=0)
            if b_dims == 1:
                b = tf.expand_dims(b, dim=1)
            ret = tf.matmul(a, b)
            if a_dims == 1:
                ret = tf.squeeze(ret, [0])
            if b_dims == 1:
                ret = tf.squeeze(ret, [1])
            return ret

        # Otherwise we do not know how to handle the situation.
        raise TypeError('Tensor dot between shape %r and %r is not supported.' %
                        (a_shape, b_shape))
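# Minimal usage sketch for dot (TF 0.x style ops assumed), showing the
# matrix-times-vector branch where the vector is expanded and then squeezed.
def _demo_dot():
    a = tf.constant([[1.0, 2.0],
                     [3.0, 4.0]])
    b = tf.constant([5.0, 6.0])
    with tf.Session() as sess:
        # Expected result: [17., 39.]
        print(sess.run(dot(a, b)))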
def cross_entropy_loss(logits, one_hot_labels, label_smoothing=0, weight=1.0, scope=None): """Define a Cross Entropy loss using softmax_cross_entropy_with_logits. It can scale the loss by weight factor, and smooth the labels. Args: logits: [batch_size, num_classes] logits outputs of the network . one_hot_labels: [batch_size, num_classes] target one_hot_encoded labels. label_smoothing: if greater than 0 then smooth the labels. weight: scale the loss by this factor. scope: Optional scope for op_scope. Returns: A tensor with the softmax_cross_entropy loss. """ logits.get_shape().assert_is_compatible_with(one_hot_labels.get_shape()) with tf.op_scope([logits, one_hot_labels], scope, 'CrossEntropyLoss'): num_classes = one_hot_labels.get_shape()[-1].value one_hot_labels = tf.cast(one_hot_labels, logits.dtype) if label_smoothing > 0: smooth_positives = 1.0 - label_smoothing smooth_negatives = label_smoothing / num_classes one_hot_labels = one_hot_labels * smooth_positives + smooth_negatives cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, one_hot_labels, name='xentropy') weight = tf.convert_to_tensor(weight, dtype=logits.dtype.base_dtype, name='loss_weight') loss = tf.mul(weight, tf.reduce_mean(cross_entropy), name='value') tf.add_to_collection(LOSSES_COLLECTION, loss) return loss
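# Worked example of the label smoothing used in cross_entropy_loss above. With
# num_classes=4 and label_smoothing=0.2, smooth_positives = 0.8 and
# smooth_negatives = 0.2 / 4 = 0.05, so a one-hot row [0, 0, 1, 0] becomes
# [0.05, 0.05, 0.85, 0.05] (it still sums to 1). A minimal sketch of the same
# transformation, with illustrative values:
def _demo_label_smoothing(label_smoothing=0.2):
    one_hot_labels = tf.constant([[0.0, 0.0, 1.0, 0.0]])
    num_classes = 4
    smooth_positives = 1.0 - label_smoothing
    smooth_negatives = label_smoothing / num_classes
    smoothed = one_hot_labels * smooth_positives + smooth_negatives
    with tf.Session() as sess:
        print(sess.run(smoothed))  # [[0.05, 0.05, 0.85, 0.05]]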
def sequence_classifier(decoding, labels, sampling_decoding=None, name=None):
  """Returns predictions and loss for sequence of predictions.

  Args:
    decoding: List of Tensors with predictions.
    labels: List of Tensors with labels.
    sampling_decoding: Optional, List of Tensors with predictions to be used
      in sampling. E.g. they shouldn't have a dependency on outputs.
      If not provided, decoding is used.

  Returns:
    Predictions and losses tensors.
  """
  with tf.op_scope([decoding, labels], name, "sequence_classifier"):
    predictions, xent_list = [], []
    for i, pred in enumerate(decoding):
      xent_list.append(
          tf.nn.softmax_cross_entropy_with_logits(
              pred, labels[i], name="sequence_loss/xent_raw{0}".format(i)))
      if sampling_decoding:
        predictions.append(tf.nn.softmax(sampling_decoding[i]))
      else:
        predictions.append(tf.nn.softmax(pred))
    xent = tf.add_n(xent_list, name="sequence_loss/xent")
    loss = tf.reduce_sum(xent, name="sequence_loss")
    return array_ops.expand_concat(1, predictions), loss
def softmax_classifier(tensor_in, labels, weights, biases, class_weight=None, name=None): """Returns prediction and loss for softmax classifier. Args: tensor_in: Input tensor, [batch_size, feature_size], features. labels: Tensor, [batch_size, n_classes], labels of the output classes. weights: Tensor, [batch_size, feature_size], linear transformation matrix. biases: Tensor, [batch_size], biases. class_weight: Tensor, optional, [n_classes], weight for each class. If not given, all classes are supposed to have weight one. Returns: Prediction and loss tensors. """ with tf.op_scope([tensor_in, labels], name, "softmax_classifier"): logits = tf.nn.xw_plus_b(tensor_in, weights, biases) if class_weight is not None: logits = tf.mul(logits, class_weight) xent = tf.nn.softmax_cross_entropy_with_logits(logits, labels, name="xent_raw") loss = tf.reduce_mean(xent, name="xent") predictions = tf.nn.softmax(logits, name=name) return predictions, loss
def MultitaskLogits(features, num_tasks, num_classes=2, weight_init=None, bias_init=None, dropout=None, name=None): """Create a logit tensor for each classification task. Args: features: A 2D tensor with dimensions batch_size x num_features. num_tasks: Number of classification tasks. num_classes: Number of classes for each task. weight_init: Weight initializer. bias_init: Bias initializer. dropout: Float giving dropout probability for weights (NOT keep probability). name: Name for this op. Defaults to 'multitask_logits'. Returns: A list of logit tensors; one for each classification task. """ logits = [] with tf.name_scope('multitask_logits'): for task_idx in range(num_tasks): with tf.op_scope([features], name, ('task' + str(task_idx).zfill(len(str(num_tasks))))): logits.append( Logits(features, num_classes, weight_init=weight_init, bias_init=bias_init, dropout=dropout)) return logits
def seq2seq_inputs(X, y, input_length, output_length, sentinel=None, name=None):
  """Processes inputs for Sequence to Sequence models.

  Args:
    X: Input Tensor [batch_size, input_length, embed_dim].
    y: Output Tensor [batch_size, output_length, embed_dim].
    input_length: length of input X.
    output_length: length of output y.
    sentinel: optional first input to decoder and final output expected.
      If sentinel is not provided, zeros are used. Because y is not available
      at sampling time, the shape of the sentinel will be inferred from X.

  Returns:
    Encoder input from X, and decoder inputs and outputs from y.
  """
  with tf.op_scope([X, y], name, "seq2seq_inputs"):
    in_X = array_ops.split_squeeze(1, input_length, X)
    y = array_ops.split_squeeze(1, output_length, y)
    if not sentinel:
      # Set to zeros of shape of y[0], using X for batch size.
      sentinel_shape = tf.pack([tf.shape(X)[0], y[0].get_shape()[1]])
      sentinel = tf.zeros(sentinel_shape)
      sentinel.set_shape(y[0].get_shape())
    in_y = [sentinel] + y
    out_y = y + [sentinel]
    return in_X, in_y, out_y
def FullyConnectedLayer(tensor, size, weight_init=None, bias_init=None, name=None): """Fully connected layer. Args: tensor: Input tensor. size: Number of nodes in this layer. weight_init: Weight initializer. bias_init: Bias initializer. name: Name for this op. Defaults to 'fully_connected'. Returns: A new tensor representing the output of the fully connected layer. Raises: ValueError: If input tensor is not 2D. """ if len(tensor.get_shape()) != 2: raise ValueError('Dense layer input must be 2D, not %dD' % len(tensor.get_shape())) if weight_init is None: num_features = tensor.get_shape()[-1].value weight_init = tf.truncated_normal([num_features, size], stddev=0.01) if bias_init is None: bias_init = tf.zeros([size]) with tf.op_scope([tensor], name, 'fully_connected'): w = tf.Variable(weight_init, name='w') b = tf.Variable(bias_init, name='b') return tf.nn.xw_plus_b(tensor, w, b)
def batch_sample_with_temperature_old(arr, temperature=1.0):
    """Samples from something resembling a multinomial distribution.

    Works by multiplying the probabilities of each value by a random uniform
    number and then selecting the max. arr is of shape (batch_size, vocab_size).
    Returns the index of the item that was sampled in each row.

    Source: https://github.com/tensorflow/tensorflow/issues/456
    """
    batch_size, vocab_size = arr.get_shape()
    with tf.op_scope([arr, temperature], "batch_sample_with_temperature"):
        # Subtract the largest value in each batch to improve stability.
        c = tf.reduce_max(arr, reduction_indices=1, keep_dims=True)
        softmax = tf.nn.softmax(arr - c) + 1e-6
        x = tf.log(softmax)  # / temperature

        # Softmax again.
        x = tf.nn.softmax(x) / temperature

        # Perform the sampling.
        u = tf.random_uniform(tf.shape(arr), minval=1e-6, maxval=1)
        sampled_idx = tf.argmax(tf.sub(x, -tf.log(-tf.log(u))), dimension=1)
    return sampled_idx, x
def sampled_sequence_loss(inputs, targets, weights, loss_function, average_across_timesteps=True, average_across_batch=True, name=None): """Weighted cross-entropy loss for a sequence of logits, batch-collapsed. Args: inputs: List of 2D Tensors of shape [batch_size x hid_dim]. targets: List of 1D batch-sized int32 Tensors of the same length as inputs. weights: List of 1D batch-sized float-Tensors of the same length as inputs. loss_function: Sampled softmax function (inputs, labels) -> loss average_across_timesteps: If set, divide the returned cost by the total label weight. average_across_batch: If set, divide the returned cost by the batch size. name: Optional name for this operation, defaults to 'sequence_loss'. Returns: A scalar float Tensor: The average log-perplexity per symbol (weighted). Raises: ValueError: If len(inputs) is different from len(targets) or len(weights). """ with tf.op_scope(inputs + targets + weights, name, 'sampled_sequence_loss'): cost = tf.reduce_sum(sequence_loss_by_example( inputs, targets, weights, loss_function, average_across_timesteps=average_across_timesteps)) if average_across_batch: batch_size = tf.shape(targets[0])[0] return cost / tf.cast(batch_size, tf.float32) else: return cost
def softmax_classifier(tensor_in, labels, weights, biases, name=None): """Returns prediction and loss for softmax classifier.""" with tf.op_scope([tensor_in, labels], name, "softmax_classifier"): logits = tf.nn.xw_plus_b(tensor_in, weights, biases) xent = tf.nn.softmax_cross_entropy_with_logits(logits, labels, name="xent_raw") loss = tf.reduce_mean(xent, name="xent") predictions = tf.nn.softmax(logits, name=name) return predictions, loss
def log_l1_loss(tensor, weight=1.0, scope=None):
  """Defines a log-L1 loss, useful for regularization, e.g. lasso.

  Args:
    tensor: tensor to regularize.
    weight: scale the loss by this factor.
    scope: Optional scope for op_scope.

  Returns:
    the log-L1 loss op.
  """
  with tf.op_scope([tensor], scope, 'LogL1Loss'):
    weight = tf.convert_to_tensor(weight,
                                  dtype=tensor.dtype.base_dtype,
                                  name='loss_weight')
    absLog = tf.log(tf.abs(tensor) + 1)
    logLoss = tf.mul(weight, tf.reduce_sum(absLog), name='value')
    return logLoss
def add_bias(tensor, init=None, name=None): """Add a bias term to a tensor. Args: tensor: Variable tensor. init: Bias initializer. Defaults to zero. name: Name for this op. Defaults to tensor.op.name. Returns: A biased tensor with the same shape as the input tensor. """ if init is None: init = tf.zeros([tensor.get_shape()[-1].value]) with tf.op_scope([tensor], name, tensor.op.name): b = tf.Variable(init, name='b') return tf.nn.bias_add(tensor, b)
def distort_color(image, scope=None): '''Distort the color of the image Args: image: Tensor containing single image. scope: Optional scope for op_scope Returns: color-distorted image ''' with tf.op_scope([image], scope, 'distort_color'): image = tf.image.random_brightness(image, max_delta=32. / 255.) image = tf.image.random_saturation(image, lower=0.5, upper=1.5) image = tf.image.random_hue(image, max_delta=0.2) image = tf.image.random_contrast(image, lower=0.5, upper=1.5) return image
def binary_cross_entropy_loss_with_logits(x, target, name=None): """Calculates the binary cross entropy between sigmoid(x) and target. Expects unscaled logits. Do not pass in results of sigmoid operation. Args: x: the calculated pre-sigmoid values target: the desired values. name: the name for this op, defaults to binary_cross_entropy_with_logits Returns: -(target * -softplus(-x) + (1-target) * (-x - softplus(-x))) """ with tf.op_scope([x, target], name, 'binary_cross_entropy_with_logits'): bce_loss = -tf.add(tf.mul(target, -tf.nn.softplus(-x)), tf.mul(1 - target, -x - tf.nn.softplus(-x))) return bce_loss
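# Note on the formula above: since -softplus(-x) = log(sigmoid(x)) and
# -x - softplus(-x) = log(1 - sigmoid(x)), the returned value is the usual
# -(target * log(sigmoid(x)) + (1 - target) * log(1 - sigmoid(x))).
# A minimal numeric check (TF 0.x style ops assumed), values illustrative only:
def _demo_binary_cross_entropy_check():
    x = tf.constant([0.5, -1.0])
    target = tf.constant([1.0, 0.0])
    bce = binary_cross_entropy_loss_with_logits(x, target)
    reference = -(target * tf.log(tf.sigmoid(x)) +
                  (1.0 - target) * tf.log(1.0 - tf.sigmoid(x)))
    with tf.Session() as sess:
        print(sess.run([bce, reference]))  # the two should match closely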
def l1_loss(tensor, weight=1.0, scope=None):
  """Defines an L1 loss, useful for regularization, e.g. lasso.

  Args:
    tensor: tensor to regularize.
    weight: scale the loss by this factor.
    scope: Optional scope for op_scope.

  Returns:
    the L1 loss op.
  """
  with tf.op_scope([tensor], scope, 'L1Loss'):
    weight = tf.convert_to_tensor(weight,
                                  dtype=tensor.dtype.base_dtype,
                                  name='loss_weight')
    loss = tf.mul(weight, tf.reduce_sum(tf.abs(tensor)), name='value')
    # tf.add_to_collection(LOSSES_COLLECTION, loss)
    return loss
def leaky_relu(x, name=None):
  """Creates a leaky ReLU.

  This is an alternate non-linearity to ReLU. The leaky part may prevent dead
  neurons in a model, since the gradient doesn't go completely to 0.

  Args:
    x: The input tensor.
    name: Optional name for this op.

  Returns:
    x if x > 0, otherwise 0.01 * x.
  """
  with tf.op_scope([x], name, 'leaky_relu') as scope:
    x = tf.convert_to_tensor(x, name='x')
    return tf.select(tf.less(x, 0.0), 0.01 * x, x, name=scope)
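# Minimal usage sketch for leaky_relu (TF 0.x style tf.select assumed):
# negative inputs are scaled by 0.01 rather than zeroed out.
def _demo_leaky_relu():
    x = tf.constant([-2.0, 0.0, 3.0])
    with tf.Session() as sess:
        print(sess.run(leaky_relu(x)))  # [-0.02, 0.0, 3.0]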
def l1_normalize(x, dim, name=None): """l1 normalizes x. Args: x: The tensor to normalize. dim: The dimension to normalize along. name: Optional name for this op. Returns: x normalized along dim. """ with tf.op_scope([x], name, 'l1_normalize') as scope: x = tf.convert_to_tensor(x, name='x') x = tf.verify_tensor_all_finite(x, 'Error at input %s' % scope) x_norm = tf.reduce_sum(tf.abs(x), [dim], keep_dims=True) return tf.verify_tensor_all_finite(tf.div(x, x_norm, name=scope), 'Error at %s' % scope)
def capped_sqrt(x, name=None): """Caps the gradient of the square root. This can help with numerical stability if you are taking square roots of very small values (like in the distance functions). Args: x: A tensor. name: The name for this operation. Returns: sqrt(x) with the gradient capped. """ with tf.op_scope([x], name, 'capped_sqrt') as scope: x = tf.convert_to_tensor(x, name='x') with tf.get_default_graph().gradient_override_map({'Sqrt': 'capped_sqrt'}): return tf.sqrt(x, name=scope)
def l1_distance(t1, t2, name=None): """l1 distance between t1 and t2. Args: t1: A tensor. t2: A tensor that is the same size as t1. name: Optional name for this op. Returns: The l1 distance between t1 and t2. """ with tf.op_scope([t1, t2], name, 'l1_distance') as scope: t1 = tf.convert_to_tensor(t1, name='t1') t2 = tf.convert_to_tensor(t2, name='t2') sub = tf.sub(t1, t2) reduction_dim = _last_index(sub, 1) return tf.reduce_sum(tf.abs(sub), reduction_dim, name=scope)
def unflatten_into_tensors(flatparams_P, output_shapes, name=None): """ Unflattens a vector produced by flatcat into a list of tensors of the specified shapes. """ with tf.op_scope([flatparams_P], name, 'unflatten_into_tensors') as scope: outputs = [] curr_pos = 0 for shape in output_shapes: size = np.prod(shape).astype('int') flatval = flatparams_P[curr_pos:curr_pos + size] outputs.append(tf.reshape(flatval, shape)) curr_pos += size assert curr_pos == flatparams_P.get_shape().num_elements( ), "{} != {}".format(curr_pos, flatparams_P.get_shape().num_elements()) return tf.tuple(outputs, name=scope)
def masked_embedding_lookup_sum(emb, index,
                                reduction_indices=None,
                                exclude_zero_index=True,
                                name=None):
  """Embedding lookup followed by a masked sum.

  @TODO: a C++ op is needed to really mask the zero feature vector in the last
  dim. For now we assume index 0 is a padding entry whose vector should be
  filtered to a zero vector -- alternatively, the first row of emb could be
  zeroed before the lookup.
  """
  with tf.op_scope([emb, index], name, 'masked_emb_lookup_sum'):
    lookup_result = tf.nn.embedding_lookup(emb, index)
    if exclude_zero_index:
      masked_emb = mask2d(emb)
      mask_lookup_result = tf.nn.embedding_lookup(masked_emb, index)
      lookup_result = tf.mul(lookup_result, mask_lookup_result)
    return tf.reduce_sum(lookup_result, reduction_indices)
def sync_from(self, src_network, name=None):
    src_policy_vars = src_network.get_policy_param()
    src_value_vars = src_network.get_value_param()

    dst_policy_vars = self.get_policy_param()
    dst_value_vars = self.get_value_param()

    sync_ops = []
    with tf.device("/cpu:0"):
        with tf.op_scope([], name, "netCreator") as name:
            for (src_policy_var, dst_policy_var) in zip(src_policy_vars, dst_policy_vars):
                sync_op = tf.assign(dst_policy_var, src_policy_var)
                sync_ops.append(sync_op)
            for (src_value_var, dst_value_var) in zip(src_value_vars, dst_value_vars):
                sync_op = tf.assign(dst_value_var, src_value_var)
                sync_ops.append(sync_op)
            return tf.group(*sync_ops, name=name)
def softmax_N(tensor, name=None): """Apply softmax across last dimension of a tensor. Args: tensor: Input tensor. name: Name for this op. If None, defaults to 'softmax_N'. Returns: A tensor with softmax-normalized values on the last dimension. """ with tf.op_scope([tensor], name, 'softmax_N'): exp_tensor = tf.exp(tensor) reduction_indices = [tensor.get_shape().ndims - 1] return tf.div(exp_tensor, tf.reduce_sum(exp_tensor, reduction_indices=reduction_indices, keep_dims=True))
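# Minimal usage sketch for softmax_N (TF 0.x style ops assumed): the softmax is
# applied along the last dimension, so each row sums to 1.
def _demo_softmax_N():
    logits = tf.constant([[1.0, 2.0, 3.0],
                          [0.0, 0.0, 0.0]])
    with tf.Session() as sess:
        # First row is roughly [0.09, 0.245, 0.665]; second row is uniform.
        print(sess.run(softmax_N(logits)))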
def cos_distance(t1, t2, name=None): """Cos distance between t1 and t2 and caps the gradient of the Square Root. Args: t1: A tensor t2: A tensor that can be multiplied by t1. name: Optional name for this op. Returns: The cos distance between t1 and t2. """ with tf.op_scope([t1, t2], name, 'cos_distance') as scope: t1 = tf.convert_to_tensor(t1, name='t1') t2 = tf.convert_to_tensor(t2, name='t2') return tf.sub(1.0, dot_product(t1, t2) / capped_sqrt(length_squared(t1) * length_squared(t2)), name=scope)
def rntn_tensor_forward(a, b, V, name=None): with tf.op_scope([a, b, V], name, 'TensorForward') as scope: wvs = FLAGS.wvs a = tf.convert_to_tensor(a, dtype=tf.float32, name='a') b = tf.convert_to_tensor(b, dtype=tf.float32, name='b') V = tf.convert_to_tensor(V, dtype=tf.float32, name='V') ab = tf.concat(0, (a, b), name='ab') return tf.matmul( tf.transpose( tf.reshape( tf.matmul( tf.transpose(ab, name='ab.T'), tf.reshape(V, [wvs * 2, wvs * wvs * 2], name='inter/V_flattened'), name='inter/abTxV'), [wvs * 2, wvs], name='inter/prod/reshape'), name='inter/prod/transpose'), ab, name=scope)
def every_other(x, name=None): """Drops every other value from the tensor and returns a 1D tensor. This is useful if you are running multiple inputs through a model tower before splitting them and you want to line it up with some other data. Args: x: the target tensor. name: the name for this op, defaults to every_other Returns: A tensorflow op. """ with tf.op_scope([x], name, 'every_other') as scope: x = tf.convert_to_tensor(x, name='x') return tf.reshape(tf.slice(tf.reshape(x, [-1, 2]), [0, 0], [-1, 1]), [-1], name=scope)
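# Minimal usage sketch for every_other (TF 0.x style ops assumed): it keeps the
# elements at even flattened positions and drops the rest.
def _demo_every_other():
    x = tf.constant([1, 2, 3, 4, 5, 6])
    with tf.Session() as sess:
        print(sess.run(every_other(x)))  # [1, 3, 5]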
def l2_normalize(x, dim, name=None): """l2 normalizes x and caps the gradient of the Square Root. Args: x: The tensor to normalize. dim: The dimension to normalize along. name: Optional name for this op. Returns: x normalized along dim. """ with tf.op_scope([x], name, 'l2_normalize') as scope: x = tf.convert_to_tensor(x, name='x') x = tf.verify_tensor_all_finite(x, 'Error at input %s' % scope) x_norm = capped_sqrt(tf.reduce_sum(tf.square(x), [dim], keep_dims=True)) return tf.verify_tensor_all_finite(tf.div(x, x_norm, name=scope), 'Error at %s' % scope)
def batch_masked_embedding_lookup_sum(emb, index,
                                      exclude_zero_index=True,
                                      name=None):
  """Batched embedding lookup followed by a masked sum.

  @TODO: a C++ op is needed to really mask the zero feature vector in the last
  dim. For now we assume index 0 is a padding entry whose vector should be
  filtered to a zero vector; if not exclude_zero_index, the lookup would
  otherwise have to be done twice.
  """
  with tf.op_scope([emb, index], name, 'batch_masked_emb_lookup_sum'):
    lookup_result = tf.nn.embedding_lookup(emb, index)
    reduction_indices = len(index.get_shape()) - 1
    if exclude_zero_index:
      masked_emb = mask2d(emb)
      mask_lookup_result = tf.nn.embedding_lookup(masked_emb, index)
      lookup_result = tf.mul(lookup_result, mask_lookup_result)
    return tf.reduce_sum(lookup_result, reduction_indices)
def pairwise_distance(x, y, scope=None): """Compute pairwise distance of a point cloud. Args: x: tensor (batch_size, num_points, num_dims) y: tensor (batch_size, num_points, num_dims) Returns: pairwise distance: (batch_size, num_points, num_points) """ with tf.op_scope([x, y], scope, 'pairwise_l2_norm2_batch'): y_T = tf.transpose(y, perm=[0, 2, 1]) x_y = -2 * tf.matmul(x, y_T) x_square = tf.reduce_sum(tf.square(x), axis=-1, keep_dims=True) y_square = tf.reduce_sum(tf.square(y), axis=-1, keep_dims=True) y_square_T = tf.transpose(y_square, perm=[0, 2, 1]) return x_square + x_y + y_square_T
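# Minimal usage sketch for pairwise_distance (axis/keep_dims arguments assumed
# available, as in the function above). Note that it returns squared Euclidean
# distances.
def _demo_pairwise_distance():
    x = tf.constant([[[0.0, 0.0, 0.0],
                      [1.0, 1.0, 1.0]]])   # (batch=1, num_points=2, dims=3)
    dists = pairwise_distance(x, x)
    with tf.Session() as sess:
        # Expected result: [[[0., 3.], [3., 0.]]]
        print(sess.run(dists))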
def apply_gradients(self, var_list, accum_grad_list, name=None): update_ops = [] with tf.device(self._device): with tf.control_dependencies(None): self._create_slots(var_list) with tf.op_scope([], name, self._name) as name: self._prepare() for var, accum_grad in zip(var_list, accum_grad_list): with tf.name_scope("update_" + var.op.name), tf.device( var.device): clipped_accum_grad = tf.clip_by_norm( accum_grad, self._clip_norm) update_ops.append( self._apply_dense(clipped_accum_grad, var)) return tf.group(*update_ops, name=name)
def pairwise_distancesx(x, y, scope=None): with tf.op_scope([x, y], scope, 'pairwise_distancesx'): size_x = tf.shape(x)[0] size_y = tf.shape(y)[0] xx = tf.expand_dims(x, -1) xx = tf.tile(xx, tf.stack([1, 1, size_y])) yy = tf.expand_dims(y, -1) yy = tf.tile(yy, tf.stack([1, 1, size_x])) yy = tf.transpose(yy, perm=[2, 1, 0]) diff = tf.subtract(xx, yy) square_diff = tf.square(diff) square_dist = tf.reduce_sum(square_diff, 1) return square_dist
def decode_jpeg_concat(self, image_buffer, scope=None): with tf.op_scope([image_buffer], scope, 'decode_jpeg'): length = image_buffer.get_shape()[0].value all = [] for i in range(length): decoded = tf.image.decode_jpeg( image_buffer[i], channels=3, ratio=FLAGS.decode_downsample_factor) all.append(tf.expand_dims(decoded, 0)) images = tf.concat(0, all) images.set_shape([ FLAGS.FRAMES_IN_SEG // FLAGS.temporal_downsample_factor, FLAGS.IM_HEIGHT / FLAGS.decode_downsample_factor, FLAGS.IM_WIDTH / FLAGS.decode_downsample_factor, 3 ]) return images
def distort_image(image, height, width, thread_id=0, scope=None):
  """Distort one image for training a network.

  Distorting images provides a useful technique for augmenting the dataset
  during training in order to make the network invariant to aspects of the
  image that do not affect the label.

  Args:
    image: 3-D float Tensor of image
    height: integer
    width: integer
    thread_id: integer indicating the preprocessing thread.
    scope: Optional scope for op_scope.
  Returns:
    3-D float Tensor of distorted image used for training.
  """
  with tf.op_scope([image, height, width], scope, 'distort_image'):
    # This resizing operation may distort the images because the aspect
    # ratio is not respected. We select a resize method in a round robin
    # fashion based on the thread number.
    # Note that ResizeMethod contains 4 enumerated resizing methods.
    resize_method = thread_id % 4
    distorted_image = tf.image.resize_images(image, height, width,
                                             resize_method)

    # This comment kept in case something breaks.
    # Restore the shape since the dynamic slice based upon the bbox_size loses
    # the third dimension.
    distorted_image.set_shape([height, width, 3])
    if not thread_id:
      tf.image_summary('cropped_resized_image',
                       tf.expand_dims(distorted_image, 0))

    # Randomly flip the image horizontally.
    distorted_image = tf.image.random_flip_left_right(distorted_image)

    # Randomly distort the colors.
    distorted_image = distort_color(distorted_image, thread_id)

    if not thread_id:
      tf.image_summary('final_distorted_image',
                       tf.expand_dims(distorted_image, 0))
    return distorted_image
def dropout(inputs, keep_prob=0.5, is_training=True, scope=None): """Returns a dropout layer applied to the input. Args: inputs: the tensor to pass to the Dropout layer. keep_prob: the probability of keeping each input unit. is_training: whether or not the model is in training mode. If so, dropout is applied and values scaled. Otherwise, inputs is returned. scope: Optional scope for op_scope. Returns: a tensor representing the output of the operation. """ if is_training and keep_prob > 0: with tf.op_scope([inputs], scope, 'Dropout'): return tf.nn.dropout(inputs, keep_prob) else: return inputs
def soften_labels(bool_labels, softness=0.05, scope='soften_labels'): """Converts boolean labels into float32. Args: bool_labels: Tensor with dtype `boolean` softness: The float value to use for False. 1 - softness is implicitly used for True scope: passed to op_scope Returns: Tensor with same shape as bool_labels with dtype `float32` and values 0.05 for False and 0.95 for True. """ with tf.op_scope([bool_labels, softness], scope): label_shape = tf.shape(bool_labels, name='label_shape') return tf.where(bool_labels, tf.fill(label_shape, 1.0 - softness, name='soft_true'), tf.fill(label_shape, softness, name='soft_false'))
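# Minimal usage sketch for soften_labels (tf.where assumed available, as in the
# function above): True maps to 1 - softness and False maps to softness.
def _demo_soften_labels():
    bool_labels = tf.constant([True, False, True])
    with tf.Session() as sess:
        print(sess.run(soften_labels(bool_labels)))  # [0.95, 0.05, 0.95]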
def G_i_piecewise_variance(batch_size, total_timesteps, gamma=5):
    '''Returns the g(i) function for U(T) from the diversity paper.

    Input:
        total_timesteps: number of total timesteps made
        gamma: the timestep after which g(i) switches from 1 to 0
    Output:
        Value of g(i), which is either 0 or 1 for each timestep: 1 for the
        first gamma timesteps and 0 afterwards.
    '''
    with tf.op_scope([], "G_i_piecewise_variance"):
        # Concatenate a block of ones (timesteps up to gamma) with a block of
        # zeros (the remaining timesteps).
        ones_matrix = tf.ones([batch_size, gamma], tf.float32)
        zeros_matrix = tf.zeros([batch_size, total_timesteps - gamma], tf.float32)
        return tf.concat(1, [ones_matrix, zeros_matrix])
def l2_loss(tensor, weight=1.0, scope=None):
  """Defines an L2 loss, useful for regularization, i.e. weight decay.

  Args:
    tensor: tensor to regularize.
    weight: an optional weight to modulate the loss.
    scope: Optional scope for op_scope.

  Returns:
    the L2 loss op.
  """
  with tf.op_scope([tensor], scope, 'L2Loss'):
    weight = tf.convert_to_tensor(weight,
                                  dtype=tensor.dtype.base_dtype,
                                  name='loss_weight')
    loss = tf.multiply(weight, tf.nn.l2_loss(tensor), name='value')
    tf.add_to_collection(LOSSES_COLLECTION, loss)
    return loss
def pairwise_l2_norm2_batch(x, y, scope=None): with tf.op_scope([x, y], scope, 'pairwise_l2_norm2_batch'): nump_x = tf.shape(x)[1] nump_y = tf.shape(y)[1] xx = tf.expand_dims(x, -1) xx = tf.tile(xx, tf.stack([1, 1, 1, nump_y])) yy = tf.expand_dims(y, -1) yy = tf.tile(yy, tf.stack([1, 1, 1, nump_x])) yy = tf.transpose(yy, perm=[0, 3, 2, 1]) diff = tf.subtract(xx, yy) square_diff = tf.square(diff) square_dist = tf.reduce_sum(square_diff, 2) return square_dist
def add_gradient_noise(t, stddev=1e-3, name=None): """ Adds gradient noise as described in http://arxiv.org/abs/1511.06807 [2]. The input Tensor `t` should be a gradient. The output will be `t` + gaussian noise. 0.001 was said to be a good fixed value for memory networks [2]. """ # with tf.op_scope([t, stddev], name, "add_gradient_noise") as name: # with tf.name_scope(name, "add_gradient_noise", [t, stddev]) as name: with op_scope(values=[t, stddev], name=name, default_name="add_gradient_noise") as name: t = tf.convert_to_tensor(t, name="t") gn = tf.random_normal(tf.shape(t), stddev=stddev) return tf.add(t, gn, name=name)