def testSampledSoftmaxLoss(self):
  """Checks `nn.sampled_softmax_loss` numerics against a NumPy reference.

  The expected per-example loss is computed in NumPy from the logits and
  labels returned by `self._ComputeSampledLogitsNP`, then compared (with
  tolerance 1e-4) against the TensorFlow op. The comparison is done twice:
  once with a single weights tensor and once with the weights supplied as a
  list of shard tensors.
  """

  def _SoftmaxCrossEntropyWithLogits(logits, targets):
    # logits, targets: float arrays of the same shape.
    assert logits.shape == targets.shape
    # Subtract the row-wise max before exponentiating so exp() cannot
    # overflow.
    stable_exp_logits = np.exp(logits - np.amax(logits, axis=1, keepdims=True))
    pred = stable_exp_logits / np.sum(stable_exp_logits, 1, keepdims=True)
    # The tiny epsilon keeps log() finite if a probability underflows to 0.
    return -np.sum(targets * np.log(pred + 1.0e-20), axis=1)

  weights, biases, hidden_acts, sharded_weights = self._GenerateTestInputs()
  labels = [0, 1, 2]
  true_w, true_b = weights[labels], biases[labels]
  sampled = [1, 0, 2, 3]
  num_sampled = len(sampled)
  true_exp = np.full([self._batch_size, 1], fill_value=0.5, dtype=np.float32)
  sampled_exp = np.full([num_sampled], fill_value=0.5, dtype=np.float32)
  sampled_w, sampled_b = weights[sampled], biases[sampled]
  test_sampled_vals = (sampled, true_exp, sampled_exp)

  with self.test_session():
    logits_np, labels_np = self._ComputeSampledLogitsNP(
        true_w,
        true_b,
        sampled_w,
        sampled_b,
        hidden_acts,
        true_expected=true_exp,
        sampled_expected=sampled_exp)
    sampled_softmax_loss_np = _SoftmaxCrossEntropyWithLogits(
        logits_np, labels_np)

    labels_tf = constant_op.constant(labels, shape=(self._batch_size, 1))
    weights_tf = constant_op.constant(weights)
    biases_tf = constant_op.constant(biases)
    inputs_tf = constant_op.constant(hidden_acts)

    # `inputs` and `labels` are passed by keyword so the call does not depend
    # on their positional order in the `sampled_softmax_loss` signature.
    # `num_sampled` matches the number of candidates actually supplied through
    # `sampled_values` instead of an unrelated placeholder value.
    sampled_softmax_loss_tf = nn.sampled_softmax_loss(
        weights=weights_tf,
        biases=biases_tf,
        inputs=inputs_tf,
        labels=labels_tf,
        num_sampled=num_sampled,
        num_classes=self._num_classes,
        num_true=1,
        sampled_values=test_sampled_vals,
        remove_accidental_hits=False)

    self.assertAllClose(sampled_softmax_loss_np,
                        sampled_softmax_loss_tf.eval(), 1e-4)

    # Test with sharded weights
    sampled_softmax_loss_tf = nn.sampled_softmax_loss(
        weights=[constant_op.constant(shard) for shard in sharded_weights],
        biases=biases_tf,
        inputs=inputs_tf,
        labels=labels_tf,
        num_sampled=num_sampled,
        num_classes=self._num_classes,
        num_true=1,
        sampled_values=test_sampled_vals,
        remove_accidental_hits=False)

    self.assertAllClose(sampled_softmax_loss_np,
                        sampled_softmax_loss_tf.eval(), 1e-4)
def _testCompareWithNN(self, weights, biases, partition_strategy):
  """Checks the rank-sampled loss against `nn.sampled_softmax_loss`.

  Both losses are built in a fresh graph:
  `sampling_ops.rank_sampled_softmax_loss` starts from
  `self._sampled_values` with `resampling_temperature=1.`, while
  `nn.sampled_softmax_loss` is fed `self._resampled_values` directly. The
  evaluated losses are then asserted to be close.

  Args:
    weights: Zero-argument callable returning the weights; invoked once per
      loss.
    biases: Zero-argument callable returning the biases; invoked once per
      loss.
    partition_strategy: Forwarded unchanged to both loss functions.
  """
  # Arguments that are identical for the two loss constructions.
  shared = dict(
      num_classes=self._num_classes,
      num_true=self._num_true,
      remove_accidental_hits=self._remove_accidental_hits,
      partition_strategy=partition_strategy)

  with ops.Graph().as_default():
    rank_weights = weights()
    rank_biases = biases()
    rank_labels = self._labels()
    rank_inputs = self._inputs()
    rank_loss = sampling_ops.rank_sampled_softmax_loss(
        weights=rank_weights,
        biases=rank_biases,
        labels=rank_labels,
        inputs=rank_inputs,
        num_sampled=self._num_sampled,
        num_resampled=self._num_resampled,
        sampled_values=self._sampled_values,
        resampling_temperature=1.,
        **shared)

    ref_weights = weights()
    ref_biases = biases()
    ref_labels = self._labels()
    ref_inputs = self._inputs()
    reference_loss = nn.sampled_softmax_loss(
        weights=ref_weights,
        biases=ref_biases,
        labels=ref_labels,
        inputs=ref_inputs,
        num_sampled=self._num_resampled,
        sampled_values=self._resampled_values,
        **shared)

    with self.cached_session() as sess:
      rank_loss_value = sess.run(rank_loss)
      reference_loss_value = sess.run(reference_loss)

  self.assertAllClose(rank_loss_value, reference_loss_value)
def _testCompareWithNN(self, weights, biases, partition_strategy):
  """Checks the rank-sampled loss against `nn.sampled_softmax_loss`.

  Both losses are built in a fresh graph:
  `sampling_ops.rank_sampled_softmax_loss` starts from
  `self._sampled_values` with `resampling_temperature=1.`, while
  `nn.sampled_softmax_loss` is fed `self._resampled_values` directly. The
  evaluated losses are then asserted to be close.

  Args:
    weights: Zero-argument callable returning the weights; invoked once per
      loss.
    biases: Zero-argument callable returning the biases; invoked once per
      loss.
    partition_strategy: Forwarded unchanged to both loss functions.
  """
  # Arguments that are identical for the two loss constructions.
  shared = dict(
      num_classes=self._num_classes,
      num_true=self._num_true,
      remove_accidental_hits=self._remove_accidental_hits,
      partition_strategy=partition_strategy)

  with ops.Graph().as_default():
    rank_weights = weights()
    rank_biases = biases()
    rank_labels = self._labels()
    rank_inputs = self._inputs()
    rank_loss = sampling_ops.rank_sampled_softmax_loss(
        weights=rank_weights,
        biases=rank_biases,
        labels=rank_labels,
        inputs=rank_inputs,
        num_sampled=self._num_sampled,
        num_resampled=self._num_resampled,
        sampled_values=self._sampled_values,
        resampling_temperature=1.,
        **shared)

    ref_weights = weights()
    ref_biases = biases()
    ref_labels = self._labels()
    ref_inputs = self._inputs()
    reference_loss = nn.sampled_softmax_loss(
        weights=ref_weights,
        biases=ref_biases,
        labels=ref_labels,
        inputs=ref_inputs,
        num_sampled=self._num_resampled,
        sampled_values=self._resampled_values,
        **shared)

    with self.test_session() as sess:
      rank_loss_value = sess.run(rank_loss)
      reference_loss_value = sess.run(reference_loss)

  self.assertAllClose(rank_loss_value, reference_loss_value)
def testSampledSoftmaxLoss(self):
  """Checks `nn.sampled_softmax_loss` numerics against a NumPy reference.

  The expected per-example loss is computed in NumPy from the logits and
  labels returned by `self._ComputeSampledLogitsNP`, then compared (with
  tolerance 1e-4) against the value produced by the TensorFlow op.
  """

  def _SoftmaxCrossEntropyWithLogits(logits, targets):
    # logits, targets: float arrays of the same shape.
    assert logits.shape == targets.shape
    # Subtract the row-wise max before exponentiating so exp() cannot
    # overflow.
    stable_exp_logits = np.exp(logits - np.amax(logits, axis=1, keepdims=True))
    pred = stable_exp_logits / np.sum(stable_exp_logits, 1, keepdims=True)
    # The tiny epsilon keeps log() finite if a probability underflows to 0.
    return -np.sum(targets * np.log(pred + 1.0e-20), axis=1)

  weights, biases, hidden_acts = self._GenerateTestInputs()
  labels = [0, 1, 2]
  true_w, true_b = weights[labels], biases[labels]
  sampled = [1, 0, 2, 3]
  num_sampled = len(sampled)
  true_exp = np.full([self._batch_size, 1], fill_value=0.5, dtype=np.float32)
  sampled_exp = np.full([num_sampled], fill_value=0.5, dtype=np.float32)
  sampled_w, sampled_b = weights[sampled], biases[sampled]
  test_sampled_vals = (sampled, true_exp, sampled_exp)

  with self.test_session():
    logits_np, labels_np = self._ComputeSampledLogitsNP(
        true_w,
        true_b,
        sampled_w,
        sampled_b,
        hidden_acts,
        true_expected=true_exp,
        sampled_expected=sampled_exp)
    sampled_softmax_loss_np = _SoftmaxCrossEntropyWithLogits(
        logits_np, labels_np)

    labels_tf = constant_op.constant(labels, shape=(self._batch_size, 1))
    weights_tf = constant_op.constant(weights)
    biases_tf = constant_op.constant(biases)
    inputs_tf = constant_op.constant(hidden_acts)

    # `inputs` and `labels` are passed by keyword so the call does not depend
    # on their positional order in the `sampled_softmax_loss` signature.
    # `num_sampled` matches the number of candidates actually supplied through
    # `sampled_values` instead of an unrelated placeholder value.
    sampled_softmax_loss_tf = nn.sampled_softmax_loss(
        weights=weights_tf,
        biases=biases_tf,
        inputs=inputs_tf,
        labels=labels_tf,
        num_sampled=num_sampled,
        num_classes=self._num_classes,
        num_true=1,
        sampled_values=test_sampled_vals,
        remove_accidental_hits=False)

    self.assertAllClose(sampled_softmax_loss_np,
                        sampled_softmax_loss_tf.eval(), 1e-4)
def _testCompareWithNNTemperature(self, temperature, resampled):
  """Checks which class survives resampling at a given temperature.

  Builds a two-class problem, runs
  `sampling_ops.rank_sampled_softmax_loss` keeping one of the two sampled
  classes, and asserts the result is close to `nn.sampled_softmax_loss`
  computed with `resampled` as its only sampled class.

  Args:
    temperature: Value wrapped in a constant and passed as
      `resampling_temperature`.
    resampled: The class id expected to be kept by the resampling step.
  """
  # Two sampled classes w0, w1 (biases set to 0 for simplicity) and two
  # inputs x0, x1, which give the logits:
  #   w0.x0 = 1
  #   w0.x1 = 10
  #   w1.x0 = 8
  #   w1.x1 = 9
  # Resampling 1 class with temperature = t will pick the larger of:
  #   exp(1/t) + exp(10/t)  ==> w0, for values of t < 2.12
  #   exp(8/t) + exp(9/t)   ==> w1, for values of t > 2.13
  class_weights = [[1., 2.], [3., 4.]]
  activations = [[6., -5. / 2.], [-11., 21. / 2.]]

  first_pass_values = ([0, 1], [[1.], [1.]], [1., 1.])
  second_pass_values = ([resampled], [[1.], [1.]], [1.])
  kept_count = 1

  # Settings that both loss constructions have in common.
  shared = dict(
      num_classes=2,
      num_true=1,
      remove_accidental_hits=False,
      partition_strategy='div')

  with ops.Graph().as_default():
    weights_t = constant_op.constant(class_weights)
    biases_t = constant_op.constant([0., 0.])
    labels_t = constant_op.constant([[0], [1]], dtype=dtypes.int64)
    inputs_t = constant_op.constant(activations)
    tensors = dict(
        weights=weights_t, biases=biases_t, labels=labels_t, inputs=inputs_t)

    rank_loss = sampling_ops.rank_sampled_softmax_loss(
        num_sampled=2,
        num_resampled=kept_count,
        sampled_values=first_pass_values,
        resampling_temperature=constant_op.constant(temperature),
        **dict(tensors, **shared))
    reference_loss = nn.sampled_softmax_loss(
        num_sampled=kept_count,
        sampled_values=second_pass_values,
        **dict(tensors, **shared))

    with self.cached_session() as sess:
      rank_loss_value = sess.run(rank_loss)
      reference_loss_value = sess.run(reference_loss)

  self.assertAllClose(rank_loss_value, reference_loss_value)
def _testCompareWithNNTemperature(self, temperature, resampled):
  """Checks which class survives resampling at a given temperature.

  Builds a two-class problem, runs
  `sampling_ops.rank_sampled_softmax_loss` keeping one of the two sampled
  classes, and asserts the result is close to `nn.sampled_softmax_loss`
  computed with `resampled` as its only sampled class.

  Args:
    temperature: Value wrapped in a constant and passed as
      `resampling_temperature`.
    resampled: The class id expected to be kept by the resampling step.
  """
  # Two sampled classes w0, w1 (biases set to 0 for simplicity) and two
  # inputs x0, x1, which give the logits:
  #   w0.x0 = 1
  #   w0.x1 = 10
  #   w1.x0 = 8
  #   w1.x1 = 9
  # Resampling 1 class with temperature = t will pick the larger of:
  #   exp(1/t) + exp(10/t)  ==> w0, for values of t < 2.12
  #   exp(8/t) + exp(9/t)   ==> w1, for values of t > 2.13
  class_weights = [[1., 2.], [3., 4.]]
  activations = [[6., -5. / 2.], [-11., 21. / 2.]]

  first_pass_values = ([0, 1], [[1.], [1.]], [1., 1.])
  second_pass_values = ([resampled], [[1.], [1.]], [1.])
  kept_count = 1

  # Settings that both loss constructions have in common.
  shared = dict(
      num_classes=2,
      num_true=1,
      remove_accidental_hits=False,
      partition_strategy='div')

  with ops.Graph().as_default():
    weights_t = constant_op.constant(class_weights)
    biases_t = constant_op.constant([0., 0.])
    labels_t = constant_op.constant([[0], [1]], dtype=dtypes.int64)
    inputs_t = constant_op.constant(activations)
    tensors = dict(
        weights=weights_t, biases=biases_t, labels=labels_t, inputs=inputs_t)

    rank_loss = sampling_ops.rank_sampled_softmax_loss(
        num_sampled=2,
        num_resampled=kept_count,
        sampled_values=first_pass_values,
        resampling_temperature=constant_op.constant(temperature),
        **dict(tensors, **shared))
    reference_loss = nn.sampled_softmax_loss(
        num_sampled=kept_count,
        sampled_values=second_pass_values,
        **dict(tensors, **shared))

    with self.test_session() as sess:
      rank_loss_value = sess.run(rank_loss)
      reference_loss_value = sess.run(reference_loss)

  self.assertAllClose(rank_loss_value, reference_loss_value)
def rank_sampled_softmax_loss(weights,
                              biases,
                              labels,
                              inputs,
                              num_sampled,
                              num_resampled,
                              num_classes,
                              num_true,
                              sampled_values,
                              resampling_temperature,
                              remove_accidental_hits,
                              partition_strategy,
                              name=None):
  """Computes softmax loss using rank-based adaptive resampling.

  This has been shown to improve rank loss after training compared to
  `tf.nn.sampled_softmax_loss`. For a description of the algorithm and some
  experimental results, please see: [TAPAS: Two-pass Approximate Adaptive
  Sampling for Softmax](https://arxiv.org/abs/1707.03073).

  Sampling follows two phases:

  * In the first phase, `num_sampled` classes are selected using
    `tf.nn.learned_unigram_candidate_sampler` or supplied `sampled_values`.
    The logits are calculated on those sampled classes. This phase is
    similar to `tf.nn.sampled_softmax_loss`.
  * In the second phase, the `num_resampled` classes with highest predicted
    probability are kept. Probabilities are
    `LogSumExp(logits / resampling_temperature)`, where the sum is over
    `inputs`.

  The `resampling_temperature` parameter controls the "adaptiveness" of the
  resampling. At lower temperatures, resampling is more adaptive because it
  picks more candidates close to the predicted classes. A common strategy is
  to decrease the temperature as training proceeds.

  See `tf.nn.sampled_softmax_loss` for more documentation on sampling and
  for typical default values for some of the parameters.

  This operation is for training only. It is generally an underestimate of
  the full softmax loss.

  A common use case is to use this method for training, and calculate the
  full softmax loss for evaluation or inference. In this case, you must set
  `partition_strategy="div"` for the two losses to be consistent, as in the
  following example:

  ```python
  if mode == "train":
    loss = rank_sampled_softmax_loss(
        weights=weights,
        biases=biases,
        labels=labels,
        inputs=inputs,
        ...,
        partition_strategy="div")
  elif mode == "eval":
    logits = tf.matmul(inputs, tf.transpose(weights))
    logits = tf.nn.bias_add(logits, biases)
    labels_one_hot = tf.one_hot(labels, n_classes)
    loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=labels_one_hot,
        logits=logits)
  ```

  Args:
    weights: A `Tensor` or `PartitionedVariable` of shape
      `[num_classes, dim]`, or a list of `Tensor` objects whose
      concatenation along dimension 0 has shape [num_classes, dim]. The
      (possibly-sharded) class embeddings.
    biases: A `Tensor` or `PartitionedVariable` of shape `[num_classes]`.
      The (possibly-sharded) class biases.
    labels: A `Tensor` of type `int64` and shape `[batch_size, num_true]`.
      The target classes. Note that this format differs from the `labels`
      argument of `nn.softmax_cross_entropy_with_logits`.
    inputs: A `Tensor` of shape `[batch_size, dim]`. The forward activations
      of the input network.
    num_sampled: An `int`. The number of classes to randomly sample per
      batch.
    num_resampled: An `int`. The number of classes to select from the
      `num_sampled` classes using the adaptive resampling algorithm. Must be
      less than `num_sampled`.
    num_classes: An `int`. The number of possible classes.
    num_true: An `int`. The number of target classes per training example.
    sampled_values: A tuple of (`sampled_candidates`, `true_expected_count`,
      `sampled_expected_count`) returned by a `*_candidate_sampler`
      function. If None, default to `nn.learned_unigram_candidate_sampler`.
    resampling_temperature: A scalar `Tensor` with the temperature parameter
      for the adaptive resampling algorithm.
    remove_accidental_hits: A `bool`. Whether to remove "accidental hits"
      where a sampled class equals one of the target classes.
    partition_strategy: A string specifying the partitioning strategy,
      relevant if `len(weights) > 1`. Currently `"div"` and `"mod"` are
      supported. See `tf.nn.embedding_lookup` for more details.
    name: A name for the operation (optional).

  Returns:
    A `batch_size` 1-D tensor of per-example sampled softmax losses.

  Raises:
    ValueError: If `num_sampled > num_classes`, if
      `num_sampled <= num_resampled`, or if `partition_strategy` is neither
      `"div"` nor `"mod"`.
  """
  # Argument validation happens before any graph ops are created.
  if num_sampled > num_classes:
    too_many_sampled = (
        "num_sampled ({}) cannot be greater than num_classes ({})")
    raise ValueError(too_many_sampled.format(num_sampled, num_classes))
  if num_resampled >= num_sampled:
    too_many_resampled = (
        "num_resampled ({}) must be less than num_sampled ({})")
    raise ValueError(too_many_resampled.format(num_resampled, num_sampled))
  if partition_strategy not in ("div", "mod"):
    bad_strategy = "unsupported partition_strategy ({})"
    raise ValueError(bad_strategy.format(partition_strategy))

  scope_inputs = [
      weights, biases, labels, inputs, sampled_values, resampling_temperature
  ]
  with ops.name_scope(name, "rank_sampled_softmax_loss",
                      scope_inputs) as scope_name:
    # First pass: use the caller's candidates, or draw them when none given.
    candidates = sampled_values
    if not candidates:
      candidates = nn.learned_unigram_candidate_sampler(
          true_classes=labels,
          num_true=num_true,
          num_sampled=num_sampled,
          unique=True,
          range_max=num_classes)

    # Second pass: keep the top num_resampled candidates according to the
    # adaptive rank resampling strategy.
    kept_candidates = _rank_resample(weights, biases, inputs, candidates,
                                     num_resampled, resampling_temperature,
                                     partition_strategy)

    return nn.sampled_softmax_loss(
        weights=weights,
        biases=biases,
        labels=labels,
        inputs=inputs,
        num_sampled=num_resampled,
        num_classes=num_classes,
        num_true=num_true,
        sampled_values=kept_candidates,
        remove_accidental_hits=remove_accidental_hits,
        partition_strategy=partition_strategy,
        name=scope_name)
def rank_sampled_softmax_loss(weights,
                              biases,
                              labels,
                              inputs,
                              num_sampled,
                              num_resampled,
                              num_classes,
                              num_true,
                              sampled_values,
                              resampling_temperature,
                              remove_accidental_hits,
                              partition_strategy,
                              name=None):
  """Computes softmax loss using rank-based adaptive resampling.

  This has been shown to improve rank loss after training compared to
  `tf.nn.sampled_softmax_loss`. For a description of the algorithm and some
  experimental results, please see: [TAPAS: Two-pass Approximate Adaptive
  Sampling for Softmax](https://arxiv.org/abs/1707.03073).

  Sampling follows two phases:

  * In the first phase, `num_sampled` classes are selected using
    `tf.nn.learned_unigram_candidate_sampler` or supplied `sampled_values`.
    The logits are calculated on those sampled classes. This phase is
    similar to `tf.nn.sampled_softmax_loss`.
  * In the second phase, the `num_resampled` classes with highest predicted
    probability are kept. Probabilities are
    `LogSumExp(logits / resampling_temperature)`, where the sum is over
    `inputs`.

  The `resampling_temperature` parameter controls the "adaptiveness" of the
  resampling. At lower temperatures, resampling is more adaptive because it
  picks more candidates close to the predicted classes. A common strategy is
  to decrease the temperature as training proceeds.

  See `tf.nn.sampled_softmax_loss` for more documentation on sampling and
  for typical default values for some of the parameters.

  This operation is for training only. It is generally an underestimate of
  the full softmax loss.

  A common use case is to use this method for training, and calculate the
  full softmax loss for evaluation or inference. In this case, you must set
  `partition_strategy="div"` for the two losses to be consistent, as in the
  following example:

  ```python
  if mode == "train":
    loss = rank_sampled_softmax_loss(
        weights=weights,
        biases=biases,
        labels=labels,
        inputs=inputs,
        ...,
        partition_strategy="div")
  elif mode == "eval":
    logits = tf.matmul(inputs, tf.transpose(weights))
    logits = tf.nn.bias_add(logits, biases)
    labels_one_hot = tf.one_hot(labels, n_classes)
    loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=labels_one_hot,
        logits=logits)
  ```

  Args:
    weights: A `Tensor` or `PartitionedVariable` of shape
      `[num_classes, dim]`, or a list of `Tensor` objects whose
      concatenation along dimension 0 has shape [num_classes, dim]. The
      (possibly-sharded) class embeddings.
    biases: A `Tensor` or `PartitionedVariable` of shape `[num_classes]`.
      The (possibly-sharded) class biases.
    labels: A `Tensor` of type `int64` and shape `[batch_size, num_true]`.
      The target classes. Note that this format differs from the `labels`
      argument of `nn.softmax_cross_entropy_with_logits`.
    inputs: A `Tensor` of shape `[batch_size, dim]`. The forward activations
      of the input network.
    num_sampled: An `int`. The number of classes to randomly sample per
      batch.
    num_resampled: An `int`. The number of classes to select from the
      `num_sampled` classes using the adaptive resampling algorithm. Must be
      less than `num_sampled`.
    num_classes: An `int`. The number of possible classes.
    num_true: An `int`. The number of target classes per training example.
    sampled_values: A tuple of (`sampled_candidates`, `true_expected_count`,
      `sampled_expected_count`) returned by a `*_candidate_sampler`
      function. If None, default to `nn.learned_unigram_candidate_sampler`.
    resampling_temperature: A scalar `Tensor` with the temperature parameter
      for the adaptive resampling algorithm.
    remove_accidental_hits: A `bool`. Whether to remove "accidental hits"
      where a sampled class equals one of the target classes.
    partition_strategy: A string specifying the partitioning strategy,
      relevant if `len(weights) > 1`. Currently `"div"` and `"mod"` are
      supported. See `tf.nn.embedding_lookup` for more details.
    name: A name for the operation (optional).

  Returns:
    A `batch_size` 1-D tensor of per-example sampled softmax losses.

  Raises:
    ValueError: If `num_sampled > num_classes`, if
      `num_sampled <= num_resampled`, or if `partition_strategy` is neither
      `"div"` nor `"mod"`.
  """
  # Argument validation happens before any graph ops are created.
  if num_sampled > num_classes:
    too_many_sampled = (
        "num_sampled ({}) cannot be greater than num_classes ({})")
    raise ValueError(too_many_sampled.format(num_sampled, num_classes))
  if num_resampled >= num_sampled:
    too_many_resampled = (
        "num_resampled ({}) must be less than num_sampled ({})")
    raise ValueError(too_many_resampled.format(num_resampled, num_sampled))
  if partition_strategy not in ("div", "mod"):
    bad_strategy = "unsupported partition_strategy ({})"
    raise ValueError(bad_strategy.format(partition_strategy))

  scope_inputs = [
      weights, biases, labels, inputs, sampled_values, resampling_temperature
  ]
  with ops.name_scope(name, "rank_sampled_softmax_loss",
                      scope_inputs) as scope_name:
    # First pass: use the caller's candidates, or draw them when none given.
    candidates = sampled_values
    if not candidates:
      candidates = nn.learned_unigram_candidate_sampler(
          true_classes=labels,
          num_true=num_true,
          num_sampled=num_sampled,
          unique=True,
          range_max=num_classes)

    # Second pass: keep the top num_resampled candidates according to the
    # adaptive rank resampling strategy.
    kept_candidates = _rank_resample(weights, biases, inputs, candidates,
                                     num_resampled, resampling_temperature,
                                     partition_strategy)

    return nn.sampled_softmax_loss(
        weights=weights,
        biases=biases,
        labels=labels,
        inputs=inputs,
        num_sampled=num_resampled,
        num_classes=num_classes,
        num_true=num_true,
        sampled_values=kept_candidates,
        remove_accidental_hits=remove_accidental_hits,
        partition_strategy=partition_strategy,
        name=scope_name)
def _dnn_sampled_softmax_full_predictions(net, weights, biases, top_k):
  """Returns full-softmax logits and the prediction dict derived from them."""
  logits = nn.bias_add(
      standard_ops.matmul(net, array_ops.transpose(weights)), biases)
  predictions = {}
  predictions[_PROBABILITIES] = nn.softmax(logits)
  predictions[_CLASSES] = math_ops.argmax(logits, 1)
  _, predictions[_TOP_K] = nn.top_k(logits, top_k)
  return logits, predictions


def dnn_sampled_softmax_classifier_model_fn(features, target_indices, mode,
                                            params):
  """model_fn that uses candidate sampling.

  Training uses `nn.sampled_softmax_loss` over candidates drawn by
  `nn.learned_unigram_candidate_sampler`; evaluation and inference compute
  the full softmax over all classes.

  Args:
    features: A dict of Tensors. `features.values()` is called on it, so a
      mapping is required.
    target_indices: A single Tensor of shape [batch_size, n_labels]
      containing the target indices.
    mode: Represents if this training, evaluation or prediction. See
      `ModeKeys`.
    params: A dict of hyperparameters that are listed below. Every key is
      read with `params[...]`, so all of them must be present (use `None`
      for the optional ones).
      hidden_units- List of hidden units per layer. All layers are fully
        connected. Ex. `[64, 32]` means first layer has 64 nodes and second
        one has 32.
      feature_columns- An iterable containing all the feature columns used
        by the model. All items in the set should be instances of classes
        derived from `FeatureColumn`.
      n_classes- number of target classes. It must be greater than 2.
      n_samples- number of sample target classes. Needs to be tuned - A good
        starting point could be 2% of n_classes.
      n_labels- number of labels in each example.
      top_k- The number of classes to predict.
      optimizer- An instance of `tf.Optimizer` used to train the model. If
        `None`, will use an Adagrad optimizer.
      dropout- When not `None`, the probability we will drop out a given
        coordinate.
      gradient_clip_norm- A float > 0. If provided, gradients are clipped to
        their global norm with this clipping ratio. See
        tf.clip_by_global_norm for more details.
      num_ps_replicas- The number of parameter server replicas.

  Returns:
    A `(predictions, loss, train_op)` tuple, with `None` for the entries
    that do not apply to `mode`:
      TRAIN: `(None, loss, train_op)`.
      EVAL: `(predictions, loss, None)`.
      INFER: `(predictions, None, None)`.
    predictions: A dict of Tensors keyed by `_PROBABILITIES`, `_CLASSES` and
      `_TOP_K`.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
    For any other `mode` the function falls through and returns `None`.
  """

  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  n_classes = params["n_classes"]
  n_samples = params["n_samples"]
  n_labels = params["n_labels"]
  top_k = params["top_k"]
  optimizer = params["optimizer"]
  dropout = params["dropout"]
  gradient_clip_norm = params["gradient_clip_norm"]
  num_ps_replicas = params["num_ps_replicas"]

  parent_scope = "dnn_ss"

  # Setup the input layer partitioner.
  # 64 << 20 == 64 * 2**20 (presumably bytes, i.e. 64 MiB -- confirm against
  # `min_max_variable_partitioner`).
  input_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # Create the input layer.
  with variable_scope.variable_scope(
      parent_scope + "/input_from_feature_columns",
      features.values(),
      partitioner=input_layer_partitioner) as scope:
    net = layers.input_from_feature_columns(
        features,
        feature_columns,
        weight_collections=[parent_scope],
        scope=scope)

  # Setup the hidden layer partitioner.
  hidden_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas))

  # Width of the last hidden layer; it becomes the second dimension of the
  # logit-layer weights below.
  final_hidden_layer_dim = None
  # Create hidden layers using fully_connected.
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        parent_scope + "/hiddenlayer_%d" % layer_id, [net],
        partitioner=hidden_layer_partitioner) as scope:
      net = layers.fully_connected(net,
                                   num_hidden_units,
                                   variables_collections=[parent_scope],
                                   scope=scope)
      final_hidden_layer_dim = num_hidden_units
      # Add dropout if it is enabled.
      if dropout is not None and mode == estimator.ModeKeys.TRAIN:
        net = layers.dropout(net, keep_prob=(1.0 - dropout))

  # Create the weights and biases for the logit layer.
  with variable_scope.variable_scope(
      parent_scope + "/logits", [net],
      partitioner=hidden_layer_partitioner) as scope:
    dtype = net.dtype.base_dtype
    weights_shape = [n_classes, final_hidden_layer_dim]
    weights = variables.model_variable(
        "weights",
        shape=weights_shape,
        dtype=dtype,
        initializer=initializers.xavier_initializer(),
        trainable=True,
        collections=[parent_scope])
    biases = variables.model_variable(
        "biases",
        shape=[n_classes,],
        dtype=dtype,
        initializer=init_ops.zeros_initializer,
        trainable=True,
        collections=[parent_scope])

  if mode == estimator.ModeKeys.TRAIN:
    # Call the candidate sampling APIs and calculate the loss.
    # The int64 cast is shared by the sampler and the loss.
    int64_targets = math_ops.to_int64(target_indices)
    sampled_values = nn.learned_unigram_candidate_sampler(
        true_classes=int64_targets,
        num_true=n_labels,
        num_sampled=n_samples,
        unique=True,
        range_max=n_classes)

    sampled_softmax_loss = nn.sampled_softmax_loss(
        weights=weights,
        biases=biases,
        inputs=net,
        labels=int64_targets,
        num_sampled=n_samples,
        num_classes=n_classes,
        num_true=n_labels,
        sampled_values=sampled_values)

    loss = math_ops.reduce_mean(sampled_softmax_loss, name="loss")

    train_op = optimizers.optimize_loss(
        loss=loss,
        global_step=contrib_framework.get_global_step(),
        learning_rate=_DEFAULT_LEARNING_RATE,
        optimizer=_get_optimizer(optimizer),
        clip_gradients=gradient_clip_norm,
        name=parent_scope)
    return None, loss, train_op

  elif mode == estimator.ModeKeys.EVAL:
    logits, predictions = _dnn_sampled_softmax_full_predictions(
        net, weights, biases, top_k)

    # Since the targets have multiple labels, setup the target probabilities
    # as 1.0/n_labels for each of the labels.
    target_one_hot = array_ops.one_hot(
        indices=target_indices,
        depth=n_classes,
        on_value=1.0 / n_labels)
    target_one_hot = math_ops.reduce_sum(
        input_tensor=target_one_hot,
        reduction_indices=[1])

    # Keyword arguments make the logits/labels roles explicit and do not
    # depend on the positional order of the cross-entropy signature.
    loss = math_ops.reduce_mean(
        nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=target_one_hot))

    return predictions, loss, None

  elif mode == estimator.ModeKeys.INFER:
    _, predictions = _dnn_sampled_softmax_full_predictions(
        net, weights, biases, top_k)

    return predictions, None, None