Example #1
  def testGradientFloat16(self):
    with self.test_session(use_gpu=True) as sess:
      # Randomly construct a 1D shape from [1, 40)
      shape = random_ops.random_uniform(
          [1], minval=1, maxval=40, dtype=dtypes.int32)

      # Construct the fp32 graph and its gradient.
      x = random_ops.random_uniform(shape, minval=-1, maxval=1, name="x")
      y1 = nn_ops.relu(x, name="relu_fp32")
      l1 = nn_ops.l2_loss(y1)
      dx_f32 = gradients_impl.gradients(l1, x)

      # Construct the fp16 graph and its gradient.
      # It starts with the same fp32 x, but casts it to fp16 before the Relu,
      # so the gradient computation during backprop runs in fp16.
      x2 = math_ops.cast(x, dtype=dtypes.float16, name="cast")
      y2 = nn_ops.relu(x2, name="relu_fp16")
      l2 = nn_ops.l2_loss(y2)
      dx_f16 = gradients_impl.gradients(l2, x)

      # Repeat the experiment 100 times. All tensor shapes and values are
      # randomly generated for each run.
      for _ in range(100):
        dx_f32_v, dx_f16_v = sess.run([dx_f32, dx_f16])
        self.assertAllClose(dx_f32_v, dx_f16_v, atol=3e-4)
Example #2
  def doTestExportNestedNames(self, use_resource=False):
    graph1 = ops.Graph()
    with graph1.as_default():
      with ops.name_scope("hidden1/hidden2/hidden3"):
        images = constant_op.constant(
            1.0, dtypes.float32, shape=[3, 2], name="images")
        if use_resource:
          weights1 = variables.Variable(
              [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], name="weights")
          biases1 = resource_variable_ops.ResourceVariable(
              [0.1] * 3, name="biases")
        else:
          biases1 = variables.Variable([0.1] * 3, name="biases")
          weights1 = variables.Variable(
              [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], name="weights")
        nn_ops.relu(math_ops.matmul(images, weights1) + biases1, name="relu")

    orig_meta_graph, var_list = meta_graph.export_scoped_meta_graph(
        export_scope="hidden1/hidden2", graph=graph1)
    var_names = [v.name for _, v in var_list.items()]
    self.assertEqual(["hidden3/biases:0", "hidden3/weights:0"],
                     sorted(var_list.keys()))
    self.assertEqual([
        "hidden1/hidden2/hidden3/biases:0", "hidden1/hidden2/hidden3/weights:0"
    ], sorted(var_names))
    for node in orig_meta_graph.graph_def.node:
      self.assertTrue(node.name.startswith("hidden3"))

    graph2 = ops.Graph()
    new_var_list = meta_graph.import_scoped_meta_graph(
        orig_meta_graph, import_scope="new_hidden1/new_hidden2", graph=graph2)
    self.assertEqual(["hidden3/biases:0", "hidden3/weights:0"],
                     sorted(new_var_list.keys()))
    new_var_names = [v.name for _, v in new_var_list.items()]
    self.assertEqual([
        "new_hidden1/new_hidden2/hidden3/biases:0",
        "new_hidden1/new_hidden2/hidden3/weights:0"
    ], sorted(new_var_names))

    nodes = [
        "new_hidden1/new_hidden2/hidden3/biases/Assign",
        "new_hidden1/new_hidden2/hidden3/weights/Assign"
    ]
    expected = [
        b"loc:@new_hidden1/new_hidden2/hidden3/biases",
        b"loc:@new_hidden1/new_hidden2/hidden3/weights"
    ]
    for n, e in zip(nodes, expected):
      self.assertEqual([e], graph2.get_operation_by_name(n).get_attr("_class"))
Example #3
  def testSmallNetwork(self):
    image = array_ops.placeholder(dtypes.float32, shape=[1, 28, 28, 1])
    label = array_ops.placeholder(dtypes.float32, shape=[1, 10])
    w = variables.Variable(
        random_ops.truncated_normal([5, 5, 1, 32], stddev=0.1))
    b = variables.Variable(random_ops.truncated_normal([32], stddev=0.1))
    conv = nn_ops.conv2d(image, w, strides=[1, 1, 1, 1], padding="SAME")
    h_conv = nn_ops.relu(conv + b)
    h_conv_flat = array_ops.reshape(h_conv, [1, -1])

    w_fc = variables.Variable(
        random_ops.truncated_normal([25088, 10], stddev=0.1))
    b_fc = variables.Variable(random_ops.truncated_normal([10], stddev=0.1))
    y_conv = nn_ops.softmax(math_ops.matmul(h_conv_flat, w_fc) + b_fc)

    cross_entropy = math_ops.reduce_mean(-math_ops.reduce_sum(
        label * math_ops.log(y_conv), reduction_indices=[1]))
    _ = adam.AdamOptimizer(1e-4).minimize(cross_entropy)

    mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph())
    report = cost_analyzer.GenerateCostReport(mg)

    self.assertTrue(b"MatMul" in report)
    self.assertTrue(b"ApplyAdam" in report)
    self.assertTrue(b"Conv2D" in report)
    self.assertTrue(b"Conv2DBackpropInput" in report)
    self.assertTrue(b"Conv2DBackpropFilter" in report)
    self.assertTrue(b"Softmax" in report)

    # Also print the report to make it easier to debug
    print("{}".format(report))
Example #4
def bottleneck_hole(inputs,
                    depth,
                    depth_bottleneck,
                    stride,
                    rate=2,
                    outputs_collections=None,
                    scope=None):
  with variable_scope.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
    depth_in = utils.last_dimension(inputs.get_shape(), min_rank=4)
    if depth == depth_in:
      shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
    else:
      shortcut = layers.conv2d(
          inputs,
          depth, [1, 1],
          stride=stride,
          activation_fn=None,
          scope='shortcut')

    residual = layers.conv2d(
        inputs, depth_bottleneck, [1, 1], stride=1, scope='conv1')
    residual = layers_lib.conv2d(
        residual, depth_bottleneck, [3, 3], stride=1, rate=rate,
        padding='SAME', scope='conv2')
    residual = layers.conv2d(
        residual, depth, [1, 1], stride=1, activation_fn=None, scope='conv3')

    output = nn_ops.relu(shortcut + residual)

    return utils.collect_named_outputs(outputs_collections, sc.name, output)
Example #5
def hinge_loss(logits, labels=None, scope=None, target=None):
  """Method that returns the loss tensor for hinge loss.

  Args:
    logits: The logits, a float tensor.
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0.
    scope: The scope for the operations performed in computing the loss.
    target: Deprecated alias for `labels`.

  Returns:
    A `Tensor` of same shape as logits and target representing the loss values
      across the batch.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
  labels = _labels(labels, target)
  with ops.name_scope(scope, "hinge_loss", [logits, labels]) as scope:
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
    labels = math_ops.to_float(labels)
    all_ones = array_ops.ones_like(labels)
    labels = math_ops.sub(2 * labels, all_ones)
    return nn_ops.relu(math_ops.sub(all_ones, math_ops.mul(labels, logits)))
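A quick numeric sketch of the formula above (the sample values are assumptions, not from the source): with labels in {0, 1} mapped to {-1, 1}, the returned value is max(0, 1 - y * x) per element.

import numpy as np

# Hypothetical sample values chosen for illustration.
logits = np.array([2.0, -0.5, 0.3])
labels = np.array([1.0, 0.0, 1.0])
signed = 2 * labels - 1                        # {0, 1} -> {-1, 1}
loss = np.maximum(0.0, 1.0 - signed * logits)
print(loss)  # [0.  0.5 0.7]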
Example #6
  def test(self):
    np.random.seed(1)  # Make it reproducible.
    x = np.random.randn(3, 4).astype(np.float32)
    y = np.maximum(x, 0.0)

    z = self.evaluate(nn_ops.relu(constant_op.constant(x)))
    self.assertAllEqual(y, z)
Example #7
def hinge_loss(logits, labels=None, scope=None):
  """Method that returns the loss tensor for hinge loss.

  Args:
    logits: The logits, a float tensor. Note that logits are assumed to be
      unbounded and 0-centered. A value > 0 (resp. < 0) is considered a positive
      (resp. negative) binary prediction.
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0. Internally
      the {0,1} labels are converted to {-1,1} when calculating the hinge loss.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    An unweighted `Tensor` of same shape as `logits` and `labels` representing
    the loss values across the batch.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
  with ops.name_scope(scope, "hinge_loss", [logits, labels]) as scope:
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
    labels = math_ops.to_float(labels)
    all_ones = array_ops.ones_like(labels)
    labels = math_ops.subtract(2 * labels, all_ones)
    return nn_ops.relu(
        math_ops.subtract(all_ones, math_ops.multiply(labels, logits)))
Example #8
 def test(self):
   np.random.seed(1)  # Make it reproducible.
   x = np.random.randn(3, 4).astype(np.float32)
   y = np.maximum(x, 0.0)
   with self.test_session():
     z = nn_ops.relu(constant_op.constant(x)).eval()
     self.assertAllEqual(y, z)
Example #9
  def testBatchNormScope(self):
    batch_size, height, width, depth = 5, 128, 128, 3
    g = ops.Graph()
    with g.as_default():
      inputs = array_ops.zeros((batch_size, height, width, depth))
      stride = 1
      out_depth = 32
      scope = ''
      node = conv2d(
          inputs,
          out_depth, [2, 2],
          stride=stride,
          padding='SAME',
          weights_initializer=self._WeightInit(0.09),
          activation_fn=None,
          normalizer_fn=batch_norm,
          normalizer_params=self._BatchNormParams(False),
          scope=scope)

      node = nn_ops.relu(node, name='Relu6')
    bn_list = common.BatchNormGroups(g)
    with open('/tmp/common_test.pbtxt', 'w') as f:
      f.write(str(g.as_graph_def()))

    # Exactly one batch norm layer with empty scope should be found
    self.assertEqual(len(bn_list), 1)
    self.assertEqual(bn_list[0], '')
Example #10
 def testNaNs(self):
   # Test that relu(nan) = nan for various sizes.
   for i in range(18):
     x = np.zeros(i) + np.nan
     with self.test_session():
       z = nn_ops.relu(constant_op.constant(x)).eval()
       self.assertTrue(np.isnan(z).all())
Example #11
  def testGradient(self):
    with ops.Graph().as_default() as g:
      inputs = array_ops.placeholder(
          dtypes.float32, shape=[None, 100], name="input")
      weights = array_ops.placeholder(
          dtypes.float32, shape=[100, 10], name="weights")
      biases = array_ops.placeholder(dtypes.float32, shape=[10], name="biases")
      activations = nn_ops.relu(
          math_ops.matmul(inputs, weights) + biases, name="activations")
      loss = math_ops.reduce_mean(activations, name="loss")
    gdef = g.as_graph_def()

    with ops.Graph().as_default() as g:
      input_placeholder = array_ops.placeholder(dtypes.float32, shape=[32, 100])
      weights_var = variables.Variable(
          random_ops.truncated_normal([100, 10]), name="weights")
      biases_var = variables.Variable(array_ops.zeros([10]), name="biases")
      activations, loss = importer.import_graph_def(
          gdef,
          input_map={
              "input:0": input_placeholder,
              "weights:0": weights_var,
              "biases:0": biases_var
          },
          return_elements=["activations:0", "loss:0"])
      self.assertEqual([32, 10], activations.get_shape())
      self.assertEqual([], loss.get_shape())
      weights_grad, biases_grad = gradients_impl.gradients(
          loss, [weights_var, biases_var])
      self.assertEqual([100, 10], weights_grad.get_shape())
      self.assertEqual([10], biases_grad.get_shape())
Example #12
 def _testRelu(self, np_features, use_gpu=False):
   np_relu = self._npRelu(np_features)
   with self.test_session(use_gpu=use_gpu):
     relu = nn_ops.relu(np_features)
     tf_relu = relu.eval()
   self.assertAllClose(np_relu, tf_relu)
   self.assertShapeEqual(np_relu, relu)
Example #13
def hinge_loss(labels, logits, weights=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a hinge loss to the training procedure.

  Args:
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0.
    logits: The logits, a float tensor.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
  with ops.name_scope(scope, "hinge_loss", (logits, labels, weights)) as scope:
    logits = math_ops.to_float(logits)
    labels = math_ops.to_float(labels)
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
    all_ones = array_ops.ones_like(labels)
    labels = math_ops.subtract(2 * labels, all_ones)
    losses = nn_ops.relu(
        math_ops.subtract(all_ones, math_ops.multiply(labels, logits)))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
Example #14
  def testPotentialCycle(self):
    graph1 = ops.Graph()
    with graph1.as_default():
      a = constant_op.constant(1.0, shape=[2, 2])
      b = constant_op.constant(2.0, shape=[2, 2])
      matmul = math_ops.matmul(a, b)
      with ops.name_scope("hidden1"):
        c = nn_ops.relu(matmul)
        d = constant_op.constant(3.0, shape=[2, 2])
        matmul = math_ops.matmul(c, d)

    orig_meta_graph, _ = meta_graph.export_scoped_meta_graph(
        export_scope="hidden1", graph=graph1)

    graph2 = ops.Graph()
    with graph2.as_default():
      with self.assertRaisesRegexp(ValueError, "Graph contains unbound inputs"):
        meta_graph.import_scoped_meta_graph(
            orig_meta_graph, import_scope="new_hidden1")

      meta_graph.import_scoped_meta_graph(
          orig_meta_graph,
          import_scope="new_hidden1",
          input_map={
              "$unbound_inputs_MatMul": constant_op.constant(
                  4.0, shape=[2, 2])
          })
Example #15
def SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input, kernel,
                                          padding, strides, side_input_scale,
                                          side_input, biases):
  """Simulates the int8 fused 2-D convolution op using separate float ops.

  The arguments and return values have the same format, meanings and
  restrictions as the actual op.

  Args:
    conv_input_scale: A scalar 'float'.
    conv_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
    kernel: A `Tensor` of type `qint8` in OIHW_VECT_I layout.
    padding: A `string` from: `"SAME", "VALID"`.
    strides: A list of `ints`.
    side_input_scale: A scalar 'float'.
    side_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
    biases: A `Tensor` of type `float32` in NCHW layout.
  Returns:
    A `Tensor` of type `qint8` in NCHW_VECT_C layout.
  """
  conv_result = nn_ops.conv2d(
      NchwVectCToNchw(gen_array_ops.dequantize(conv_input, -128, 127)),
      OihwVectIToHwio(gen_array_ops.dequantize(kernel, -128, 127)),
      strides=strides,
      padding=padding,
      data_format="NCHW") * conv_input_scale

  conv_and_side_inputs = conv_result + side_input_scale * NchwVectCToNchw(
      gen_array_ops.dequantize(side_input, -128, 127))

  logit = nn_ops.bias_add(conv_and_side_inputs, biases, data_format="NCHW")

  result, _, _ = gen_array_ops.quantize_v2(
      NchwToNchwVectC(nn_ops.relu(logit)), -128, 127, dtypes.qint8)
  return result
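As a sanity check, here is a scalar-level sketch of the same pipeline (the helper name and values are assumptions, not from the source): float accumulate with the two scales, bias add, relu, then requantization into the int8 range.

import numpy as np

def simulate_scalar(conv_acc, side, bias, conv_scale, side_scale):
  # Float accumulate: scaled convolution result plus scaled side input and bias.
  x = conv_scale * conv_acc + side_scale * side + bias
  x = max(x, 0.0)  # relu
  # Requantize back into the qint8 range.
  return int(np.clip(np.round(x), -128, 127))

print(simulate_scalar(conv_acc=40.0, side=-8.0, bias=1.5,
                      conv_scale=1.0, side_scale=0.5))  # 38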
Example #16
def hinge_loss(labels, logits, weights=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES):
  """Adds a hinge loss to the training procedure.

  Args:
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0.
    logits: The logits, a float tensor.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      `[batch_size]`, or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` of the loss value.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
  with ops.name_scope(scope, "hinge_loss", [logits, labels]) as scope:
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
    labels = math_ops.to_float(labels)
    all_ones = array_ops.ones_like(labels)
    labels = math_ops.sub(2 * labels, all_ones)
    losses = nn_ops.relu(math_ops.sub(all_ones, math_ops.mul(labels, logits)))
    return compute_weighted_loss(losses, weights, scope, loss_collection)
Example #17
def hinge_loss(labels, logits, weights=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES):
  """Adds a hinge loss to the training procedure.

  WARNING: `weights` also supports dimensions of 1, but the broadcasting does
  not work as advertised, you'll wind up with weighted sum instead of weighted
  mean for any but the last dimension. This will be cleaned up soon, so please
  do not rely on the current behavior for anything but the shapes documented for
  `weights` below.

  Args:
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0.
    logits: The logits, a float tensor.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      `[batch_size]`, or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` of the loss value.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
  with ops.name_scope(scope, "hinge_loss", (logits, labels)) as scope:
    logits = math_ops.to_float(logits)
    labels = math_ops.to_float(labels)
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
    all_ones = array_ops.ones_like(labels)
    labels = math_ops.subtract(2 * labels, all_ones)
    losses = nn_ops.relu(
        math_ops.subtract(all_ones, math_ops.multiply(labels, logits)))
    return compute_weighted_loss(losses, weights, scope, loss_collection)
Example #18
  def unregularized_loss(self, examples):
    """Add operations to compute the loss (without the regularization loss).

    Args:
      examples: Examples to compute unregularized loss on.

    Returns:
      An Operation that computes the mean (unregularized) loss for the given
      set of examples.

    Raises:
      ValueError: if examples are not well defined.
    """
    self._assertSpecified([
        'example_labels', 'example_weights', 'sparse_features', 'dense_features'
    ], examples)
    self._assertList(['sparse_features', 'dense_features'], examples)
    with name_scope('sdca/unregularized_loss'):
      predictions = math_ops.cast(
          self._linear_predictions(examples), dtypes.float64)
      labels = math_ops.cast(
          internal_convert_to_tensor(examples['example_labels']),
          dtypes.float64)
      weights = math_ops.cast(
          internal_convert_to_tensor(examples['example_weights']),
          dtypes.float64)

      if self._options['loss_type'] == 'logistic_loss':
        return math_ops.reduce_sum(math_ops.multiply(
            sigmoid_cross_entropy_with_logits(labels=labels,
                                              logits=predictions),
            weights)) / math_ops.reduce_sum(weights)

      if self._options['loss_type'] == 'poisson_loss':
        return math_ops.reduce_sum(math_ops.multiply(
            log_poisson_loss(targets=labels, log_input=predictions),
            weights)) / math_ops.reduce_sum(weights)

      if self._options['loss_type'] in ['hinge_loss', 'smooth_hinge_loss']:
        # hinge_loss = max{0, 1 - y_i w*x} where y_i \in {-1, 1}. So, we need to
        # first convert 0/1 labels into -1/1 labels.
        all_ones = array_ops.ones_like(predictions)
        adjusted_labels = math_ops.subtract(2 * labels, all_ones)
        # Tensor that contains (unweighted) error (hinge loss) per
        # example.
        error = nn_ops.relu(
            math_ops.subtract(all_ones,
                              math_ops.multiply(adjusted_labels, predictions)))
        weighted_error = math_ops.multiply(error, weights)
        return math_ops.reduce_sum(weighted_error) / math_ops.reduce_sum(
            weights)

      # squared loss
      err = math_ops.subtract(labels, predictions)

      weighted_squared_err = math_ops.multiply(math_ops.square(err), weights)
      # SDCA squared loss function is sum(err^2) / (2*sum(weights))
      return (math_ops.reduce_sum(weighted_squared_err) /
              (2.0 * math_ops.reduce_sum(weights)))
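A minimal numeric check of the SDCA squared-loss branch above, sum(w * err^2) / (2 * sum(w)); the sample values are assumptions, not from the source.

import numpy as np

labels = np.array([1.0, 0.0, 1.0])
predictions = np.array([0.8, 0.3, 0.5])
weights = np.array([1.0, 2.0, 1.0])
err = labels - predictions
loss = np.sum(weights * np.square(err)) / (2.0 * np.sum(weights))
print(loss)  # (0.04 + 2 * 0.09 + 0.25) / 8 = 0.05875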
Example #19
 def testReluInt8x4GoodShape(self):
   if not test.is_gpu_available(cuda_only=True):
     self.skipTest("No GPU available")
   inputs = np.array([[-50, 7, 23, 0], [-1, -5, 6, 11]])
   np_relu = self._npRelu(inputs)
   tf_relu = nn_ops.relu(constant_op.constant(inputs, dtypes.qint8))
   self.assertAllClose(np_relu, tf_relu)
   self.assertShapeEqual(np_relu, tf_relu)
Example #20
 def func(inp):
   conv = nn_ops.conv2d(
       inp,
       filter=array_ops.ones([3, 3, 3, 16]),
       strides=[1, 1, 1, 1],
       padding='SAME')
   output = nn_ops.relu(conv, name='output')
   return output
Example #21
  def testReluInt8x4BadShape(self):
    if not test.is_gpu_available(cuda_only=True):
      self.skipTest("No GPU available")
    inputs = constant_op.constant(
        np.array([[-50, 7, 23], [0, 1, -5], [6, -2, 11]]), dtypes.qint8)
    with self.assertRaisesRegexp(
        errors.InvalidArgumentError,
        "Tensor size must be a multiple of 4 for Relu<qint8>. Got 9"):
      self.evaluate(nn_ops.relu(inputs))

    inputs = constant_op.constant(
        np.array([1, -2, 3, -4, 5, -6, 7, -8, 9, -8, 7, -6, 5, -4, 3, -2, 1]),
        dtypes.qint8)
    with self.assertRaisesRegexp(
        errors.InvalidArgumentError,
        "Tensor size must be a multiple of 4 for Relu<qint8>. Got 17"):
      self.evaluate(nn_ops.relu(inputs))
Example #22
def bottleneck(inputs,
               depth,
               depth_bottleneck,
               stride,
               rate=1,
               outputs_collections=None,
               scope=None):
  """Bottleneck residual unit variant with BN after convolutions.
  This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for
  its definition. Note that we use here the bottleneck variant which has an
  extra bottleneck layer.
  When putting together two consecutive ResNet blocks that use this unit, one
  should use stride = 2 in the last unit of the first block.
  Args:
    inputs: A tensor of size [batch, height, width, channels].
    depth: The depth of the ResNet unit output.
    depth_bottleneck: The depth of the bottleneck layers.
    stride: The ResNet unit's stride. Determines the amount of downsampling of
      the unit's output compared to its input.
    rate: An integer, rate for atrous convolution.
    outputs_collections: Collection to add the ResNet unit output.
    scope: Optional variable_scope.
  Returns:
    The ResNet unit's output.
  """
  with variable_scope.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
    depth_in = utils.last_dimension(inputs.get_shape(), min_rank=4)
    if depth == depth_in:
      shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
    else:
      shortcut = layers.conv2d(
          inputs,
          depth, [1, 1],
          stride=stride,
          activation_fn=None,
          scope='shortcut')

    residual = layers.conv2d(
        inputs, depth_bottleneck, [1, 1], stride=1, scope='conv1')
    residual = resnet_utils.conv2d_same(
        residual, depth_bottleneck, 3, stride, rate=rate, scope='conv2')
    residual = layers.conv2d(
        residual, depth, [1, 1], stride=1, activation_fn=None, scope='conv3')

    output = nn_ops.relu(shortcut + residual)

    return utils.collect_named_outputs(outputs_collections, sc.name, output)
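A shape walk-through of one such unit (the concrete sizes here are assumptions, not from the source) shows why the shortcut and conv2 downsample in lockstep: with depth=512, depth_bottleneck=128, stride=2 on a [1, 56, 56, 256] input, conv1 (1x1, stride 1) gives [1, 56, 56, 128], conv2 (3x3, stride 2, SAME) gives [1, 28, 28, 128], conv3 (1x1) gives [1, 28, 28, 512], and the projection shortcut also lands at [1, 28, 28, 512], so the elementwise add and final relu are well defined.

import math

def same_out(size, stride):
  # Spatial output size of a SAME-padded convolution.
  return math.ceil(size / stride)

assert same_out(56, 2) == 28  # conv2 and the shortcut downsample identically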
Example #23
  def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, bias,
                            strides, padding, activation_mode, data_format,
                            dtype):
    """Verifies the output values of the convolution function.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [kernel_rows, kernel_cols, input_depth, output_depth].
      bias: 1-D bias tensor of length output_depth.
      strides: Strides in the form [col_stride, row_stride].
      padding: Padding type.
      activation_mode: Activation mode.
      data_format: Format of the data tensors.
      dtype: Data type for inputs and outputs.
    Returns:
      Symbolic tensor value and reference value that can be used to
      execute the computation and verify the results.
    """
    input_size = np.prod(tensor_in_sizes)
    filter_size = np.prod(filter_in_sizes)
    bias_size = filter_in_sizes[-1]  # equal to the output depth
    # Initialize the input and filter tensors with arrays of incrementing
    # numbers starting from 1.
    x1 = [f * 1.0 for f in range(1, input_size + 1)]
    x2 = [f * 1.0 for f in range(1, filter_size + 1)]
    # The bias is chosen to guarantee that there are always negative values
    # after the bias add, so that we can test whether relu works correctly.
    x3 = bias
    with self.test_session(use_gpu=True):
      t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype)
      t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype)
      t3 = constant_op.constant(x3, shape=[bias_size], dtype=dtype)
      strides = [1] + strides + [1]
      if data_format == "NCHW":
        t1 = test_util.NHWCToNCHW(t1)
        strides = test_util.NHWCToNCHW(strides)
      output = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
          t1,
          t2,
          t3,
          strides=strides,
          padding=padding,
          data_format=data_format,
          activation_mode=activation_mode)
      ref_conv_output = nn_ops.conv2d(
          t1, t2, strides=strides, padding=padding, data_format=data_format)
      ref_bias_output = nn_ops.bias_add(
          ref_conv_output, t3, data_format=data_format)
      ref_output = nn_ops.relu(ref_bias_output)
      if data_format == "NCHW":
        output = test_util.NCHWToNHWC(output)
        ref_output = test_util.NCHWToNHWC(ref_output)

      return output, ref_output
Example #24
 def testGradientScalar(self):
   with self.test_session() as sess:
     x = variables.Variable(100.)
     y = nn_ops.relu(x)
     loss = y**2
     optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=0.25)
     train_op = optimizer.minimize(loss)
     sess.run(variables.global_variables_initializer())
     sess.run(train_op)
     self.assertAllClose(x.eval(), 50.0)
Example #25
  def testExportDebugInfo(self):
    graph1 = ops.Graph()
    with graph1.as_default():
      with ops.name_scope("hidden1/hidden2/hidden3"):
        images = constant_op.constant(
            1.0, dtypes.float32, shape=[3, 2], name="images")
        weights1 = variables.Variable([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
                                      name="weights")
        biases1 = resource_variable_ops.ResourceVariable(
            [0.1] * 3, name="biases")
        nn_ops.relu(math_ops.matmul(images, weights1) + biases1, name="relu")
    debug_info_def = meta_graph.create_graph_debug_info_def(
        operations=graph1.get_operations())

    # The number of unique file names across all stack traces should be at
    # least 1.
    self.assertTrue(len(debug_info_def.files) >= 1)
    # All the nodes from the exported graphdef are included.
    self.assertEqual(len(debug_info_def.traces), len(graph1.get_operations()))
Example #26
def build_conv_bias_relu_graph(device, input_shape, filter_shape, strides,
                               padding, num_iters, data_format):
  """builds a graph containing a sequence of conv2d operations.

  Args:
    device: String, the device to run on.
    input_shape: Shape of the input tensor.
    filter_shape: Shape of the filter tensor.
    strides: A list of ints. 1-D of length 4. The stride of sliding
             window for each dimension of input.
    padding: A string from: "SAME", "VALID". The type of padding
             algorithm to use.
    num_iters: number of iterations to run conv2d.
    data_format: Data format string of the input; 'NHWC' and 'NCHW' are
      supported.

  Returns:
    An array of tensors to run()
  """
  if data_format == "NCHW":
    input_shape = [
        input_shape[0], input_shape[3], input_shape[1], input_shape[2]
    ]
  with ops.device("/%s:0" % device):
    inp = variables.Variable(random_ops.truncated_normal(input_shape))
    filt = variables.Variable(random_ops.truncated_normal(filter_shape))
    bias_shape = [filter_shape[-1]]
    bias = variables.Variable(random_ops.truncated_normal(bias_shape))

    outputs = []
    conv2d_out = nn_ops.conv2d(
        inp, filt, strides, padding, data_format=data_format)
    bias_out = nn_ops.bias_add(conv2d_out, bias, data_format=data_format)
    relu_out = nn_ops.relu(bias_out)
    outputs.append(relu_out)
    for _ in range(1, num_iters):
      with ops.control_dependencies([relu_out]):
        conv2d_out = nn_ops.conv2d(
            inp, filt, strides, padding, data_format=data_format)
        bias_out = nn_ops.bias_add(conv2d_out, bias, data_format=data_format)
        relu_out = nn_ops.relu(bias_out)
        outputs.append(relu_out)
    return control_flow_ops.group(*outputs)
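A hypothetical invocation (all argument values below are assumptions, not from the source): a 10-iteration chain of conv/bias/relu on CPU whose grouped op can then be passed to session.run() inside a benchmark loop.

op = build_conv_bias_relu_graph(
    device="cpu",
    input_shape=[4, 28, 28, 3],
    filter_shape=[3, 3, 3, 16],
    strides=[1, 1, 1, 1],
    padding="SAME",
    num_iters=10,
    data_format="NHWC")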
Example #27
def sigmoid_cross_entropy_with_logits(logits, targets, name=None):
  """Computes sigmoid cross entropy given `logits`.

  Measures the probability error in discrete classification tasks in which each
  class is independent and not mutually exclusive.  For instance, one could
  perform multilabel classification where a picture can contain both an elephant
  and a dog at the same time.

  For brevity, let `x = logits`, `z = targets`.  The logistic loss is

        z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
      = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
      = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
      = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))
      = (1 - z) * x + log(1 + exp(-x))
      = x - x * z + log(1 + exp(-x))

  To ensure stability and avoid overflow, the implementation uses

      max(x, 0) - x * z + log(1 + exp(-abs(x)))

  `logits` and `targets` must have the same type and shape.

  Args:
    logits: A `Tensor` of type `float32` or `float64`.
    targets: A `Tensor` of the same type and shape as `logits`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of the same shape as `logits` with the componentwise
    logistic losses.

  Raises:
    ValueError: If `logits` and `targets` do not have the same shape.
  """
  with ops.op_scope([logits, targets], name, "logistic_loss") as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    targets = ops.convert_to_tensor(targets, name="targets")
    try:
      targets.get_shape().merge_with(logits.get_shape())
    except ValueError:
      raise ValueError(
          "logits and targets must have the same shape (%s vs %s)"
          % (logits.get_shape(), targets.get_shape()))

    # The logistic loss formula from above is
    #   x - x * z + log(1 + exp(-x))
    # For x < 0, a more numerically stable formula is
    #   -x * z + log(1 + exp(x))
    # To avoid branching, we use the combined version
    #   max(x, 0) - x * z + log(1 + exp(-abs(x)))
    return math_ops.add(nn_ops.relu(logits) - logits * targets,
                        math_ops.log(1 + math_ops.exp(-math_ops.abs(logits))),
                        name=name)
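A quick numeric check (the sample values are assumptions, not from the source) that the stable form used above, max(x, 0) - x * z + log(1 + exp(-abs(x))), matches the naive loss x - x * z + log(1 + exp(-x)):

import numpy as np

x = np.array([-10.0, -1.0, 0.0, 1.0, 10.0])  # logits
z = np.array([0.0, 1.0, 0.5, 0.0, 1.0])      # targets
naive = x - x * z + np.log1p(np.exp(-x))
stable = np.maximum(x, 0) - x * z + np.log1p(np.exp(-np.abs(x)))
print(np.allclose(naive, stable))  # True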
Example #28
 def layer(x, name=None):
   with variable_scope.variable_scope(name, default_name="layer"):
     x = layers.layer_norm(x)
     x = convolutional.conv1d(
         x,
         10,
         1,
         use_bias=False,
         kernel_initializer=init_ops.constant_initializer(42.42))
     x = nn_ops.relu(x)
     return x
Example #29
  def test_summarize_activation_relu(self):
    with self.cached_session():
      var = variables.Variable(1)
      op = nn_ops.relu(var, name='SummaryTest')
      summary_op = summaries_lib.summarize_activation(op)

      self.assertEquals(summary_op.op.type, 'HistogramSummary')
      names = [op.op.name for op in ops.get_collection(ops.GraphKeys.SUMMARIES)]
      self.assertEquals(len(names), 2)
      self.assertIn(u'SummaryTest/zeros', names)
      self.assertIn(u'SummaryTest/activation', names)
Example #30
 def f(inp, hidden_weight, hidden_bias, softmax_weight, softmax_bias):
   features = nn_ops.relu(
       nn_ops.xw_plus_b(inp, hidden_weight, hidden_bias), name="features")
   logits = nn_ops.xw_plus_b(
       features, softmax_weight, softmax_bias, name="logits")
   labels = constant_op.constant(
       label_data.tolist(),
       shape=[batch, classes],
       dtype=dtypes.float64,
       name="labels")
   cost = nn_ops.softmax_cross_entropy_with_logits(
       labels=labels, logits=logits, name="cost")
   return cost
Example #31
 def _compute_time_features(self, time):
     """Compute some features on the time value."""
     batch_size = array_ops.shape(time)[0]
     num_periods = len(self._periodicities)
     # Reshape to 3D.
     periods = constant_op.constant(self._periodicities,
                                    shape=[1, 1, num_periods, 1],
                                    dtype=time.dtype)
     time = array_ops.reshape(time, [batch_size, -1, 1, 1])
     window_offset = time / self._periodicities
     # Cast to appropriate type and scale to [0, 1) range
     mod = (math_ops.cast(time % periods, self.dtype) * self._buckets /
            math_ops.cast(periods, self.dtype))
     # Bucketize based on some fixed width intervals. For a value t and interval
     # [a, b), we return (t - a) if a <= t < b, else 0.
     intervals = array_ops.reshape(
         math_ops.range(self._buckets, dtype=self.dtype),
         [1, 1, 1, self._buckets])
     mod = nn_ops.relu(mod - intervals)
     mod = array_ops.where(mod < 1.0, mod, array_ops.zeros_like(mod))
     return window_offset, mod
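A 1-D numeric sketch of the bucketing above (the values are assumptions, not from the source): for a value t falling in interval [a, a + 1) we keep t - a and zero out every other bucket.

import numpy as np

buckets = 4
mod = np.array([2.3])                         # scaled position in [0, buckets)
intervals = np.arange(buckets, dtype=float)   # [0. 1. 2. 3.]
feat = np.maximum(mod - intervals, 0.0)       # [2.3 1.3 0.3 0. ]
feat = np.where(feat < 1.0, feat, 0.0)        # [0.  0.  0.3 0. ]
print(feat)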
Example #32
    def _test01(self, dtype):
        with test_util.device(True):

            x = constant_op.constant([
                5, 5, 5, 5, 5, 7, 7, 7, 7, 7, 9, 9, 9, 9, 9, 11, 11, 11, 11,
                11, 13, 13, 13, 13, 13
            ],
                                     dtype=dtype,
                                     shape=[1, 5, 5, 1])
            k = constant_op.constant([1, 0, 1, 1, 0, 1, 1, 0, 1],
                                     dtype=dtype,
                                     shape=[3, 3, 1, 1])
            offset = constant_op.constant([2], dtype=dtype)

            conv = nn_ops.conv2d(x, k, [1, 1, 1, 1], "VALID")
            bias = nn_ops.bias_add(conv, offset)
            relu = nn_ops.relu(bias)

            y1 = array_ops.identity(relu)

            return (y1, )
Example #33
def sigmoid_cross_entropy_with_logits(logits, targets, name=None):
    """Computes sigmoid cross entropy given `logits`.

  Measures the probability error in discrete classification tasks in which each
  class is independent and not mutually exclusive.  For instance, one could
  perform multilabel classification where a picture can contain both an elephant
  and a dog at the same time.

  For brevity, let `x = logits`, `z = targets`.  The logistic loss is

      x - x * z + log(1 + exp(-x))

  To ensure stability and avoid overflow, the implementation uses

      max(x, 0) - x * z + log(1 + exp(-abs(x)))

  `logits` and `targets` must have the same type and shape.

  Args:
    logits: A `Tensor` of type `float32` or `float64`.
    targets: A `Tensor` of the same type and shape as `logits`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of the same shape as `logits` with the componentwise
    logistic losses.
  """
    with ops.op_scope([logits, targets], name, "logistic_loss") as name:
        logits = ops.convert_to_tensor(logits, name="logits")
        targets = ops.convert_to_tensor(targets, name="targets")
        # The logistic loss formula from above is
        #   x - x * z + log(1 + exp(-x))
        # For x < 0, a more numerically stable formula is
        #   -x * z + log(1 + exp(x))
        # To avoid branching, we use the combined version
        #   max(x, 0) - x * z + log(1 + exp(-abs(x)))
        return math_ops.add(nn_ops.relu(logits) - logits * targets,
                            math_ops.log(1 +
                                         math_ops.exp(-math_ops.abs(logits))),
                            name=name)
Example #34
    def testConvWithBnAndRelu(self):
        with ops.device("/device:IPU:0"):
            x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])
            with variable_scope.variable_scope("vs", use_resource=True):
                y = layers.Conv2D(
                    2,
                    1,
                    use_bias=True,
                    kernel_initializer=init_ops.ones_initializer())(x)
                y = layers_norm.batch_normalization(y, fused=True)
                y = nn_ops.relu(y)

            with ops.device('cpu'):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:

            sess.run(variables.global_variables_initializer())

            sess.run(report)

            sess.run(y, {x: np.zeros([1, 4, 4, 2])})

            result = sess.run(report)
            self.assertEqual(
                len(result),
                6)  # 2x compile, 1x upload, 1x load, 1x download, 1x execute

            s = tu.extract_all_strings_from_event_trace(result)
            cs_list = tu.get_compute_sets_from_report(s)

            ok = [
                '__seed*', 'host-exchange-local-copy', 'Copy_',
                'vs/conv2d/Conv2D/convolution.*/Conv_1x1', 'vs/conv2d/BiasAdd',
                'vs/batch_normalization/FusedBatchNorm/batch-norm-inference.*/',
                'vs/Relu/custom-call/Nonlinearity'
            ]
            self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
Example #35
def hinge_loss(labels,
               logits,
               weights=1.0,
               scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
    """Adds a hinge loss to the training procedure.

  Args:
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0.
    logits: The logits, a float tensor.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
    with ops.name_scope(scope, "hinge_loss", (logits, labels)) as scope:
        logits = math_ops.to_float(logits)
        labels = math_ops.to_float(labels)
        logits.get_shape().assert_is_compatible_with(labels.get_shape())
        # We first need to convert binary labels to -1/1 labels (as floats).
        all_ones = array_ops.ones_like(labels)
        labels = math_ops.subtract(2 * labels, all_ones)
        losses = nn_ops.relu(
            math_ops.subtract(all_ones, math_ops.multiply(labels, logits)))
        return compute_weighted_loss(losses,
                                     weights,
                                     scope,
                                     loss_collection,
                                     reduction=reduction)
Example #36
def focal_loss_with_weight(targets, logits, pos_weight, r=2, a=4, name=None):
    """
    This is the fixed weighted_cross_entropy_with_logits with focal loss.
    See the weighted_cross_entropy_with_logits for more information.
    :param targets:
    :param logits:
    :param pos_weight:
    :param r:
    :param a:
    :param name:
    :return:
    """
    with ops.name_scope(name, "logistic_loss", [logits, targets]) as name:
        logits = ops.convert_to_tensor(logits, name="logits")
        targets = ops.convert_to_tensor(targets, name="targets")
        try:
            targets.get_shape().merge_with(logits.get_shape())
        except ValueError:
            raise ValueError(
                "logits and targets must have the same shape (%s vs %s)" %
                (logits.get_shape(), targets.get_shape()))

        # This is adapted from TensorFlow's weighted_cross_entropy_with_logits.
        # The logistic loss formula from above is
        #   (1 - z) * x + (1 + (q - 1) * z) * log(1 + exp(-x))
        # For x < 0, a more numerically stable formula is
        #   (1 - z) * x + (1 + (q - 1) * z) * log(1 + exp(x)) - l * x
        # To avoid branching, we use the combined version
        #   (1 - z) * x + l * (log(1 + exp(-abs(x))) + max(-x, 0))
        sig_log = tf.sigmoid(logits)
        FL1 = a * tf.pow(1 - sig_log, r)
        FL2 = a * tf.pow(sig_log, r)

        log_weight = FL2 + (pos_weight * FL1 - FL2) * targets
        # log_weight = 1 + (pos_weight - 1) * targets
        return math_ops.add(
            FL2 * (1 - targets) * logits,
            log_weight * (math_ops.log1p(math_ops.exp(-math_ops.abs(logits))) +
                          nn_ops.relu(-logits)),
            name=name)
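A small numeric sketch of the focal modulation above (the values are assumptions, not from the source): a * (1 - sigmoid(x))^r shrinks the weight of easy, confident positives and keeps hard ones near a.

import numpy as np

a, r = 4.0, 2.0
p = 1.0 / (1.0 + np.exp(-np.array([-4.0, 0.0, 4.0])))  # sigmoid(logits)
print(a * (1.0 - p) ** r)  # roughly [3.86, 1.0, 0.0013]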
Example #37
def _SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input,
                                           kernel, padding, strides,
                                           side_input_scale, side_input,
                                           biases, apply_relu):
    """Simulates the int8 fused 2-D convolution op using separate float ops.

    The arguments and return values have the same format, meanings and
    restrictions as the actual op.

  Args:
    conv_input_scale: A scalar 'float'.
    conv_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
    kernel: A `Tensor` of type `qint8` in OIHW_VECT_I layout.
    padding: A `string` from: `"SAME", "VALID"`.
    strides: A list of `ints`.
    side_input_scale: A scalar 'float'.
    side_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
    biases: A `Tensor` of type `float32` in NCHW layout.
    apply_relu: A boolean to specify whether to apply "Relu" activation function
      that clips outputs to the range [0, 127], or "None" activation that clips
      to the range [-128, 127].
  Returns:
    A `Tensor` of type `qint8` in NCHW_VECT_C layout.
  """
    conv_result = nn_ops.conv2d(
        _NchwVectCToNchw(gen_array_ops.dequantize(conv_input, -128, 127)),
        _OihwVectIToHwio(gen_array_ops.dequantize(kernel, -128, 127)),
        strides=strides,
        padding=padding,
        data_format="NCHW") * conv_input_scale

    conv_and_side_inputs = conv_result + side_input_scale * _NchwVectCToNchw(
        gen_array_ops.dequantize(side_input, -128, 127))

    output = nn_ops.bias_add(conv_and_side_inputs, biases, data_format="NCHW")
    if apply_relu:
        output = nn_ops.relu(output)

    result, _, _ = gen_array_ops.quantize_v2(_NchwToNchwVectC(output), -128,
                                             127, dtypes.qint8)
    return result
Example #38
def multiclass_hinge_loss(labels, multi_labels_mask, logits):
    """Adds Ops for computing the multiclass hinge loss.
  The implementation is based on the following paper:
  On the Algorithmic Implementation of Multiclass Kernel-based Vector Machines
  by Crammer and Singer.
  link: http://jmlr.csail.mit.edu/papers/volume2/crammer01a/crammer01a.pdf
  This is a generalization of standard (binary) hinge loss. For a given instance
  with correct label c*, the loss is given by:
    loss = max_{c != c*} logits_c - logits_{c*} + 1.
  or equivalently
    loss = max_c { logits_c - logits_{c*} + I_{c != c*} }
  where I_{c != c*} = 1 if c != c* and 0 otherwise.
  """
    with tf.variable_scope("Hinge_Loss"):
        label_logits = tf.reduce_max(multi_labels_mask * logits,
                                     axis=1,
                                     keep_dims=True)
        margin = (logits - label_logits + 1) * (1 - multi_labels_mask)
        margin = nn_ops.relu(margin)
        loss = math_ops.reduce_max(margin, axis=1)
    return loss
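A small numeric sketch of the loss above (the values are assumptions, not from the source): with correct class c* = 0 scoring 2.0 and the runner-up scoring 1.0, the margin requirement in max_c { logits_c - logits_{c*} + I_{c != c*} } is already met, so the loss is 0.

import numpy as np

logits = np.array([[2.0, 1.0, -0.5]])
mask = np.array([[1.0, 0.0, 0.0]])  # one-hot mask for the correct class
label_logits = np.max(mask * logits, axis=1, keepdims=True)       # [[2.]]
margin = np.maximum((logits - label_logits + 1) * (1 - mask), 0)
print(np.max(margin, axis=1))  # [0.] -- correct class wins by >= 1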
Example #39
    def _test01(self, dtype):
        with test_util.device(True):
            x = constant_op.constant(
                [5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15],
                dtype=dtype,
                shape=[1, 1, 6, 2])
            scale = constant_op.constant([4, 5], dtype=float)
            offset = constant_op.constant([2, 3], dtype=float)
            batch_mean = constant_op.constant([10, 10], dtype=float)
            batch_var = constant_op.constant([14, 14], dtype=float)

            batch_norm, _, _ = nn_impl.fused_batch_norm(x,
                                                        scale,
                                                        offset,
                                                        mean=batch_mean,
                                                        variance=batch_var,
                                                        is_training=False)
            relu = nn_ops.relu(batch_norm)

            y1 = array_ops.identity(relu)

            return (y1, )
Example #40
def weighted_cross_entropy_with_logits_v2(labels,
                                          logits,
                                          pos_weight,
                                          name=None):
    """Computes a weighted cross entropy.
  """
    with ops.name_scope(name, "logistic_loss", [logits, labels]) as name:
        logits = ops.convert_to_tensor(logits, name="logits")
        labels = ops.convert_to_tensor(labels, name="labels")
        try:
            labels.get_shape().merge_with(logits.get_shape())
        except ValueError:
            raise ValueError(
                "logits and labels must have the same shape (%s vs %s)" %
                (logits.get_shape(), labels.get_shape()))

        log_weight = 1 + (pos_weight - 1) * labels
        return math_ops.add(
            (1 - labels) * logits,
            log_weight * (math_ops.log1p(math_ops.exp(-math_ops.abs(logits))) +
                          nn_ops.relu(-logits)),
            name=name)
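A numeric check (the values are assumptions, not from the source) that the stable expression above equals the textbook weighted loss (1 - z) * x + l * log(1 + exp(-x)) with l = 1 + (pos_weight - 1) * z:

import numpy as np

x = np.array([-5.0, -0.5, 0.0, 0.5, 5.0])  # logits
z = np.array([1.0, 0.0, 1.0, 0.0, 1.0])    # labels
l = 1 + (3.0 - 1) * z                      # pos_weight = 3.0
naive = (1 - z) * x + l * np.log1p(np.exp(-x))
stable = (1 - z) * x + l * (np.log1p(np.exp(-np.abs(x))) + np.maximum(-x, 0))
print(np.allclose(naive, stable))  # True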
Example #41
    def testGradient(self):
        with ops.Graph().as_default() as g:
            inputs = array_ops.placeholder(dtypes.float32,
                                           shape=[None, 100],
                                           name="input")
            weights = array_ops.placeholder(dtypes.float32,
                                            shape=[100, 10],
                                            name="weights")
            biases = array_ops.placeholder(dtypes.float32,
                                           shape=[10],
                                           name="biases")
            activations = nn_ops.relu(math_ops.matmul(inputs, weights) +
                                      biases,
                                      name="activations")
            loss = math_ops.reduce_mean(activations, name="loss")
        gdef = g.as_graph_def()

        with ops.Graph().as_default() as g:
            input_placeholder = array_ops.placeholder(dtypes.float32,
                                                      shape=[32, 100])
            weights_var = variables.Variable(random_ops.truncated_normal(
                [100, 10]),
                                             name="weights")
            biases_var = variables.Variable(array_ops.zeros([10]),
                                            name="biases")
            activations, loss = importer.import_graph_def(
                gdef,
                input_map={
                    "input:0": input_placeholder,
                    "weights:0": weights_var,
                    "biases:0": biases_var
                },
                return_elements=["activations:0", "loss:0"])
            self.assertEqual([32, 10], activations.get_shape())
            self.assertEqual([], loss.get_shape())
            weights_grad, biases_grad = gradients_impl.gradients(
                loss, [weights_var, biases_var])
            self.assertEqual([100, 10], weights_grad.get_shape())
            self.assertEqual([10], biases_grad.get_shape())
Example #42
def bottleneck_hole(inputs,
                    depth,
                    depth_bottleneck,
                    stride,
                    rate=2,
                    outputs_collections=None,
                    scope=None):
    with variable_scope.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
        depth_in = utils.last_dimension(inputs.get_shape(), min_rank=4)
        if depth == depth_in:
            shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
        else:
            shortcut = layers.conv2d(inputs,
                                     depth, [1, 1],
                                     stride=stride,
                                     activation_fn=None,
                                     scope='shortcut')

        residual = layers.conv2d(inputs,
                                 depth_bottleneck, [1, 1],
                                 stride=1,
                                 scope='conv1')
        residual = layers_lib.conv2d(residual,
                                     depth_bottleneck, [3, 3],
                                     stride=1,
                                     rate=rate,
                                     padding='SAME',
                                     scope='conv2')
        residual = layers.conv2d(residual,
                                 depth, [1, 1],
                                 stride=1,
                                 activation_fn=None,
                                 scope='conv3')

        output = nn_ops.relu(shortcut + residual)

        return utils.collect_named_outputs(outputs_collections, sc.name,
                                           output)
Example #43
def hinge_loss(labels,
               logits,
               weights=1.0,
               scope=None,
               loss_collection=ops.GraphKeys.LOSSES):
    """Adds a hinge loss to the training procedure.

  WARNING: `weights` also supports dimensions of 1, but the broadcasting does
  not work as advertised, you'll wind up with weighted sum instead of weighted
  mean for any but the last dimension. This will be cleaned up soon, so please
  do not rely on the current behavior for anything but the shapes documented for
  `weights` below.

  Args:
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0.
    logits: The logits, a float tensor.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      `[batch_size]`, or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` of the loss value.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
    with ops.name_scope(scope, "hinge_loss", (logits, labels)) as scope:
        logits = math_ops.to_float(logits)
        labels = math_ops.to_float(labels)
        logits.get_shape().assert_is_compatible_with(labels.get_shape())
        # We first need to convert binary labels to -1/1 labels (as floats).
        all_ones = array_ops.ones_like(labels)
        labels = math_ops.subtract(2 * labels, all_ones)
        losses = nn_ops.relu(
            math_ops.subtract(all_ones, math_ops.multiply(labels, logits)))
        return compute_weighted_loss(losses, weights, scope, loss_collection)
Example #44
def multiclass_svm(labels, logits):

    # Read batch size and number of tags from logits
    shape = array_ops.shape(logits)
    batchSize = shape[0]
    tagTotal = shape[1]
    logits = math_ops.to_float(logits)
    labels = array_ops.reshape(labels, shape=[-1])

    # Target indices
    targetIndex = array_ops.reshape(math_ops.range(batchSize),
                                    shape=[batchSize, 1])
    indices = array_ops.concat([
        targetIndex,
        array_ops.reshape(math_ops.cast(labels, targetIndex.dtype),
                          shape=[batchSize, 1])
    ], axis=1)

    # logits with the label class value
    labelLogits = array_ops.reshape(array_ops.gather_nd(params=logits,
                                                        indices=indices),
                                    shape=[batchSize, 1])

    marginDelta = array_ops.one_hot(indices=labels,
                                    depth=tagTotal,
                                    on_value=0.0,
                                    off_value=1.0)

    # Compute SVM margin
    margin = logits - labelLogits + marginDelta

    # Use rectifier for max(features, 0)
    margin = nn_ops.relu(margin)

    # Take the largest margin violation per example
    loss = math_ops.reduce_max(margin, axis=1)
    return losses.compute_weighted_loss(loss)
Example #45
  def testReluNotInPlace(self):
    with self.session() as sess:
      with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float32, [3], name="a")
        c = nn_ops.relu(pa) + pa

      report = tu.ReportJSON(self, sess)
      report.reset()

      fd = {pa: [1, -2, 1]}
      result = sess.run(c, fd)
      self.assertAllClose(result, [2, -2, 2])

      report.parse_log(assert_len=4)

      # pylint: disable=line-too-long
      ok = [
          '__seed*',
          'Copy_XLA_Args*/arg0.*_to_Relu/relu/Nonlinearity/out/OnTileCopy-0',
          'Relu/relu/Nonlinearity', 'add/add.*/Add'
      ]
      # pylint: enable=line-too-long
      report.assert_all_compute_sets_and_list(ok)
Example #46
def hinge_loss(logits, target, scope=None):
  """Method that returns the loss tensor for hinge loss.

  Args:
    logits: The logits, a float tensor.
    target: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    A `Tensor` of same shape as logits and target representing the loss values
      across the batch.

  Raises:
    ValueError: If the shapes of `logits` and `target` don't match.
  """
  with ops.name_scope(scope, "hinge_loss", [logits, target]) as scope:
    logits.get_shape().assert_is_compatible_with(target.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
    target = math_ops.to_float(target)
    all_ones = array_ops.ones_like(target)
    labels = math_ops.sub(2 * target, all_ones)
    return nn_ops.relu(math_ops.sub(all_ones, math_ops.mul(labels, logits)))
Example #47
    def testRelu(self):
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [3], name="a")
            c = nn_ops.relu(pa)

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            fd = {pa: [-6.0, 0.0, 6.0]}
            result = sess.run(c, fd)
            self.assertAllClose(result, [0.0, 0.0, 6.0])

            result = sess.run(report)
            self.assertTrue(len(result) == 3)

            s = tu.extract_all_strings_from_event_trace(result)
            cs_list = tu.get_compute_sets_from_report(s)

            ok = ['__seed*', 'Relu/custom-call/Nonlinearity']
            self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
Example #48
0
 def run_test(sess):
     inp = array_ops.placeholder(dtypes.float32)
     filt = array_ops.placeholder(dtypes.float32)
     scale = array_ops.placeholder(dtypes.float32)
     offset = array_ops.placeholder(dtypes.float32)
     mean = array_ops.placeholder(dtypes.float32)
     variance = array_ops.placeholder(dtypes.float32)
     bn, _, _ = nn_impl.fused_batch_norm(
         nn_ops.conv2d(inp, filt, strides=[1, 1, 1, 1], padding="SAME"),
         scale,
         offset,
         mean,
         variance,
         epsilon=0.02,
         is_training=False)
     return sess.run(
         nn_ops.relu(bn), {
             inp: inp_values,
             filt: filt_values,
             scale: scale_values,
             offset: offset_values,
             mean: mean_values,
             variance: variance_values,
         })
Example #49
0
 def _testRelu(self, np_features):
     np_relu = self._npRelu(np_features)
     tf_relu = nn_ops.relu(np_features)
     self.assertAllClose(np_relu, tf_relu)
     self.assertShapeEqual(np_relu, tf_relu)
Example #50
0
 def loss():
     return nn_ops.relu(x)**2
Example #51
0
 def f(x):
     assert x.dtype == dtypes.float64
     with backprop.GradientTape() as tape:
         tape.watch(x)
         y = nn_ops.relu(x)
     return tape.gradient(y, x)
Example #52
0
 def grad(x):
     with backprop.GradientTape() as tape:
         tape.watch(x)
         y = nn_ops.l2_loss(nn_ops.relu(x))
     return tape.gradient(y, x)
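For reference, since l2_loss(t) = sum(t**2) / 2, the chain rule gives d/dx [l2_loss(relu(x))] = relu(x) * 1[x > 0], which equals relu(x) everywhere except at the non-differentiable point x = 0 (where TensorFlow defines the ReLU gradient to be 0, so the identity holds there too). A tiny NumPy check of that identity, with toy values of my own:

import numpy as np

x = np.array([-2.0, 0.5, 3.0])
# d/dx [0.5 * relu(x)**2] = relu(x) * 1[x > 0] = relu(x)
grad = np.maximum(x, 0.0)
print(grad)  # [0.  0.5 3. ] -- matches what tape.gradient(y, x) computes above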
Example #53
0
    def _BuildAndTestMiniMNIST(self, param_index, tag):
        # Fix seed to avoid occasional flakiness
        np.random.seed(6)

        # Hyperparameters
        batch = 3
        inputs = 16
        features = 32
        classes = 10

        # Define the parameters
        inp_data = np.random.random_sample(inputs * batch)
        hidden_weight_data = np.random.randn(
            inputs * features) / np.sqrt(inputs)
        hidden_bias_data = np.random.random_sample(features)
        sm_weight_data = np.random.randn(
            features * classes) / np.sqrt(features)
        sm_bias_data = np.random.random_sample(classes)

        # special care for labels since they need to be normalized per batch
        label_data = np.random.random(batch * classes).reshape(
            (batch, classes))
        s = label_data.sum(axis=1)
        label_data /= s[:, None]

        with self.session(use_gpu=True):
            # We treat the inputs as "parameters" here
            inp = constant_op.constant(inp_data.tolist(),
                                       shape=[batch, inputs],
                                       dtype=dtypes.float64,
                                       name="inp")
            hidden_weight = constant_op.constant(hidden_weight_data.tolist(),
                                                 shape=[inputs, features],
                                                 dtype=dtypes.float64,
                                                 name="hidden_weight")
            hidden_bias = constant_op.constant(hidden_bias_data.tolist(),
                                               shape=[features],
                                               dtype=dtypes.float64,
                                               name="hidden_bias")
            softmax_weight = constant_op.constant(sm_weight_data.tolist(),
                                                  shape=[features, classes],
                                                  dtype=dtypes.float64,
                                                  name="softmax_weight")
            softmax_bias = constant_op.constant(sm_bias_data.tolist(),
                                                shape=[classes],
                                                dtype=dtypes.float64,
                                                name="softmax_bias")

            # List all the parameters so that we can test them one at a time
            all_params = [
                inp, hidden_weight, hidden_bias, softmax_weight, softmax_bias
            ]
            param_sizes = [
                [batch, inputs],  # inp
                [inputs, features],  # hidden_weight,
                [features],  # hidden_bias
                [features, classes],  # softmax_weight,
                [classes]
            ]  # softmax_bias

            # Now build the mini-MNIST network
            features = nn_ops.relu(nn_ops.xw_plus_b(inp, hidden_weight,
                                                    hidden_bias),
                                   name="features")
            logits = nn_ops.xw_plus_b(features,
                                      softmax_weight,
                                      softmax_bias,
                                      name="logits")
            labels = constant_op.constant(label_data.tolist(),
                                          shape=[batch, classes],
                                          dtype=dtypes.float64,
                                          name="labels")
            cost = nn_ops.softmax_cross_entropy_with_logits(labels=labels,
                                                            logits=logits,
                                                            name="cost")

            # Test the gradients.
            err = gradient_checker.compute_gradient_error(
                all_params[param_index],
                param_sizes[param_index],
                cost, [batch],
                delta=1e-5)

        tf_logging.info("Mini MNIST: %s gradient error = %g", tag, err)
        return err
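compute_gradient_error compares the symbolic Jacobian against a central finite-difference estimate and returns the largest absolute discrepancy, which should be tiny in float64 with delta=1e-5. A minimal sketch of the same idea for a scalar function (a hypothetical helper of my own, not the TensorFlow implementation):

import numpy as np

def finite_diff_grad(f, x, delta=1e-5):
    # Central difference estimate of df/dx; gradient_checker applies the
    # same idea to every Jacobian entry.
    return (f(x + delta) - f(x - delta)) / (2 * delta)

f = lambda v: np.maximum(v, 0.0) ** 2  # relu(v)**2
analytic = 2 * np.maximum(0.7, 0.0)    # closed-form gradient at v = 0.7
numeric = finite_diff_grad(f, 0.7)
print(abs(analytic - numeric))         # ~0 (on the order of 1e-11 in float64)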
Example #54
0
    def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, bias,
                              strides, padding, activation_mode, data_format,
                              filter_format, dtype):
        """Verifies the output values of the convolution function.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [kernel_rows, kernel_cols, input_depth, output_depth].
      bias: 1-D bias tensor of length output_depth.
      strides: Strides as [col_stride, row_stride].
      padding: Padding type.
      activation_mode: Activation mode.
      data_format: Format of the data tensors.
      filter_format: Filter format to use for the fused convolution.
      dtype: Data type for inputs and outputs.
    Returns:
      Symbolic tensor value and reference value that can be used to
      execute the computation and verify the results.
    """
        input_size = np.prod(tensor_in_sizes)
        filter_size = np.prod(filter_in_sizes)
        bias_size = filter_in_sizes[-1]  # equals the output depth
        # Initializes the input tensor with array containing incrementing
        # numbers from 1.
        x1 = [f * 1.0 for f in range(1, input_size + 1)]
        x2 = [f * 1.0 for f in range(1, filter_size + 1)]
        # This is to guarantee that there are always negative values after
        # bias add so that we can test whether relu works correctly.
        x3 = bias
        with self.cached_session(use_gpu=True), self.test_scope():
            t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype)
            t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype)
            fused_t2 = t2
            if filter_format == "OIHW":
                fused_t2 = _HwioToOihw(t2)
            t3 = constant_op.constant(x3, shape=[bias_size], dtype=dtype)
            strides = [1] + strides + [1]
            if data_format == "NCHW":
                t1 = test_util.NHWCToNCHW(t1)
                strides = test_util.NHWCToNCHW(strides)
            output = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
                t1,
                fused_t2,
                t3,
                strides=strides,
                padding=padding,
                data_format=data_format,
                filter_format=filter_format,
                activation_mode=activation_mode)
            ref_conv_output = nn_ops.conv2d(t1,
                                            t2,
                                            strides=strides,
                                            padding=padding,
                                            data_format=data_format)
            ref_bias_output = nn_ops.bias_add(ref_conv_output,
                                              t3,
                                              data_format=data_format)
            ref_output = nn_ops.relu(ref_bias_output)
            if data_format == "NCHW":
                output = test_util.NCHWToNHWC(output)
                ref_output = test_util.NCHWToNHWC(ref_output)

            return output, ref_output
Example #55
0
def weighted_cross_entropy_with_logits(logits, targets, pos_weight,
                                       name=None):
  """Computes a weighted cross entropy.

  This is like `sigmoid_cross_entropy_with_logits()` except that `pos_weight`
  allows one to trade off recall and precision by up- or down-weighting the
  cost of a positive error relative to a negative error.

  The usual cross-entropy cost is defined as:

    targets * -log(sigmoid(logits)) + (1 - targets) * -log(1 - sigmoid(logits))

  The argument `pos_weight` is used as a multiplier for the positive targets:

    targets * -log(sigmoid(logits)) * pos_weight +
        (1 - targets) * -log(1 - sigmoid(logits))

  For brevity, let `x = logits`, `z = targets`, `q = pos_weight`.
  The loss is:

        qz * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
      = qz * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
      = qz * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
      = qz * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))
      = (1 - z) * x + (qz +  1 - z) * log(1 + exp(-x))
      = (1 - z) * x + (1 + (q - 1) * z) * log(1 + exp(-x))

  Setting `l = (1 + (q - 1) * z)`, to ensure stability and avoid overflow,
  the implementation uses

      (1 - z) * x + l * (log(1 + exp(-abs(x))) + max(-x, 0))

  `logits` and `targets` must have the same type and shape.

  Args:
    logits: A `Tensor` of type `float32` or `float64`.
    targets: A `Tensor` of the same type and shape as `logits`.
    pos_weight: A coefficient to use on the positive examples.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of the same shape as `logits` with the componentwise
    weighted logistic losses.

  Raises:
    ValueError: If `logits` and `targets` do not have the same shape.
  """
  with ops.op_scope([logits, targets], name, "logistic_loss") as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    targets = ops.convert_to_tensor(targets, name="targets")
    try:
      targets.get_shape().merge_with(logits.get_shape())
    except ValueError:
      raise ValueError(
          "logits and targets must have the same shape (%s vs %s)"
          % (logits.get_shape(), targets.get_shape()))

    # The logistic loss formula from above is
    #   (1 - z) * x + (1 + (q - 1) * z) * log(1 + exp(-x))
    # For x < 0, a more numerically stable formula is
    #   (1 - z) * x + (1 + (q - 1) * z) * log(1 + exp(x)) - l * x
    # To avoid branching, we use the combined version
    #   (1 - z) * x + l * (log(1 + exp(-abs(x))) + max(-x, 0))
    log_weight = 1 + (pos_weight - 1) * targets
    return math_ops.add(
        (1 - targets) * logits,
        log_weight * (math_ops.log(1 + math_ops.exp(-math_ops.abs(logits))) +
                      nn_ops.relu(-logits)),
        name=name)
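A quick NumPy check (my own sketch) that the stable formulation above matches the naive formula on moderate inputs and stays finite where the naive form overflows:

import numpy as np

def naive_loss(x, z, q):
    # (1 - z) * x + (1 + (q - 1) * z) * log(1 + exp(-x)); exp(-x) overflows for x << 0
    return (1 - z) * x + (1 + (q - 1) * z) * np.log(1 + np.exp(-x))

def stable_loss(x, z, q):
    # (1 - z) * x + l * (log(1 + exp(-abs(x))) + max(-x, 0)), with l = 1 + (q - 1) * z
    l = 1 + (q - 1) * z
    return (1 - z) * x + l * (np.log1p(np.exp(-np.abs(x))) + np.maximum(-x, 0.0))

x = np.array([-3.0, 0.0, 4.0])
z = np.array([1.0, 0.0, 1.0])
print(np.allclose(naive_loss(x, z, 2.0), stable_loss(x, z, 2.0)))  # True
print(stable_loss(np.array([-1000.0]), np.array([0.0]), 2.0))      # [0.] -- naive form returns inf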
Example #56
0
    def unregularized_loss(self, examples):
        """Add operations to compute the loss (without the regularization loss).

    Args:
      examples: Examples to compute unregularized loss on.

    Returns:
      An Operation that computes mean (unregularized) loss for given set of
      examples.

    Raises:
      ValueError: if examples are not well defined.
    """
        self._assertSpecified([
            'example_labels', 'example_weights', 'sparse_features',
            'dense_features'
        ], examples)
        self._assertList(['sparse_features', 'dense_features'], examples)
        with name_scope('sdca/unregularized_loss'):
            predictions = math_ops.cast(self._linear_predictions(examples),
                                        dtypes.float64)
            labels = math_ops.cast(
                internal_convert_to_tensor(examples['example_labels']),
                dtypes.float64)
            weights = math_ops.cast(
                internal_convert_to_tensor(examples['example_weights']),
                dtypes.float64)

            if self._options['loss_type'] == 'logistic_loss':
                return math_ops.reduce_sum(
                    math_ops.multiply(
                        sigmoid_cross_entropy_with_logits(labels=labels,
                                                          logits=predictions),
                        weights)) / math_ops.reduce_sum(weights)

            if self._options['loss_type'] == 'poisson_loss':
                return math_ops.reduce_sum(
                    math_ops.multiply(
                        log_poisson_loss(targets=labels,
                                         log_input=predictions),
                        weights)) / math_ops.reduce_sum(weights)

            if self._options['loss_type'] in [
                    'hinge_loss', 'smooth_hinge_loss'
            ]:
                # hinge_loss = max{0, 1 - y_i w*x} where y_i \in {-1, 1}. So, we need to
                # first convert 0/1 labels into -1/1 labels.
                all_ones = array_ops.ones_like(predictions)
                adjusted_labels = math_ops.subtract(2 * labels, all_ones)
                # Tensor that contains (unweighted) error (hinge loss) per
                # example.
                error = nn_ops.relu(
                    math_ops.subtract(
                        all_ones,
                        math_ops.multiply(adjusted_labels, predictions)))
                weighted_error = math_ops.multiply(error, weights)
                return math_ops.reduce_sum(
                    weighted_error) / math_ops.reduce_sum(weights)

            # squared loss
            err = math_ops.subtract(labels, predictions)

            weighted_squared_err = math_ops.multiply(math_ops.square(err),
                                                     weights)
            # SDCA squared loss function is sum(err^2) / (2*sum(weights))
            return (math_ops.reduce_sum(weighted_squared_err) /
                    (2.0 * math_ops.reduce_sum(weights)))
Example #57
0
def _update_confusion_matrix_variables_optimized(
    variables_to_update,
    y_true,
    y_pred,
    thresholds,
    multi_label=False,
    sample_weights=None,
    label_weights=None,
    thresholds_with_epsilon=False):
  """Update confusion matrix variables with memory efficient alternative.

  Note that the thresholds need to be evenly distributed within the list, e.g.,
  the diff between consecutive elements is the same.

  To compute TP/FP/TN/FN, we are measuring a binary classifier
    C(t) = (predictions >= t)
  at each threshold 't'. So we have
    TP(t) = sum( C(t) * true_labels )
    FP(t) = sum( C(t) * false_labels )

  But, computing C(t) requires computation for each t. To make it fast,
  observe that C(t) is a cumulative integral, and so if we have
    thresholds = [t_0, ..., t_{n-1}];  t_0 < ... < t_{n-1}
  where n = num_thresholds, and if we can compute the bucket function
    B(i) = Sum( (predictions == t), t_i <= t < t_{i+1} )
  then we get
    C(t_i) = sum( B(j), j >= i )
  which is the reversed cumulative sum in tf.cumsum().

  We can compute B(i) efficiently by taking advantage of the fact that
  our thresholds are evenly distributed, in that
    width = 1.0 / (num_thresholds - 1)
    thresholds = [0.0, 1*width, 2*width, 3*width, ..., 1.0]
  Given a prediction value p, we can map it to its bucket by
    bucket_index(p) = floor( p * (num_thresholds - 1) )
  so we can use tf.math.unsorted_segment_sum() to update the buckets in one
  pass.

  Consider the following example:
  y_true = [0, 0, 1, 1]
  y_pred = [0.1, 0.5, 0.3, 0.9]
  thresholds = [0.0, 0.5, 1.0]
  num_buckets = 2   # [0.0, 1.0], (1.0, 2.0]
  bucket_index(y_pred) = tf.math.floor(y_pred * num_buckets)
                       = tf.math.floor([0.2, 1.0, 0.6, 1.8])
                       = [0, 0, 0, 1]
  # The meaning of this bucket is that if any of the labels is true,
  # then 1 will be added to the bucket with the corresponding index.
  # Eg, if the label for 0.2 is true, then 1 will be added to bucket 0. If the
  # label for 1.8 is true, then 1 will be added to bucket 1.
  #
  # Note the second item "1.0" is floored to 0, since the value needs to be
  # strictly larger than the bucket lower bound.
  # In the implementation, we use tf.math.ceil() - 1 to achieve this.
  tp_bucket_value = tf.math.unsorted_segment_sum(true_labels, bucket_indices,
                                                 num_segments=num_thresholds)
                  = [1, 1, 0]
  # For [1, 1, 0] here, it means there is 1 true value contributed by bucket 0,
  # and 1 true value contributed by bucket 1. When we aggregate them together,
  # the result becomes [a + b + c, b + c, c], since large thresholds will always
  # contribute to the value for smaller thresholds.
  true_positive = tf.math.cumsum(tp_bucket_value, reverse=True)
                = [2, 1, 0]

  This implementation exhibits a run time and space complexity of O(T + N),
  where T is the number of thresholds and N is the size of predictions.
  Metrics that rely on the standard implementation instead exhibit a complexity
  of O(T * N).

  Args:
    variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys
      and corresponding variables to update as values.
    y_true: A floating point `Tensor` whose shape matches `y_pred`. Will be cast
      to `bool`.
    y_pred: A floating point `Tensor` of arbitrary shape and whose values are in
      the range `[0, 1]`.
    thresholds: A sorted floating point `Tensor` with values in `[0, 1]`.
      It needs to be evenly distributed (the diff between consecutive elements
      needs to be the same).
    multi_label: Optional boolean indicating whether multidimensional
      prediction/labels should be treated as multilabel responses, or flattened
      into a single label. When True, the values of `variables_to_update` must
      have a second dimension equal to the number of labels in y_true and
      y_pred, and those tensors must not be RaggedTensors.
    sample_weights: Optional `Tensor` whose rank is either 0, or the same rank
      as `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `y_true` dimension).
    label_weights: Optional tensor of non-negative weights for multilabel
      data. The weights are applied when calculating TP, FP, FN, and TN without
      explicit multilabel handling (i.e. when the data is to be flattened).
    thresholds_with_epsilon: Optional boolean indicating whether the leading and
      trailing thresholds have any epsilon added for floating point imprecision.
      It will change how we handle the leading and trailing buckets.

  Returns:
    Update op.
  """
  num_thresholds = thresholds.shape.as_list()[0]

  if sample_weights is None:
    sample_weights = 1.0
  else:
    sample_weights = weights_broadcast_ops.broadcast_weights(
        math_ops.cast(sample_weights, dtype=y_pred.dtype), y_pred)
    if not multi_label:
      sample_weights = array_ops.reshape(sample_weights, [-1])
  if label_weights is None:
    label_weights = 1.0
  else:
    label_weights = array_ops.expand_dims(label_weights, 0)
    label_weights = weights_broadcast_ops.broadcast_weights(label_weights,
                                                            y_pred)
    if not multi_label:
      label_weights = array_ops.reshape(label_weights, [-1])
  weights = math_ops.multiply(sample_weights, label_weights)

  # We shouldn't need this, but guard against prediction values that fall
  # outside the range [0.0, 1.0].
  y_pred = clip_ops.clip_by_value(y_pred,
                                  clip_value_min=0.0, clip_value_max=1.0)

  y_true = math_ops.cast(math_ops.cast(y_true, dtypes.bool), y_true.dtype)
  if not multi_label:
    y_true = array_ops.reshape(y_true, [-1])
    y_pred = array_ops.reshape(y_pred, [-1])

  true_labels = math_ops.multiply(y_true, weights)
  false_labels = math_ops.multiply((1.0 - y_true), weights)

  # Compute the bucket indices for each prediction value.
  # Since a prediction value has to be strictly greater than a threshold to
  # count for that threshold's bucket (e.g., with buckets [0, 0.5], (0.5, 1],
  # the value 0.5 belongs to the first bucket), we have to use
  # math.ceil(val) - 1 for the bucket index.
  bucket_indices = math_ops.ceil(y_pred * (num_thresholds - 1)) - 1

  if thresholds_with_epsilon:
    # In this case, the first bucket should actually be taken into account,
    # since any prediction in [0.0, 1.0] is larger than the first threshold.
    # We change the bucket value from -1 to 0.
    bucket_indices = nn_ops.relu(bucket_indices)

  bucket_indices = math_ops.cast(bucket_indices, dtypes.int32)

  if multi_label:
    # We need to run the bucket segment sum for each label class. In the
    # multi_label case, the rank of the labels is 2. We first transpose so that
    # the label dim becomes the first, and we can run through the labels in
    # parallel.
    true_labels = array_ops.transpose_v2(true_labels)
    false_labels = array_ops.transpose_v2(false_labels)
    bucket_indices = array_ops.transpose_v2(bucket_indices)

    def gather_bucket(label_and_bucket_index):
      label, bucket_index = label_and_bucket_index[0], label_and_bucket_index[1]
      return math_ops.unsorted_segment_sum(
          data=label, segment_ids=bucket_index, num_segments=num_thresholds)
    tp_bucket_v = vectorized_map(
        gather_bucket, (true_labels, bucket_indices))
    fp_bucket_v = vectorized_map(
        gather_bucket, (false_labels, bucket_indices))
    tp = array_ops.transpose_v2(
        math_ops.cumsum(tp_bucket_v, reverse=True, axis=1))
    fp = array_ops.transpose_v2(
        math_ops.cumsum(fp_bucket_v, reverse=True, axis=1))
  else:
    tp_bucket_v = math_ops.unsorted_segment_sum(
        data=true_labels, segment_ids=bucket_indices,
        num_segments=num_thresholds)
    fp_bucket_v = math_ops.unsorted_segment_sum(
        data=false_labels, segment_ids=bucket_indices,
        num_segments=num_thresholds)
    tp = math_ops.cumsum(tp_bucket_v, reverse=True)
    fp = math_ops.cumsum(fp_bucket_v, reverse=True)

  # fn = sum(true_labels) - tp
  # tn = sum(false_labels) - fp
  if (ConfusionMatrix.TRUE_NEGATIVES in variables_to_update or
      ConfusionMatrix.FALSE_NEGATIVES in variables_to_update):
    if multi_label:
      total_true_labels = math_ops.reduce_sum(true_labels, axis=1)
      total_false_labels = math_ops.reduce_sum(false_labels, axis=1)
    else:
      total_true_labels = math_ops.reduce_sum(true_labels)
      total_false_labels = math_ops.reduce_sum(false_labels)

  update_ops = []
  if ConfusionMatrix.TRUE_POSITIVES in variables_to_update:
    variable = variables_to_update[ConfusionMatrix.TRUE_POSITIVES]
    update_ops.append(variable.assign_add(tp))
  if ConfusionMatrix.FALSE_POSITIVES in variables_to_update:
    variable = variables_to_update[ConfusionMatrix.FALSE_POSITIVES]
    update_ops.append(variable.assign_add(fp))
  if ConfusionMatrix.TRUE_NEGATIVES in variables_to_update:
    variable = variables_to_update[ConfusionMatrix.TRUE_NEGATIVES]
    tn = total_false_labels - fp
    update_ops.append(variable.assign_add(tn))
  if ConfusionMatrix.FALSE_NEGATIVES in variables_to_update:
    variable = variables_to_update[ConfusionMatrix.FALSE_NEGATIVES]
    fn = total_true_labels - tp
    update_ops.append(variable.assign_add(fn))
  return control_flow_ops.group(update_ops)
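The bucket-and-cumsum trick from the docstring can be checked directly in NumPy against the O(T * N) definition TP(t) = sum(C(t) * true_labels), using the docstring's own toy values:

import numpy as np

y_true = np.array([0.0, 0.0, 1.0, 1.0])
y_pred = np.array([0.1, 0.5, 0.3, 0.9])
thresholds = np.array([0.0, 0.5, 1.0])
n = len(thresholds)

# Direct O(T * N): one pass over all predictions per threshold.
tp_direct = np.array([(y_true * (y_pred >= t)).sum() for t in thresholds])

# Bucketed O(T + N): ceil(...) - 1 puts boundary values in the lower bucket;
# the clamp to 0 plays the role of the relu on bucket_indices above.
buckets = np.maximum(np.ceil(y_pred * (n - 1)) - 1, 0).astype(int)
tp_bucket = np.bincount(buckets, weights=y_true, minlength=n)
tp = np.cumsum(tp_bucket[::-1])[::-1]

print(tp_direct)  # [2. 1. 0.]
print(tp)         # [2. 1. 0.] -- matches the docstring's true_positive

(On exact threshold boundaries the bucketed version uses strictly-greater semantics, so it can differ from the >= comparison; no true label sits on a boundary in this toy example.)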
Example #58
0
 def _loss(logits):
     """The loss of pairwise logits with l_i > l_j."""
     # TODO(xuanhui, pointer-team): Consider pass params object into the loss and
     # put a margin here.
     return nn_ops.relu(1. - logits)
Example #59
0
def sigmoid_cross_entropy_with_logits(logits, targets, name=None):
    """Computes sigmoid cross entropy given `logits`.

  Measures the probability error in discrete classification tasks in which each
  class is independent and not mutually exclusive.  For instance, one could
  perform multilabel classification where a picture can contain both an elephant
  and a dog at the same time.

  For brevity, let `x = logits`, `z = targets`.  The logistic loss is

        z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
      = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
      = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
      = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))
      = (1 - z) * x + log(1 + exp(-x))
      = x - x * z + log(1 + exp(-x))

  For x < 0, to avoid overflow in exp(-x), we reformulate the above

        x - x * z + log(1 + exp(-x))
      = log(exp(x)) - x * z + log(1 + exp(-x))
      = - x * z + log(1 + exp(x))

  Hence, to ensure stability and avoid overflow, the implementation uses this
  equivalent formulation

      max(x, 0) - x * z + log(1 + exp(-abs(x)))

  `logits` and `targets` must have the same type and shape.

  Args:
    logits: A `Tensor` of type `float32` or `float64`.
    targets: A `Tensor` of the same type and shape as `logits`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of the same shape as `logits` with the componentwise
    logistic losses.

  Raises:
    ValueError: If `logits` and `targets` do not have the same shape.
  """
    with ops.op_scope([logits, targets], name, "logistic_loss") as name:
        logits = ops.convert_to_tensor(logits, name="logits")
        targets = ops.convert_to_tensor(targets, name="targets")
        try:
            targets.get_shape().merge_with(logits.get_shape())
        except ValueError:
            raise ValueError(
                "logits and targets must have the same shape (%s vs %s)" %
                (logits.get_shape(), targets.get_shape()))

        # The logistic loss formula from above is
        #   x - x * z + log(1 + exp(-x))
        # For x < 0, a more numerically stable formula is
        #   -x * z + log(1 + exp(x))
        # To avoid branching, we use the combined version
        #   max(x, 0) - x * z + log(1 + exp(-abs(x)))
        return math_ops.add(nn_ops.relu(logits) - logits * targets,
                            math_ops.log(1 +
                                         math_ops.exp(-math_ops.abs(logits))),
                            name=name)
Example #60
0
    def _testScopedExport(self, test_dir, exported_filenames):
        graph = ops.Graph()
        with graph.as_default():
            # Creates an inference graph.
            # Hidden 1
            colocate_constraint = constant_op.constant(1.2, name="constraint")
            images = constant_op.constant(1.2,
                                          dtypes.float32,
                                          shape=[100, 28],
                                          name="images")
            with ops.name_scope("hidden1"):
                with graph.colocate_with(colocate_constraint.op):
                    weights1 = variables.Variable(random_ops.truncated_normal(
                        [28, 128], stddev=1.0 / math.sqrt(float(28))),
                                                  name="weights")
                # The use of control_flow_ops.cond here is purely for adding test
                # coverage for the save and restore of control flow context (which
                # doesn't make any sense here from a machine learning perspective).
                # A typical biases variable is a simple Variable without the condition.
                biases1 = variables.Variable(control_flow_ops.cond(
                    math_ops.less(random.random(),
                                  0.5), lambda: array_ops.ones([128]),
                    lambda: array_ops.zeros([128])),
                                             name="biases")
                hidden1 = nn_ops.relu(
                    math_ops.matmul(images, weights1) + biases1)

            # Hidden 2
            with ops.name_scope("hidden2"):
                weights2 = variables.Variable(random_ops.truncated_normal(
                    [128, 32], stddev=1.0 / math.sqrt(float(128))),
                                              name="weights")

                # The use of control_flow_ops.while_loop here is purely for adding
                # test coverage for the save and restore of control flow context
                # (which doesn't make any sense here from a machine learning
                # perspective). A typical biases variable is a simple Variable
                # without the loop.
                def loop_cond(it, _):
                    return it < 2

                def loop_body(it, biases2):
                    biases2 += constant_op.constant(0.1, shape=[32])
                    return it + 1, biases2

                _, biases2 = control_flow_ops.while_loop(
                    loop_cond, loop_body, [
                        constant_op.constant(0),
                        variables.Variable(array_ops.zeros([32]),
                                           name="biases")
                    ])
                hidden2 = nn_ops.relu(
                    math_ops.matmul(hidden1, weights2) + biases2)
            # Linear
            with ops.name_scope("softmax_linear"):
                weights3 = variables.Variable(random_ops.truncated_normal(
                    [32, 10], stddev=1.0 / math.sqrt(float(32))),
                                              name="weights")
                biases3 = variables.Variable(array_ops.zeros([10]),
                                             name="biases")
                logits = math_ops.matmul(hidden2, weights3) + biases3
                ops.add_to_collection("logits", logits)

            # Exports each sub-graph.
            # Exports the first one with unbound_inputs_col_name set to default.
            orig_meta_graph1, var_list = meta_graph.export_scoped_meta_graph(
                filename=os.path.join(test_dir, exported_filenames[0]),
                graph=ops.get_default_graph(),
                export_scope="hidden1")
            self.assertEqual(["biases:0", "weights:0"],
                             sorted(var_list.keys()))
            var_names = [v.name for _, v in var_list.items()]
            self.assertEqual(["hidden1/biases:0", "hidden1/weights:0"],
                             sorted(var_names))

            # Exports the rest with no unbound_inputs_col_name.
            orig_meta_graph2, _ = meta_graph.export_scoped_meta_graph(
                filename=os.path.join(test_dir, exported_filenames[1]),
                graph=ops.get_default_graph(),
                export_scope="hidden2",
                unbound_inputs_col_name=None)
            orig_meta_graph3, _ = meta_graph.export_scoped_meta_graph(
                filename=os.path.join(test_dir, exported_filenames[2]),
                graph=ops.get_default_graph(),
                export_scope="softmax_linear",
                unbound_inputs_col_name=None)

        return [orig_meta_graph1, orig_meta_graph2, orig_meta_graph3]