Example No. 1
    def GraphFn(self, inp):
        """Create a graph containing multiple segment."""
        dtype = inp.dtype
        conv_filter = constant_op.constant(
            [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]],
            name="weights",
            dtype=dtype)
        conv = nn.conv2d(input=inp,
                         filter=conv_filter,
                         strides=[1, 2, 2, 1],
                         padding="SAME",
                         name="conv")
        c1 = constant_op.constant(np.random.randn(12, 12, 6),
                                  dtype=dtype,
                                  name="c1")
        p = math_ops.mul(conv, c1, name="mul")
        c2 = constant_op.constant(np.random.randn(12, 12, 6),
                                  dtype=dtype,
                                  name="c2")
        q = math_ops.div(conv, c2, name="div")

        edge = self.trt_incompatible_op(q, name="incompatible")
        edge = math_ops.div(edge, edge, name="div1")
        r = math_ops.add(edge, edge, name="add")

        p = math_ops.sub(p, edge, name="sub")
        q = math_ops.mul(q, edge, name="mul1")
        s = math_ops.add(p, q, name="add1")
        s = math_ops.sub(s, r, name="sub1")
        return array_ops.squeeze(s, name="output_0")
Example No. 2
def hinge_loss(logits, labels=None, scope=None, target=None):
    """Method that returns the loss tensor for hinge loss.

  Args:
    logits: The logits, a float tensor.
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0.
    scope: The scope for the operations performed in computing the loss.
    target: Deprecated alias for `labels`.

  Returns:
    A `Tensor` of same shape as logits and target representing the loss values
      across the batch.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
    labels = _labels(labels, target)
    with ops.name_scope(scope, "hinge_loss", [logits, labels]) as scope:
        logits.get_shape().assert_is_compatible_with(labels.get_shape())
        # We first need to convert binary labels to -1/1 labels (as floats).
        labels = math_ops.to_float(labels)
        all_ones = array_ops.ones_like(labels)
        labels = math_ops.sub(2 * labels, all_ones)
        return nn_ops.relu(math_ops.sub(all_ones, math_ops.mul(labels,
                                                               logits)))
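For reference, the computation above is the standard hinge loss max(0, 1 - y * logits) with 0/1 labels remapped to -1/+1. A minimal NumPy sketch of the same arithmetic (the helper name is illustrative, not part of the snippet):

import numpy as np

def hinge_loss_np(logits, labels):
    # Map 0/1 labels to -1/+1, then apply max(0, 1 - y * logits).
    y = 2.0 * np.asarray(labels, dtype=np.float64) - 1.0
    return np.maximum(0.0, 1.0 - y * np.asarray(logits, dtype=np.float64))

print(hinge_loss_np([2.0, -0.5], [1.0, 1.0]))  # -> [0.  1.5]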
Example No. 3
def hinge_loss(labels, logits, weights=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES):
  """Adds a hinge loss to the training procedure.

  WARNING: `weights` also supports dimensions of 1, but the broadcasting does
  not work as advertised, you'll wind up with weighted sum instead of weighted
  mean for any but the last dimension. This will be cleaned up soon, so please
  do not rely on the current behavior for anything but the shapes documented for
  `weights` below.

  Args:
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0.
    logits: The logits, a float tensor.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      `[batch_size]`, or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` of the loss value.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
  with ops.name_scope(scope, "hinge_loss", [logits, labels]) as scope:
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
    labels = math_ops.to_float(labels)
    all_ones = array_ops.ones_like(labels)
    labels = math_ops.sub(2 * labels, all_ones)
    losses = nn_ops.relu(math_ops.sub(all_ones, math_ops.mul(labels, logits)))
    return compute_weighted_loss(losses, weights, scope, loss_collection)
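The exact reduction performed by `compute_weighted_loss` varies across TF versions, but for the scalar and `[batch_size]` weight shapes documented above it behaves like a weighted mean of the elementwise losses. A rough NumPy sketch of that reduction (an assumption about the semantics, not a transcription of the TF helper):

import numpy as np

def weighted_mean_loss(losses, weights=1.0):
    # Broadcast the weights against the losses and take a weighted mean.
    losses = np.asarray(losses, dtype=np.float64)
    w = np.broadcast_to(np.asarray(weights, dtype=np.float64), losses.shape)
    return np.sum(losses * w) / np.sum(w)

print(weighted_mean_loss([0.0, 1.5, 0.5], weights=[1.0, 2.0, 1.0]))  # -> 0.875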
Example No. 4
def hinge_loss_cap(logits, target, cap=10.0, scope=None):
  """Method that returns the loss tensor for capped hinge loss.

  Args:
    logits: The logits, a float tensor.
    target: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0.
    cap: Parameter for the hinge loss upper bound.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    A `Tensor` of same shape as logits and target representing the loss values
      across the batch.

  Raises:
    ValueError: If the shapes of `logits` and `target` don't match.
  """
  with ops.name_scope(scope) as scope:
    logits.get_shape().assert_is_compatible_with(target.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
    target = math_ops.to_float(target)
    all_ones = array_ops.ones_like(target)
    # convert labels into {1, -1} matrix
    labels = math_ops.sub(2 * target, all_ones)
    cross_prod = math_ops.mul(labels, logits)
    losses = nn_ops.relu(math_ops.sub(all_ones, cross_prod))
    # losses_cap = -nn_ops.relu(math_ops.sub(cap, losses))
    losses_cap = math_ops.minimum(cap, losses)
    return losses_cap
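A quick NumPy check of the capped variant, min(cap, max(0, 1 - y * logits)) (illustrative names only):

import numpy as np

def capped_hinge_np(logits, target, cap=10.0):
    y = 2.0 * np.asarray(target, dtype=np.float64) - 1.0  # 0/1 -> -1/+1
    losses = np.maximum(0.0, 1.0 - y * np.asarray(logits, dtype=np.float64))
    return np.minimum(cap, losses)  # clip the hinge values at `cap`

print(capped_hinge_np([-20.0, 0.5], [1.0, 1.0]))  # -> [10.   0.5]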
Example No. 5
def hinge_loss(labels,
               logits,
               weights=1.0,
               scope=None,
               loss_collection=ops.GraphKeys.LOSSES):
    """Adds a hinge loss to the training procedure.

  Args:
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0.
    logits: The logits, a float tensor.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      [batch_size], or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` of the loss value.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
    with ops.name_scope(scope, "hinge_loss", [logits, labels]) as scope:
        logits.get_shape().assert_is_compatible_with(labels.get_shape())
        # We first need to convert binary labels to -1/1 labels (as floats).
        labels = math_ops.to_float(labels)
        all_ones = array_ops.ones_like(labels)
        labels = math_ops.sub(2 * labels, all_ones)
        losses = nn_ops.relu(
            math_ops.sub(all_ones, math_ops.mul(labels, logits)))
        return compute_weighted_loss(losses, weights, scope, loss_collection)
Example No. 6
def hinge_loss(logits, labels=None, scope=None, target=None):
  """Method that returns the loss tensor for hinge loss.

  Args:
    logits: The logits, a float tensor.
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0.
    scope: The scope for the operations performed in computing the loss.
    target: Deprecated alias for `labels`.

  Returns:
    A `Tensor` of same shape as logits and target representing the loss values
      across the batch.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
  labels = _labels(labels, target)
  with ops.name_scope(scope, "hinge_loss", [logits, labels]) as scope:
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
    labels = math_ops.to_float(labels)
    all_ones = array_ops.ones_like(labels)
    labels = math_ops.sub(2 * labels, all_ones)
    return nn_ops.relu(math_ops.sub(all_ones, math_ops.mul(labels, logits)))
Example No. 7
def hinge_loss(labels, logits, weights=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES):
  """Adds a hinge loss to the training procedure.

  Args:
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0.
    logits: The logits, a float tensor.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      [batch_size], or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` of the loss value.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
  with ops.name_scope(scope, "hinge_loss", [logits, labels]) as scope:
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
    labels = math_ops.to_float(labels)
    all_ones = array_ops.ones_like(labels)
    labels = math_ops.sub(2 * labels, all_ones)
    losses = nn_ops.relu(math_ops.sub(all_ones, math_ops.mul(labels, logits)))
    return compute_weighted_loss(losses, weights, scope, loss_collection)
Example No. 8
def hingesig_tf(y_true, y_pred):
    """Computes the hingeles for a sigmoidal output by apply the logit to y_pred.
        Note: this function is intended for TENSORFLOW.

    Arguments:
        Arguments:
            y_true  -- a tensorflow tensor holding the true labels
            y_pred -- a tensorflow tensor holding the raw pradictions, i.e. the sigmoid output

    Returns:
        a tensorflow tensor with hingeloss
    """
    y_true = math_ops.to_float(y_true)

    all_ones = array_ops.ones_like(y_true)
    all_zeros = array_ops.zeros_like(y_true)
    y_true = math_ops.subtract(2 * y_true, all_ones)

    compl_y_pred = tf.clip_by_value(math_ops.sub(1., y_pred), 1e-20, 1)
    y_pred = tf.clip_by_value(y_pred, 1e-20, 1)
    logits = math_ops.log(math_ops.div(y_pred, compl_y_pred))
    logits_2 = math_ops.div(
        logits,
        math_ops.log(2.0),
    )
    return math_ops.reduce_mean(math_ops.maximum(
        math_ops.sub(1.0, math_ops.multiply(logits_2, y_true)), 0.0),
                                axis=-1)
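The core of the snippet is the inverse sigmoid log(p / (1 - p)), rescaled by log 2, applied before the usual hinge expression. A hedged NumPy sketch of just that conversion step (helper name is illustrative):

import numpy as np

def sigmoid_to_logit(y_pred, eps=1e-20):
    # Same clipping as the TF code above, then the base-2 logit.
    p = np.clip(y_pred, eps, 1.0)
    q = np.clip(1.0 - np.asarray(y_pred, dtype=np.float64), eps, 1.0)
    return np.log(p / q) / np.log(2.0)

print(sigmoid_to_logit(np.array([0.5, 0.9])))  # -> [0.  3.1699...]; sigmoid(0) == 0.5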
Example No. 9
  def testStripUnusedMultipleInputs(self):
    input_graph_name = "input_graph.pb"
    output_graph_name = "output_graph.pb"

    # We'll create an input graph that multiplies two input nodes.
    with ops.Graph().as_default():
      constant_node1 = constant_op.constant(1.0, name="constant_node1")
      constant_node2 = constant_op.constant(2.0, name="constant_node2")
      input_node1 = math_ops.sub(constant_node1, 3.0, name="input_node1")
      input_node2 = math_ops.sub(constant_node2, 5.0, name="input_node2")
      output_node = math_ops.multiply(
          input_node1, input_node2, name="output_node")
      math_ops.add(output_node, 2.0, name="later_node")
      sess = session.Session()
      output = sess.run(output_node)
      self.assertNear(6.0, output, 0.00001)
      graph_io.write_graph(sess.graph, self.get_temp_dir(), input_graph_name)

    # We save out the graph to disk, and then call the const conversion
    # routine.
    input_graph_path = os.path.join(self.get_temp_dir(), input_graph_name)
    input_binary = False
    input_node_names = "input_node1,input_node2"
    input_node_types = [
        dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum
    ]
    output_binary = True
    output_node_names = "output_node"
    output_graph_path = os.path.join(self.get_temp_dir(), output_graph_name)

    strip_unused_lib.strip_unused_from_files(input_graph_path, input_binary,
                                             output_graph_path, output_binary,
                                             input_node_names,
                                             output_node_names,
                                             input_node_types)

    # Now we make sure the variable is now a constant, and that the graph still
    # produces the expected result.
    with ops.Graph().as_default():
      output_graph_def = graph_pb2.GraphDef()
      with open(output_graph_path, "rb") as f:
        output_graph_def.ParseFromString(f.read())
        _ = importer.import_graph_def(output_graph_def, name="")

      self.assertEqual(3, len(output_graph_def.node))
      for node in output_graph_def.node:
        self.assertNotEqual("Add", node.op)
        self.assertNotEqual("Sub", node.op)
        if node.name == input_node_names:
          self.assertTrue("shape" in node.attr)

      with session.Session() as sess:
        input_node1 = sess.graph.get_tensor_by_name("input_node1:0")
        input_node2 = sess.graph.get_tensor_by_name("input_node2:0")
        output_node = sess.graph.get_tensor_by_name("output_node:0")
        output = sess.run(output_node,
                          feed_dict={input_node1: [10.0],
                                     input_node2: [-5.0]})
        self.assertNear(-50.0, output, 0.00001)
Example No. 10
    def unregularized_loss(self, examples):
        """Add operations to compute the loss (without the regularization loss).

    Args:
      examples: Examples to compute unregularized loss on.

    Returns:
      An Operation that computes the mean (unregularized) loss for the given
      set of examples.

    Raises:
      ValueError: if examples are not well defined.
    """
        self._assertSpecified([
            'example_labels', 'example_weights', 'sparse_features',
            'dense_features'
        ], examples)
        self._assertList(['sparse_features', 'dense_features'], examples)
        with name_scope('sdca/unregularized_loss'):
            predictions = math_ops.cast(self._linear_predictions(examples),
                                        dtypes.float64)
            labels = math_ops.cast(
                internal_convert_to_tensor(examples['example_labels']),
                dtypes.float64)
            weights = math_ops.cast(
                internal_convert_to_tensor(examples['example_weights']),
                dtypes.float64)

            if self._options['loss_type'] == 'logistic_loss':
                return math_ops.reduce_sum(
                    math_ops.mul(
                        sigmoid_cross_entropy_with_logits(predictions, labels),
                        weights)) / math_ops.reduce_sum(weights)

            if self._options['loss_type'] in [
                    'hinge_loss', 'smooth_hinge_loss'
            ]:
                # hinge_loss = max{0, 1 - y_i w*x} where y_i \in {-1, 1}. So, we need to
                # first convert 0/1 labels into -1/1 labels.
                all_ones = array_ops.ones_like(predictions)
                adjusted_labels = math_ops.sub(2 * labels, all_ones)
                # Tensor that contains (unweighted) error (hinge loss) per
                # example.
                error = nn_ops.relu(
                    math_ops.sub(all_ones,
                                 math_ops.mul(adjusted_labels, predictions)))
                weighted_error = math_ops.mul(error, weights)
                return math_ops.reduce_sum(
                    weighted_error) / math_ops.reduce_sum(weights)

            # squared loss
            err = math_ops.sub(labels, predictions)

            weighted_squared_err = math_ops.mul(math_ops.square(err), weights)
            # SDCA squared loss function is sum(err^2) / (2*sum(weights))
            return (math_ops.reduce_sum(weighted_squared_err) /
                    (2.0 * math_ops.reduce_sum(weights)))
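For the squared-loss branch, the returned value is sum(w * err**2) / (2 * sum(w)). A short NumPy check of that formula (values are illustrative):

import numpy as np

labels = np.array([1.0, 0.0, 1.0])
predictions = np.array([0.8, 0.3, 0.5])
weights = np.array([1.0, 2.0, 1.0])

err = labels - predictions
sdca_squared_loss = np.sum(weights * err**2) / (2.0 * np.sum(weights))
print(sdca_squared_loss)  # (0.04 + 0.18 + 0.25) / 8 = 0.05875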
Example No. 11
    def GetParams(self):
        """Create a graph containing multiple segment."""
        # TODO(aaroey): test graph with different dtypes.
        dtype = dtypes.float32
        input_name = "input"
        input_dims = [100, 24, 24, 2]
        g = ops.Graph()
        with g.as_default():
            inp = array_ops.placeholder(dtype=dtype,
                                        shape=[None] + input_dims[1:],
                                        name=input_name)
            with g.device("/GPU:0"):
                conv_filter = constant_op.constant(
                    [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]
                     ],
                    name="weights",
                    dtype=dtype)
                conv = nn.conv2d(input=inp,
                                 filter=conv_filter,
                                 strides=[1, 2, 2, 1],
                                 padding="SAME",
                                 name="conv")
                c1 = constant_op.constant(np.random.randn(
                    input_dims[0], 12, 12, 6),
                                          dtype=dtype,
                                          name="c1")
                p = math_ops.mul(conv, c1, name="mul")
                c2 = constant_op.constant(np.random.randn(
                    input_dims[0], 12, 12, 6),
                                          dtype=dtype,
                                          name="c2")
                q = math_ops.div(conv, c2, name="div")

                edge = self.trt_incompatible_op(q, name="incompatible")
                edge = math_ops.div(edge, edge, name="div1")
                r = math_ops.add(edge, edge, name="add")

                p = math_ops.sub(p, edge, name="sub")
                q = math_ops.mul(q, edge, name="mul1")
                s = math_ops.add(p, q, name="add1")
                s = math_ops.sub(s, r, name="sub1")
            array_ops.squeeze(s, name=self.output_name)
        return trt_test.TfTrtIntegrationTestParams(
            gdef=g.as_graph_def(),
            input_names=[input_name],
            input_dims=[input_dims],
            # TODO(aaroey): LayoutOptimizer adds additional nodes to the graph which
            # breaks the connection check, fix it.
            # - my_trt_op_0 should have ["mul", "sub", "div1", "mul1", "add1",
            #   "add", "sub1"];
            # - my_trt_op_1 should have ["weights","conv", "div"]
            expected_engines=["my_trt_op_0", "my_trt_op_1"],
            expected_output_dims=(100, 12, 12, 6),
            allclose_atol=1.e-03,
            allclose_rtol=1.e-03)
Example No. 12
def _binary_hinge_loss(logits, target):
    """Method that returns the loss vector for binary hinge loss."""
    check_shape_op = logging_ops.Assert(
        math_ops.less_equal(array_ops.rank(target), 2),
        ["target's shape should be either [batch_size, 1] or [batch_size]"],
    )
    with ops.control_dependencies([check_shape_op]):
        target = array_ops.reshape(target, shape=[array_ops.shape(target)[0], 1])
    # First need to convert binary labels to -1/1 labels (as floats).
    all_ones = array_ops.ones_like(logits)
    labels = math_ops.sub(2 * math_ops.to_float(target), all_ones)
    loss_vec = nn_ops.relu(math_ops.sub(all_ones, math_ops.mul(labels, logits)))
    return loss_vec
Example No. 13
  def unregularized_loss(self, examples):
    """Add operations to compute the loss (without the regularization loss).

    Args:
      examples: Examples to compute unregularized loss on.

    Returns:
      An Operation that computes the mean (unregularized) loss for the given
      set of examples.

    Raises:
      ValueError: if examples are not well defined.
    """
    self._assertSpecified(['example_labels', 'example_weights',
                           'sparse_features', 'dense_features'], examples)
    self._assertList(['sparse_features', 'dense_features'], examples)
    with name_scope('sdca/unregularized_loss'):
      predictions = math_ops.cast(
          self._linear_predictions(examples), dtypes.float64)
      labels = math_ops.cast(
          internal_convert_to_tensor(
              examples['example_labels']), dtypes.float64)
      weights = math_ops.cast(
          internal_convert_to_tensor(
              examples['example_weights']), dtypes.float64)

      if self._options['loss_type'] == 'logistic_loss':
        return math_ops.reduce_sum(math_ops.mul(
            sigmoid_cross_entropy_with_logits(predictions, labels),
            weights)) / math_ops.reduce_sum(weights)

      if self._options['loss_type'] in ['hinge_loss', 'smooth_hinge_loss']:
        # hinge_loss = max{0, 1 - y_i w*x} where y_i \in {-1, 1}. So, we need to
        # first convert 0/1 labels into -1/1 labels.
        all_ones = array_ops.ones_like(predictions)
        adjusted_labels = math_ops.sub(2 * labels, all_ones)
        # Tensor that contains (unweighted) error (hinge loss) per
        # example.
        error = nn_ops.relu(math_ops.sub(all_ones, math_ops.mul(adjusted_labels,
                                                                predictions)))
        weighted_error = math_ops.mul(error, weights)
        return math_ops.reduce_sum(weighted_error) / math_ops.reduce_sum(
            weights)

      # squared loss
      err = math_ops.sub(labels, predictions)

      weighted_squared_err = math_ops.mul(math_ops.square(err), weights)
      # SDCA squared loss function is sum(err^2) / (2*sum(weights))
      return (math_ops.reduce_sum(weighted_squared_err) /
              (2.0 * math_ops.reduce_sum(weights)))
Example No. 14
    def GetParams(self):
        """Create a graph containing multiple segment."""
        # TODO(aaroey): test graph with different dtypes.
        dtype = dtypes.float32
        input_name = "input"
        input_dims = [100, 24, 24, 2]
        output_name = "output"
        g = ops.Graph()
        with g.as_default():
            inp = array_ops.placeholder(dtype=dtype,
                                        shape=[None] + input_dims[1:],
                                        name=input_name)
            with g.device("/GPU:0"):
                conv_filter = constant_op.constant(
                    [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]
                     ],
                    name="weights",
                    dtype=dtype)
                conv = nn.conv2d(input=inp,
                                 filter=conv_filter,
                                 strides=[1, 2, 2, 1],
                                 padding="SAME",
                                 name="conv")
                c1 = constant_op.constant(np.random.randn(
                    input_dims[0], 12, 12, 6),
                                          dtype=dtype,
                                          name="c1")
                p = math_ops.mul(conv, c1, name="mul")
                c2 = constant_op.constant(np.random.randn(
                    input_dims[0], 12, 12, 6),
                                          dtype=dtype,
                                          name="c2")
                q = math_ops.div(conv, c2, name="div")

                edge = self.trt_incompatible_op(q, name="incompatible")
                edge = math_ops.div(edge, edge, name="div1")
                r = math_ops.add(edge, edge, name="add")

                p = math_ops.sub(p, edge, name="sub")
                q = math_ops.mul(q, edge, name="mul1")
                s = math_ops.add(p, q, name="add1")
                s = math_ops.sub(s, r, name="sub1")
            array_ops.squeeze(s, name=output_name)
        return trt_test.TfTrtIntegrationTestParams(gdef=g.as_graph_def(),
                                                   input_names=[input_name],
                                                   input_dims=[input_dims],
                                                   output_names=[output_name],
                                                   expected_output_dims=[
                                                       (100, 12, 12, 6)
                                                   ])
Example No. 15
  def GetParams(self):
    """Create a graph containing multiple segment."""
    # TODO(aaroey): test graph with different dtypes.
    dtype = dtypes.float32
    input_name = "input"
    input_dims = [100, 24, 24, 2]
    g = ops.Graph()
    with g.as_default():
      inp = array_ops.placeholder(
          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
      with g.device("/GPU:0"):
        conv_filter = constant_op.constant(
            [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]],
            name="weights",
            dtype=dtype)
        conv = nn.conv2d(
            input=inp,
            filter=conv_filter,
            strides=[1, 2, 2, 1],
            padding="SAME",
            name="conv")
        c1 = constant_op.constant(
            np.random.randn(input_dims[0], 12, 12, 6), dtype=dtype, name="c1")
        p = math_ops.mul(conv, c1, name="mul")
        c2 = constant_op.constant(
            np.random.randn(input_dims[0], 12, 12, 6), dtype=dtype, name="c2")
        q = math_ops.div(conv, c2, name="div")

        edge = self.trt_incompatible_op(q, name="incompatible")
        edge = math_ops.div(edge, edge, name="div1")
        r = math_ops.add(edge, edge, name="add")

        p = math_ops.sub(p, edge, name="sub")
        q = math_ops.mul(q, edge, name="mul1")
        s = math_ops.add(p, q, name="add1")
        s = math_ops.sub(s, r, name="sub1")
      array_ops.squeeze(s, name=self.output_name)
    return trt_test.TfTrtIntegrationTestParams(
        gdef=g.as_graph_def(),
        input_names=[input_name],
        input_dims=[input_dims],
        # TODO(aaroey): LayoutOptimizer adds additional nodes to the graph which
        # breaks the connection check, fix it.
        # - my_trt_op_0 should have ["mul", "sub", "div1", "mul1", "add1",
        #   "add", "sub1"];
        # - my_trt_op_1 should have ["weights","conv", "div"]
        expected_engines=["my_trt_op_0", "my_trt_op_1"],
        expected_output_dims=(100, 12, 12, 6),
        allclose_atol=1.e-03,
        allclose_rtol=1.e-03)
Example No. 16
def _binary_hinge_loss(logits, target):
    """Method that returns the loss vector for binary hinge loss."""
    check_shape_op = logging_ops.Assert(
        math_ops.less_equal(array_ops.rank(target), 2),
        ["target's shape should be either [batch_size, 1] or [batch_size]"])
    with ops.control_dependencies([check_shape_op]):
        target = array_ops.reshape(target,
                                   shape=[array_ops.shape(target)[0], 1])
    # First need to convert binary labels to -1/1 labels (as floats).
    all_ones = array_ops.ones_like(logits)
    labels = math_ops.sub(2 * math_ops.to_float(target), all_ones)
    loss_vec = nn_ops.relu(math_ops.sub(all_ones, math_ops.mul(labels,
                                                               logits)))
    return loss_vec
Example No. 17
    def GetParams(self):
        """Neighboring node wiring tests in TF-TRT conversion."""
        dtype = dtypes.float32
        input_name = "input"
        input_dims = [2, 3, 7, 5]
        output_name = "output"
        g = ops.Graph()
        with g.as_default():
            x = array_ops.placeholder(dtype=dtype,
                                      shape=input_dims,
                                      name=input_name)
            e = constant_op.constant(np.random.normal(.3, 0.05, [3, 2, 3, 4]),
                                     name="weights",
                                     dtype=dtype)
            conv = nn.conv2d(input=x,
                             filter=e,
                             data_format="NCHW",
                             strides=[1, 1, 1, 1],
                             padding="VALID",
                             name="conv")
            b = constant_op.constant(np.random.normal(1.0, 1.0, [1, 4, 1, 1]),
                                     name="bias",
                                     dtype=dtype)
            t = math_ops.mul(conv, b, name="mul")
            e = self.trt_incompatible_op(conv, name="incompatible")
            t = math_ops.sub(t, e, name="sub")
            array_ops.squeeze(t, name=output_name)
        return trt_test.TfTrtIntegrationTestParams(gdef=g.as_graph_def(),
                                                   input_names=[input_name],
                                                   input_dims=[input_dims],
                                                   output_names=[output_name],
                                                   expected_output_dims=[
                                                       (2, 4, 5, 4)
                                                   ])
Example No. 18
    def _resource_apply_sparse(self, grad, var, indices):
        var_dtype = var.dtype.base_dtype
        lr_t = self._decayed_lr(var_dtype)
        local_step = math_ops.cast(self.iterations + 1, var_dtype)
        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')
        beta_1_t = array_ops.identity(self._get_hyper('beta_1', var_dtype))
        beta_2_t = array_ops.identity(self._get_hyper('beta_2', var_dtype))
        beta_1_power = math_ops.pow(beta_1_t, local_step)
        beta_2_power = math_ops.pow(beta_2_t, local_step)
        epsilon_t = ops.convert_to_tensor(self.epsilon, var_dtype)
        total_iterations = self.total_iterations

        lr_t = lr_t * math_ops.sqrt(1 - beta_2_power) / (1 - beta_1_power)

        # Learning rate multipliers
        if self.lr_multipliers is not None:
            lr_t = _apply_lr_multiplier(self, lr_t, var)
        # Cosine annealing
        if self.use_cosine_annealing and total_iterations != 0:
            self.eta_t = _compute_eta_t(self)

        m_scaled_g_values = grad * (1 - beta_1_t)
        m_t = state_ops.assign(m, m * beta_1_t, use_locking=self._use_locking)
        with ops.control_dependencies([m_t]):
            m_t = self._resource_scatter_add(m, indices, m_scaled_g_values)

        v_scaled_g_values = (grad * grad) * (1 - beta_2_t)
        v_t = state_ops.assign(v, v * beta_2_t, use_locking=self._use_locking)
        with ops.control_dependencies([v_t]):
            v_t = self._resource_scatter_add(v, indices, v_scaled_g_values)

        if self.amsgrad:
            vhat = self.get_slot(var, 'vhat')
            vhat_t = state_ops.assign(vhat,
                                      math_ops.maximum(vhat, v_t),
                                      use_locking=self._use_locking)
            var_delta = m_t / (math_ops.sqrt(vhat_t) + epsilon_t)
        else:
            var_delta = m_t / (math_ops.sqrt(v_t) + epsilon_t)

        var_t = math_ops.sub(var, self.eta_t * lr_t * var_delta)

        # Weight decays
        if var.name in self.weight_decays.keys() and total_iterations != 0:
            var_t = _apply_weight_decays(self, var, var_t)

        iteration_done = self._updates_processed == (self._updates_per_iter - 1)
        _up = self._updates_processed
        self._updates_processed = (_up + 1) if not iteration_done else 0
        if iteration_done and not self._init_notified:
            self._init_notified = True

        var_update = state_ops.assign(var, var_t, use_locking=self._use_locking)
        t_cur = state_ops.assign_add(self.t_cur, int(iteration_done),
                                     use_locking=self._use_locking)
        updates = [var_update, m_t, v_t, t_cur]
        if self.amsgrad:
            updates.append(vhat_t)
        return control_flow_ops.group(*updates)
Example No. 19
def normalize_moments(counts, mean_ss, variance_ss, shift, name=None):
  """Calculate the mean and variance of based on the sufficient statistics.

  Args:
    counts: A `Tensor` containing the total count of the data (one value).
    mean_ss: A `Tensor` containing the mean sufficient statistics: the (possibly
      shifted) sum of the elements to average over.
    variance_ss: A `Tensor` containing the variance sufficient statistics: the
      (possibly shifted) squared sum of the data to compute the variance over.
    shift: A `Tensor` containing the value by which the data is shifted for
      numerical stability, or `None` if no shift was performed.
    name: Name used to scope the operations that compute the moments.

  Returns:
    Two `Tensor` objects: `mean` and `variance`.
  """
  with ops.op_scope([counts, mean_ss, variance_ss, shift], name, "normalize"):
    divisor = math_ops.inv(counts, name="divisor")
    if shift is not None:
      shifted_mean = math_ops.mul(mean_ss, divisor, name="shifted_mean")
      mean = math_ops.add(shifted_mean, shift, name="mean")
    else:  # no shift.
      shifted_mean = math_ops.mul(mean_ss, divisor, name="mean")
      mean = shifted_mean
    variance = math_ops.sub(
        math_ops.mul(variance_ss, divisor),
        math_ops.square(shifted_mean),
        name="variance")
  return (mean, variance)
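In closed form: with a shift s, mean = mean_ss / counts + s and variance = variance_ss / counts - (mean_ss / counts)**2. A NumPy sanity check against np.mean and np.var (a sketch, not the TF code):

import numpy as np

x = np.array([3.0, 5.0, 9.0, 11.0])
shift = x[0]                           # any shift near the data improves stability
counts = float(x.size)
mean_ss = np.sum(x - shift)            # (possibly shifted) sum
variance_ss = np.sum((x - shift)**2)   # (possibly shifted) squared sum

shifted_mean = mean_ss / counts
mean = shifted_mean + shift
variance = variance_ss / counts - shifted_mean**2
print(mean, np.mean(x))      # 7.0 7.0
print(variance, np.var(x))   # 10.0 10.0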
Example No. 20
def sum_of_squares(predictions, targets, weight=1.0, scope=None):
  """Adds a Sum-of-Squares loss to the training procedure.

  `weight` acts as a coefficient for the loss. If a scalar is provided, then the
  loss is simply scaled by the given value. If `weight` is a tensor of size
  [batch_size], then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weight` vector. If the shape of
  `weight` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weight`.

  Args:
    predictions: The predicted outputs.
    targets: The ground truth output tensor, same dimensions as 'predictions'.
    weight: Coefficients for the loss: a scalar, a tensor of shape
      [batch_size], or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `targets` or
      if the shape of `weight` is invalid.
  """
  with ops.op_scope([predictions, targets],
                    scope, "sum_of_squares_loss") as scope:
    predictions.get_shape().assert_is_compatible_with(targets.get_shape())
    if weight is None:
      raise ValueError("`weight` cannot be None")
    predictions = math_ops.to_float(predictions)
    targets = math_ops.to_float(targets)
    losses = math_ops.square(math_ops.sub(predictions, targets))
    return _compute_weighted_loss(losses, weight)
Example No. 21
  def unregularized_loss(self, examples):
    """Add operations to compute the loss (without the regularization loss).

        Args:
          examples: Examples to compute unregularized loss on.

        Returns:
          An Operation that computes the mean (unregularized) loss for the
          given set of examples.
        Raises:
          ValueError: if examples are not well defined.
        """
    self._assertSpecified(
        ['example_labels', 'example_weights', 'sparse_features',
         'dense_features'], examples)
    self._assertList(['sparse_features', 'dense_features'], examples)
    with name_scope('sdca/unregularized_loss'):
      predictions = self._linear_predictions(examples)
      labels = convert_to_tensor(examples['example_labels'])
      weights = convert_to_tensor(examples['example_weights'])

      if self._options['loss_type'] == 'logistic_loss':
        return math_ops.reduce_sum(math_ops.mul(
            sigmoid_cross_entropy_with_logits(
                predictions, labels), weights)) / math_ops.reduce_sum(weights)

      # squared loss
      err = math_ops.sub(labels, predictions)

      weighted_squared_err = math_ops.mul(math_ops.square(err), weights)
      return (math_ops.reduce_sum(weighted_squared_err) /
              math_ops.reduce_sum(weights))
Example No. 22
def _ragged_substr(text_input, begin, end):
    text_input_flat = None
    if ragged_tensor.is_ragged(text_input):
        text_input_flat = text_input.flat_values
    else:
        text_input_flat = text_input

    def _ragged_tile(x):
        input_text, indices = x
        multiple = math_ops.reduce_sum(indices.row_lengths())
        return array_ops.tile([input_text], [multiple])

    broadcasted_text = ragged_map_ops.map_fn(
        _ragged_tile,
        (text_input_flat, begin),
        dtype=ragged_tensor.RaggedTensorType(dtype=dtypes.string,
                                             ragged_rank=1),
        infer_shape=False,
    )
    size = math_ops.sub(array_ops.squeeze(end.flat_values),
                        array_ops.squeeze(begin.flat_values))
    new_tokens = string_ops.substr_v2(broadcasted_text,
                                      array_ops.squeeze(begin.flat_values),
                                      size)
    return begin.with_flat_values(new_tokens.flat_values)
Example No. 23
    def unregularized_loss(self, examples):
        """Add operations to compute the loss (without the regularization loss).

        Args:
          examples: Examples to compute unregularized loss on.

        Returns:
          An Operation that computes the mean (unregularized) loss for the
          given set of examples.
        Raises:
          ValueError: if examples are not well defined.
        """
        self._assertSpecified([
            'example_labels', 'example_weights', 'sparse_features',
            'dense_features'
        ], examples)
        self._assertList(['sparse_features', 'dense_features'], examples)
        with name_scope('sdca/unregularized_loss'):
            predictions = self._linear_predictions(examples)
            labels = convert_to_tensor(examples['example_labels'])
            weights = convert_to_tensor(examples['example_weights'])

            if self._options['loss_type'] == 'logistic_loss':
                return math_ops.reduce_sum(
                    math_ops.mul(
                        sigmoid_cross_entropy_with_logits(predictions, labels),
                        weights)) / math_ops.reduce_sum(weights)

            # squared loss
            err = math_ops.sub(labels, predictions)

            weighted_squared_err = math_ops.mul(math_ops.square(err), weights)
            return (math_ops.reduce_sum(weighted_squared_err) /
                    math_ops.reduce_sum(weights))
Example No. 24
  def GetParams(self):
    """Neighboring node wiring tests in TF-TRT conversion."""
    dtype = dtypes.float32
    input_name = "input"
    input_dims = [2, 3, 7, 5]
    g = ops.Graph()
    with g.as_default():
      x = array_ops.placeholder(dtype=dtype, shape=input_dims, name=input_name)
      e = constant_op.constant(
          np.random.normal(.3, 0.05, [3, 2, 3, 4]), name="weights", dtype=dtype)
      conv = nn.conv2d(
          input=x,
          filter=e,
          data_format="NCHW",
          strides=[1, 1, 1, 1],
          padding="VALID",
          name="conv")
      b = constant_op.constant(
          np.random.normal(1.0, 1.0, [1, 4, 1, 1]), name="bias", dtype=dtype)
      t = math_ops.mul(conv, b, name="mul")
      e = self.trt_incompatible_op(conv, name="incompatible")
      t = math_ops.sub(t, e, name="sub")
      array_ops.squeeze(t, name=self.output_name)
    return trt_test.TfTrtIntegrationTestParams(
        gdef=g.as_graph_def(),
        input_names=[input_name],
        input_dims=[input_dims],
        expected_engines={
            "my_trt_op_0": ["bias", "mul", "sub"],
            "my_trt_op_1": ["weights", "conv"]
        },
        expected_output_dims=(2, 4, 5, 4),
        allclose_atol=1.e-03,
        allclose_rtol=1.e-03)
Example No. 25
def normalize_moments(counts, mean_ss, variance_ss, shift, name=None):
  """Calculate the mean and variance of based on the sufficient statistics.

  Args:
    counts: A `Tensor` containing the total count of the data (one value).
    mean_ss: A `Tensor` containing the mean sufficient statistics: the (possibly
      shifted) sum of the elements to average over.
    variance_ss: A `Tensor` containing the variance sufficient statistics: the
      (possibly shifted) squared sum of the data to compute the variance over.
    shift: A `Tensor` containing the value by which the data is shifted for
      numerical stability, or `None` if no shift was performed.
    name: Name used to scope the operations that compute the moments.

  Returns:
    Two `Tensor` objects: `mean` and `variance`.
  """
  with ops.op_scope([counts, mean_ss, variance_ss, shift], name, "normalize"):
    divisor = math_ops.inv(counts, name="divisor")
    if shift is not None:
      shifted_mean = math_ops.mul(mean_ss, divisor, name="shifted_mean")
      mean = math_ops.add(shifted_mean, shift, name="mean")
    else:  # no shift.
      shifted_mean = math_ops.mul(mean_ss, divisor, name="mean")
      mean = shifted_mean
    variance = math_ops.sub(
        math_ops.mul(variance_ss, divisor),
        math_ops.square(shifted_mean),
        name="variance")
  return (mean, variance)
Example No. 26
def mean_squared_error(predictions, labels=None, weights=1.0, scope=None):
  """Adds a Sum-of-Squares loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  [batch_size], then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    predictions: The predicted outputs.
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      [batch_size], or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.
  """
  with ops.name_scope(scope, "mean_squared_error",
                      [predictions, labels, weights]) as scope:
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    losses = math_ops.square(math_ops.sub(predictions, labels))
    return compute_weighted_loss(losses, weights, scope=scope)
Example No. 27
def absolute_difference(predictions, labels=None, weights=1.0, scope=None):
  """Adds an Absolute Difference loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  [batch_size], then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    predictions: The predicted outputs.
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      [batch_size], or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.
  """
  with ops.name_scope(scope, "absolute_difference",
                      [predictions, labels, weights]) as scope:
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    losses = math_ops.abs(math_ops.sub(predictions, labels))
    return compute_weighted_loss(losses, weights, scope=scope)
Example No. 28
def sum_of_squares(predictions, targets, weight=1.0, scope=None):
  """Adds a Sum-of-Squares loss to the training procedure.

  `weight` acts as a coefficient for the loss. If a scalar is provided, then the
  loss is simply scaled by the given value. If `weight` is a tensor of size
  [batch_size], then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weight` vector. If the shape of
  `weight` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weight`.

  Args:
    predictions: The predicted outputs.
    targets: The ground truth output tensor, same dimensions as 'predictions'.
    weight: Coefficients for the loss: a scalar, a tensor of shape
      [batch_size], or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `targets` or
      if the shape of `weight` is invalid.
  """
  with ops.name_scope(scope, "sum_of_squares_loss",
                      [predictions, targets]) as scope:
    predictions.get_shape().assert_is_compatible_with(targets.get_shape())
    if weight is None:
      raise ValueError("`weight` cannot be None")
    predictions = math_ops.to_float(predictions)
    targets = math_ops.to_float(targets)
    losses = math_ops.square(math_ops.sub(predictions, targets))
    return compute_weighted_loss(losses, weight)
Example No. 29
def convert_PN_enc(target):
  """ Convert label encoding from one-hot 0-1 encoding to +1, -1 encoding
  """
  all_ones = array_ops.ones_like(target)
  # convert labels into {1, -1} matrix
  labels = math_ops.sub(2 * target, all_ones)
  return labels
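The conversion is just the affine map 2*t - 1, sending 0 to -1 and 1 to +1:

import numpy as np

target = np.array([0.0, 1.0, 1.0, 0.0])
print(2 * target - 1)  # -> [-1.  1.  1. -1.]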
Example No. 30
def mean_squared_error(labels, predictions, weights=1.0, scope=None,
                       loss_collection=ops.GraphKeys.LOSSES):
  """Adds a Sum-of-Squares loss to the training procedure.

  `weight` acts as a coefficient for the loss. If a scalar is provided, then the
  loss is simply scaled by the given value. If `weight` is a tensor of size
  [batch_size], then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weight` vector. If the shape of
  `weight` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weight`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      [batch_size], or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weight` is invalid.
  """
  with ops.name_scope(scope, "mean_squared_error",
                      [predictions, labels, weights]) as scope:
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    losses = math_ops.square(math_ops.sub(predictions, labels))
    return compute_weighted_loss(losses, weights, scope, loss_collection)
Example No. 31
    def _resource_apply_sparse(self, grad, var, indices):
        var_dtype = var.dtype.base_dtype
        lr_t = self._decayed_lr(var_dtype)
        local_step = math_ops.cast(self.iterations + 1, var_dtype)
        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')
        beta_1_t = array_ops.identity(self._get_hyper('beta_1', var_dtype))
        beta_2_t = array_ops.identity(self._get_hyper('beta_2', var_dtype))
        beta_1_power = math_ops.pow(beta_1_t, local_step)
        beta_2_power = math_ops.pow(beta_2_t, local_step)
        epsilon_t = ops.convert_to_tensor(self.epsilon, var_dtype)

        lr_t = lr_t * math_ops.sqrt(1 - beta_2_power) / (1 - beta_1_power)
        # Learning rate multipliers
        if self.lr_multipliers is not None:
            lr_t = _apply_lr_multiplier(self, lr_t, var)

        m_scaled_g_values = grad * (1 - beta_1_t)
        m_t = state_ops.assign(m, m * beta_1_t, use_locking=self._use_locking)
        with ops.control_dependencies([m_t]):
            m_t = self._resource_scatter_add(m, indices, m_scaled_g_values)

        v_scaled_g_values = (grad * grad) * (1 - beta_2_t)
        v_t = state_ops.assign(v, v * beta_2_t, use_locking=self._use_locking)
        with ops.control_dependencies([v_t]):
            v_t = self._resource_scatter_add(v, indices, v_scaled_g_values)

        if self.amsgrad:
            v_hat = self.get_slot(var, 'vhat')
            v_hat_t = math_ops.maximum(v_hat, v_t)
            with ops.control_dependencies([v_hat_t]):
                v_hat_t = state_ops.assign(
                    v_hat, v_hat_t, use_locking=self._use_locking)
            v_hat_sqrt = math_ops.sqrt(v_hat_t)
            var_delta = m_t / (v_hat_sqrt + epsilon_t)
        else:
            v_sqrt = math_ops.sqrt(v_t)
            var_delta = m_t / (v_sqrt + epsilon_t)

        var_t = math_ops.sub(var, self.eta_t * lr_t * var_delta)

        # Weight decays
        if var.name in self.weight_decays.keys():
            var_t = _apply_weight_decays(self, var, var_t)

        var_update = state_ops.assign(var, var_t, use_locking=self._use_locking)

        # Cosine annealing
        (iteration_done, t_cur_update, eta_t_update
         ) = _update_t_cur_eta_t_v2(self, lr_t, var)
        if iteration_done and not self._init_notified:
            self._init_notified = True

        updates = [var_update, m_t, v_t]
        if iteration_done:
            updates += [t_cur_update]
        if self.use_cosine_annealing and iteration_done:
            updates += [eta_t_update]
        return control_flow_ops.group(*updates)
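The variable update above is the standard bias-corrected Adam step, plus the optimizer's annealing and weight-decay hooks. A compact dense NumPy sketch of one step, with assumed default hyperparameters and without the slot/scatter bookkeeping of the snippet:

import numpy as np

def adam_step(var, grad, m, v, t, lr=1e-3, b1=0.9, b2=0.999, eps=1e-7):
    # Moment updates, then the same bias correction as `lr_t` above.
    m = b1 * m + (1 - b1) * grad
    v = b2 * v + (1 - b2) * grad**2
    lr_t = lr * np.sqrt(1 - b2**t) / (1 - b1**t)
    return var - lr_t * m / (np.sqrt(v) + eps), m, v

var = m = v = np.zeros(3)
var, m, v = adam_step(var, np.array([0.1, -0.2, 0.3]), m, v, t=1)
print(var)  # the first step moves each weight by roughly -lr * sign(grad)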
Example No. 32
  def testFindNodesWithBadTensorValues(self):
    with session.Session() as sess:
      u_name = "testFindNodesWithBadTensorValues/u"
      v_name = "testFindNodesWithBadTensorValues/v"
      w_name = "testFindNodesWithBadTensorValues/w"
      x_name = "testFindNodesWithBadTensorValues/x"
      y_name = "testFindNodesWithBadTensorValues/y"
      z_name = "testFindNodesWithBadTensorValues/z"

      u_init = constant_op.constant([2.0, 4.0])
      u = variables.Variable(u_init, name=u_name)
      v_init = constant_op.constant([2.0, 1.0])
      v = variables.Variable(v_init, name=v_name)

      # Expected output: [0.0, 3.0]
      w = math_ops.sub(u, v, name=w_name)

      # Expected output: [inf, 1.3333]
      x = math_ops.div(u, w, name=x_name)

      # Expected output: [nan, 4.0]
      y = math_ops.mul(w, x, name=y_name)

      z = math_ops.mul(y, y, name=z_name)

      u.initializer.run()
      v.initializer.run()

      run_options = config_pb2.RunOptions()
      debug_utils.watch_graph(
          run_options,
          sess.graph,
          debug_ops=["DebugIdentity"],
          debug_urls="file://%s" % self._dump_root)

      run_metadata = config_pb2.RunMetadata()
      sess.run(z, options=run_options, run_metadata=run_metadata)

      dump = debug_data.DebugDumpDir(self._dump_root)

      def has_bad_value(_, tensor):
        return np.any(np.isnan(tensor)) or np.any(np.isinf(tensor))

      # Find all "offending tensors".
      bad_data = dump.find(has_bad_value)

      # Verify that the nodes with bad values are caught through running find
      # on the debug dump.
      self.assertEqual(3, len(bad_data))
      self.assertEqual(x_name, bad_data[0].node_name)
      self.assertEqual(y_name, bad_data[1].node_name)
      self.assertEqual(z_name, bad_data[2].node_name)

      # Test first_n kwarg of find(): Find the first offending tensor.
      first_bad_datum = dump.find(has_bad_value, first_n=1)

      self.assertEqual(1, len(first_bad_datum))
      self.assertEqual(x_name, first_bad_datum[0].node_name)
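The inf/nan propagation that the test relies on is easy to reproduce with plain NumPy (matching the expected-output comments in the snippet):

import numpy as np

u = np.array([2.0, 4.0])
v = np.array([2.0, 1.0])
w = u - v                 # [0., 3.]
with np.errstate(divide="ignore", invalid="ignore"):
    x = u / w             # [inf, 1.3333...]
    y = w * x             # [nan, 4.]  (0 * inf is nan)
print(w, x, y)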
Example No. 33
    def testFindNodesWithBadTensorValues(self):
        with session.Session() as sess:
            u_name = "testFindNodesWithBadTensorValues/u"
            v_name = "testFindNodesWithBadTensorValues/v"
            w_name = "testFindNodesWithBadTensorValues/w"
            x_name = "testFindNodesWithBadTensorValues/x"
            y_name = "testFindNodesWithBadTensorValues/y"
            z_name = "testFindNodesWithBadTensorValues/z"

            u_init = constant_op.constant([2.0, 4.0])
            u = variables.Variable(u_init, name=u_name)
            v_init = constant_op.constant([2.0, 1.0])
            v = variables.Variable(v_init, name=v_name)

            # Expected output: [0.0, 3.0]
            w = math_ops.sub(u, v, name=w_name)

            # Expected output: [inf, 1.3333]
            x = math_ops.div(u, w, name=x_name)

            # Expected output: [nan, 4.0]
            y = math_ops.mul(w, x, name=y_name)

            z = math_ops.mul(y, y, name=z_name)

            u.initializer.run()
            v.initializer.run()

            run_options = config_pb2.RunOptions()
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=["DebugIdentity"],
                                    debug_urls="file://%s" % self._dump_root)

            run_metadata = config_pb2.RunMetadata()
            sess.run(z, options=run_options, run_metadata=run_metadata)

            dump = debug_data.DebugDumpDir(self._dump_root)

            def has_bad_value(_, tensor):
                return np.any(np.isnan(tensor)) or np.any(np.isinf(tensor))

            # Find all "offending tensors".
            bad_data = dump.find(has_bad_value)

            # Verify that the nodes with bad values are caught through running find
            # on the debug dump.
            self.assertEqual(3, len(bad_data))
            self.assertEqual(x_name, bad_data[0].node_name)
            self.assertEqual(y_name, bad_data[1].node_name)
            self.assertEqual(z_name, bad_data[2].node_name)

            # Test first_n kwarg of find(): Find the first offending tensor.
            first_bad_datum = dump.find(has_bad_value, first_n=1)

            self.assertEqual(1, len(first_bad_datum))
            self.assertEqual(x_name, first_bad_datum[0].node_name)
Example No. 34
def _AcosGrad(op, grad):
  """Returns grad * -1/sqrt(1-x^2)."""
  x = op.inputs[0]
  with ops.control_dependencies([grad.op]):
    x2 = math_ops.square(x)
    one = constant_op.constant(1, dtype=grad.dtype)
    den = math_ops.sqrt(math_ops.sub(one, x2))
    inv = math_ops.inv(den)
    return -grad * inv
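The formula implemented above is d/dx arccos(x) = -1/sqrt(1 - x**2); a quick finite-difference check (sketch):

import numpy as np

x, h = 0.3, 1e-6
numeric = (np.arccos(x + h) - np.arccos(x - h)) / (2 * h)
analytic = -1.0 / np.sqrt(1.0 - x**2)
print(numeric, analytic)  # both ~= -1.0483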
Example No. 35
    def testStripUnused(self):
        input_graph_name = "input_graph.pb"
        output_graph_name = "output_graph.pb"

        # We'll create an input graph that has a single constant containing 1.0,
        # and that then multiplies it by 2.
        with ops.Graph().as_default():
            constant_node = constant_op.constant(1.0, name="constant_node")
            wanted_input_node = math_ops.sub(constant_node,
                                             3.0,
                                             name="wanted_input_node")
            output_node = math_ops.multiply(wanted_input_node,
                                            2.0,
                                            name="output_node")
            math_ops.add(output_node, 2.0, name="later_node")
            sess = session.Session()
            output = sess.run(output_node)
            self.assertNear(-4.0, output, 0.00001)
            graph_io.write_graph(sess.graph, self.get_temp_dir(),
                                 input_graph_name)

        # We save out the graph to disk, and then call the const conversion
        # routine.
        input_graph_path = os.path.join(self.get_temp_dir(), input_graph_name)
        input_binary = False
        input_node_names = "wanted_input_node"
        output_binary = True
        output_node_names = "output_node"
        output_graph_path = os.path.join(self.get_temp_dir(),
                                         output_graph_name)

        strip_unused_lib.strip_unused_from_files(
            input_graph_path, input_binary, output_graph_path, output_binary,
            input_node_names, output_node_names,
            dtypes.float32.as_datatype_enum)

        # Now we make sure the variable is now a constant, and that the graph still
        # produces the expected result.
        with ops.Graph().as_default():
            output_graph_def = graph_pb2.GraphDef()
            with open(output_graph_path, "rb") as f:
                output_graph_def.ParseFromString(f.read())
                _ = importer.import_graph_def(output_graph_def, name="")

            self.assertEqual(3, len(output_graph_def.node))
            for node in output_graph_def.node:
                self.assertNotEqual("Add", node.op)
                self.assertNotEqual("Sub", node.op)
                if node.name == input_node_names:
                    self.assertTrue("shape" in node.attr)

            with session.Session() as sess:
                input_node = sess.graph.get_tensor_by_name(
                    "wanted_input_node:0")
                output_node = sess.graph.get_tensor_by_name("output_node:0")
                output = sess.run(output_node, feed_dict={input_node: [10.0]})
                self.assertNear(20.0, output, 0.00001)
Example #37
  def GetParams(self):
    """Create a graph containing multiple segment."""
    # TODO(aaroey): test graph with different dtypes.
    dtype = dtypes.float32
    input_name = "input"
    input_dims = [100, 24, 24, 2]
    output_name = "output"
    g = ops.Graph()
    with g.as_default():
      inp = array_ops.placeholder(
          dtype=dtype, shape=input_dims, name=input_name)
      with g.device("/GPU:0"):
        conv_filter = constant_op.constant(
            [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]],
            name="weights",
            dtype=dtype)
        conv = nn.conv2d(
            input=inp,
            filter=conv_filter,
            strides=[1, 2, 2, 1],
            padding="SAME",
            name="conv")
        c1 = constant_op.constant(
            np.random.randn(12, 12, 6), dtype=dtype, name="c1")
        p = math_ops.mul(conv, c1, name="mul")
        c2 = constant_op.constant(
            np.random.randn(12, 12, 6), dtype=dtype, name="c2")
        q = math_ops.div(conv, c2, name="div")

        edge = self.trt_incompatible_op(q, name="incompatible")
        edge = math_ops.div(edge, edge, name="div1")
        r = math_ops.add(edge, edge, name="add")

        p = math_ops.sub(p, edge, name="sub")
        q = math_ops.mul(q, edge, name="mul1")
        s = math_ops.add(p, q, name="add1")
        s = math_ops.sub(s, r, name="sub1")
      array_ops.squeeze(s, name=output_name)
    return trt_test.TfTrtIntegrationTestParams(
        gdef=g.as_graph_def(),
        input_names=[input_name],
        input_dims=[input_dims],
        output_names=[output_name],
        expected_output_dims=[(100, 12, 12, 6)])
Example #38
def moments(x, axes, name=None):
    """Calculate the mean and variance of `x`.

  The mean and variance are calculated by aggregating the contents of `x`
  across `axes`.  If `x` is 1-D and `axes = [0]` this is just the mean
  and variance of a vector.

  For so-called "global normalization" needed for convolutional filters pass
  `axes=[0, 1, 2]` (batch, height, width).  For batch normalization pass
  `axes=[0]` (batch).

  Args:
    x: A `Tensor`.
    axes: array of ints.  Axes along which to compute mean and
      variance.
    name: Name used to scope the operations that compute the moments.

  Returns:
    Two `Tensor` objects: `mean` and `variance`.
  """
    with ops.op_scope([x, axes], name, "moments"):
        x = ops.convert_to_tensor(x, name="x")
        x_shape = x.get_shape()
        if all(x_shape[d].value is not None for d in axes):
            # The shape is known in the relevant axes, so we can statically
            # compute the divisor.
            divisor = 1.0
            for d in set(axes):
                divisor *= x.get_shape()[d].value
            divisor = constant_op.constant(1.0 / divisor,
                                           x.dtype,
                                           name="divisor")
        else:
            divisor = constant_op.constant(1.0, dtype=x.dtype)
            x_dynamic_shape = array_ops.shape(x)
            for d in set(axes):
                divisor *= math_ops.cast(x_dynamic_shape[d], x.dtype)
            divisor = math_ops.inv(divisor, name="divisor")
        axes = constant_op.constant(axes, name="axes")
        # Note: We do not use Mean here because it is very slow on GPU.
        # Note 2: The expression below is potentially more stable.
        # It is however a bit slower and stability doesn't appear to be an issue.
        # mean = math_ops.reduce_sum(math_ops.mul(x, divisor), axes, name="mean")
        # var = math_ops.reduce_sum(math_ops.mul(math_ops.square(x - mean),
        #                                        divisor), axes,
        #                    name="variance")
        mean = math_ops.mul(math_ops.reduce_sum(x, axes), divisor, name="mean")
        # Give x-mean a specific name, so the caller might take advantage of it.
        # The caller should have a fallback plan, however: this tensor may not be
        # available if this function implementation changes.
        x_centered = math_ops.sub(x, mean, name="x_centered")
        var = math_ops.mul(math_ops.reduce_sum(math_ops.square(x_centered),
                                               axes),
                           divisor,
                           name="variance")
        return mean, var
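As a reference point (a sketch, not the TensorFlow implementation), the same computation for axes=[0] in plain NumPy:

import numpy as np

x = np.random.randn(4, 3).astype(np.float32)
mean = x.sum(axis=0) * (1.0 / x.shape[0])        # reduce_sum * divisor
x_centered = x - mean
var = np.square(x_centered).sum(axis=0) * (1.0 / x.shape[0])
assert np.allclose(mean, x.mean(axis=0), atol=1e-6)
assert np.allclose(var, x.var(axis=0), atol=1e-6)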
Example #39
def sufficient_statistics(x, axes, shift=None, keep_dims=False, name=None):
  """Calculate the sufficient statistics for the mean and variance of `x`.

  These sufficient statistics are computed using the one pass algorithm on
  an input that's optionally shifted. See:
  https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Computing_shifted_data

  Args:
    x: A `Tensor`.
    axes: Array of ints. Axes along which to compute mean and variance.
    shift: A `Tensor` containing the value by which to shift the data for
      numerical stability, or `None` if no shift is to be performed. A shift
      close to the true mean provides the most numerically stable results.
    keep_dims: produce statistics with the same dimensionality as the input.
    name: Name used to scope the operations that compute the sufficient stats.

  Returns:
    Four `Tensor` objects of the same type as `x`:
    * the count (number of elements to average over).
    * the (possibly shifted) sum of the elements in the array.
    * the (possibly shifted) sum of squares of the elements in the array.
    * the shift by which the mean must be corrected or None if `shift` is None.
  """
  with ops.op_scope([x, axes, shift], name, "sufficient_statistics"):
    x = ops.convert_to_tensor(x, name="x")
    x_shape = x.get_shape()
    if x_shape.is_fully_defined():
      counts = 1
      m_shape = []
      for d in xrange(x_shape.ndims):
        dim = x_shape[d].value
        if d in set(axes):
          counts *= dim
          dim = 1
        m_shape.append(dim)
      counts = constant_op.constant(counts, dtype=x.dtype)
    else:  # shape needs to be inferred at runtime.
      x_shape = array_ops.shape(x)
      select_axes = sparse_ops.sparse_to_dense(axes, array_ops.shape(x_shape),
                                               True, False)
      m_shape = math_ops.select(select_axes, array_ops.ones_like(x_shape),
                                x_shape)
      counts = math_ops.cast(
          math_ops.reduce_prod(x_shape / m_shape),
          x.dtype,
          name="count")
    if shift is not None:
      shift = ops.convert_to_tensor(shift, name="shift")
      m_ss = math_ops.sub(x, shift)
      v_ss = math_ops.squared_difference(x, shift)
    else:  # no shift.
      m_ss = x
      v_ss = math_ops.square(x)
    m_ss = math_ops.reduce_sum(m_ss, axes, keep_dims=keep_dims, name="mean_ss")
    v_ss = math_ops.reduce_sum(v_ss, axes, keep_dims=keep_dims, name="var_ss")
  return counts, m_ss, v_ss, shift
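The moments are recovered downstream as mean = m_ss / counts + shift and variance = v_ss / counts - (m_ss / counts)**2; a NumPy check of that identity, with an arbitrary shift:

import numpy as np

x = np.random.randn(1000)
shift = 0.1                       # any value near the true mean works
counts = x.size
m_ss = np.sum(x - shift)          # (shifted) sum of elements
v_ss = np.sum((x - shift) ** 2)   # (shifted) sum of squares
mean = m_ss / counts + shift
variance = v_ss / counts - (m_ss / counts) ** 2
assert np.isclose(mean, x.mean()) and np.isclose(variance, x.var())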
Example #41
def _AsinGrad(op, grad):
  """Returns grad * 1/sqrt(1-x^2)."""
  x = op.inputs[0]
  with ops.control_dependencies([grad.op]):
    x = math_ops.conj(x)
    x2 = math_ops.square(x)
    one = constant_op.constant(1, dtype=grad.dtype)
    den = math_ops.sqrt(math_ops.sub(one, x2))
    inv = math_ops.reciprocal(den)
    return grad * inv
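The analogous finite-difference check for asin (again a standalone sketch, not part of the snippet; the conj(x) in the code only matters for complex inputs):

import numpy as np

x, eps = 0.3, 1e-6
numeric = (np.arcsin(x + eps) - np.arcsin(x - eps)) / (2 * eps)
assert abs(numeric - 1.0 / np.sqrt(1.0 - x ** 2)) < 1e-6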
Example #43
def _ragged_substr(text_input, begin, end):
  text_input_flat = None
  if ragged_tensor.is_ragged(text_input):
    text_input_flat = text_input.flat_values
  else:
    text_input_flat = text_input
  broadcasted_text = array_ops.gather_v2(text_input_flat,
                                         begin.nested_value_rowids()[-1])
  size = math_ops.sub(end.flat_values, begin.flat_values)
  new_tokens = string_ops.substr_v2(broadcasted_text, begin.flat_values, size)
  return begin.with_flat_values(new_tokens)
Example #45
def _prepare(self):
  self._lr_t = ops.convert_to_tensor(self._lr, name="learning_rate")
  self._mu_t = ops.convert_to_tensor(self._mu, name="mu")
  if isinstance(self._mu, ops.Tensor):
    mu_max = self._mu.op.inputs[0]
    effective_mu = self._mu.op.inputs[1]
    mu_one = effective_mu.op.inputs[0]
    minus_mu = effective_mu.op.inputs[1]
    mu_rate = minus_mu.op.inputs[0]
    mu_decay = minus_mu.op.inputs[1]
    mu_decay_rate = mu_decay.op.inputs[0]
    mu_decay_power = mu_decay.op.inputs[1]
    if mu_decay_power.op.name.endswith('Floor'):
      global_step = mu_decay_power.op.inputs[0].op.inputs[0]
      mu_decay_steps = mu_decay_power.op.inputs[0].op.inputs[1]
      self._mu2_t = math_ops.mul(
          mu_max,
          math_ops.sub(
              mu_one,
              math_ops.mul(
                  mu_rate,
                  math_ops.pow(
                      mu_decay_rate,
                      math_ops.floor(
                          math_ops.div(global_step + 1, mu_decay_steps))))))
    else:
      global_step = mu_decay_power.op.inputs[0]
      mu_decay_steps = mu_decay_power.op.inputs[1]
      self._mu2_t = math_ops.mul(
          mu_max,
          math_ops.sub(
              mu_one,
              math_ops.mul(
                  mu_rate,
                  math_ops.pow(mu_decay_rate,
                               math_ops.div(global_step + 1,
                                            mu_decay_steps)))))
  else:
    self._mu2_t = self._mu_t
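Restated in plain NumPy, the reconstructed tensor is an exponentially decayed momentum coefficient; the parameter values below are hypothetical stand-ins for whatever the walked graph actually contains:

import numpy as np

def mu2(global_step, mu_max=0.99, mu_rate=0.5,
        mu_decay_rate=0.96, mu_decay_steps=250, staircase=True):
    # Staircase vs. smooth exponential decay of the momentum coefficient.
    p = (global_step + 1) / mu_decay_steps
    if staircase:
        p = np.floor(p)
    return mu_max * (1.0 - mu_rate * mu_decay_rate ** p)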
Example #46
def _RaggedSubstr(text_input, begin, end):
  text_input_flat = None
  if ragged_tensor.is_ragged(text_input):
    text_input_flat = text_input.flat_values
  else:
    text_input_flat = ops.convert_to_tensor(text_input)

  if ragged_tensor.is_ragged(begin):
    broadcasted_text = array_ops.gather_v2(text_input_flat,
                                           begin.nested_value_rowids()[-1])

    # Convert broadcasted_text into a 1D tensor.
    broadcasted_text = array_ops.reshape(broadcasted_text, [-1])
    size = math_ops.sub(end.flat_values, begin.flat_values)
    new_tokens = string_ops.substr_v2(broadcasted_text, begin.flat_values, size)
    return begin.with_flat_values(new_tokens)
  else:
    assert begin.shape.ndims == 1
    assert text_input_flat.shape.ndims == 0
    size = math_ops.sub(end, begin)
    new_tokens = string_ops.substr_v2(text_input_flat, begin, size)
    return new_tokens
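The substring arithmetic itself is byte-offset slicing; a pure-Python sketch of the scalar branch:

text = b"hello world"
begin, end = 6, 11
size = end - begin                        # what math_ops.sub computes
assert text[begin:begin + size] == b"world"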
Example #47
    def testTupleOutfeedGetLast(self):
        feed_name = next_feed_id()
        shape_1 = [10, 10]
        shape_2 = [4, 4]

        with ops.device("/device:IPU:0"):
            a = array_ops.placeholder(np.float32, shape_1)
            b = array_ops.placeholder(np.float32, shape_1)
            c = array_ops.placeholder(np.float32, shape_2)
            d = array_ops.placeholder(np.float32, shape_2)
            add = math_ops.add(a, b)
            sub = math_ops.sub(c, d)
            outfeed_op = gen_pop_datastream_ops.pop_datastream_outfeed_enqueue(
                [add, sub],
                feed_id=feed_name,
                replication_factor=1,
                outfeed_mode='get_last')

        with ops.device('cpu'):
            outfeed = gen_pop_datastream_ops.pop_datastream_outfeed_dequeue(
                feed_id=feed_name,
                replication_factor=1,
                output_types=[np.float32, np.float32],
                output_shapes=[shape_1, shape_2])

        def get_result(sess, result):
            result.append(sess.run(outfeed))

        with session_lib.Session() as sess:
            sess.run(variables.global_variables_initializer())
            result = []
            sess.run(outfeed_op,
                     feed_dict={
                         a: np.ones(shape_1, np.float32),
                         b: np.ones(shape_1, np.float32),
                         c: np.ones(shape_2, np.float32),
                         d: np.ones(shape_2, np.float32)
                     })
            sess.run(outfeed_op,
                     feed_dict={
                         a: 2 * np.ones(shape_1, np.float32),
                         b: np.ones(shape_1, np.float32),
                         c: 2 * np.ones(shape_2, np.float32),
                         d: np.ones(shape_2, np.float32)
                     })
            outfed = sess.run(outfeed)
            self.assertTrue(len(outfed) == 2)
            self.assertEqual(outfed[0].shape, (10, 10))
            self.assertEqual(outfed[1].shape, (4, 4))
            self.assertAllClose(outfed[0], np.broadcast_to(3, [10, 10]))
            self.assertAllClose(outfed[1], np.broadcast_to(1, [4, 4]))
Example #48
    def _resource_apply_dense(self, grad, var):
        var_dtype = var.dtype.base_dtype
        lr_t = self._decayed_lr(var_dtype)

        # Learning rate multipliers
        if self.lr_multipliers is not None:
            lr_t = _apply_lr_multiplier(self, lr_t, var)

        if self._momentum:
            momentum = array_ops.identity(
                self._get_hyper('momentum', var_dtype))
            m = self.get_slot(var, 'momentum')
            v = momentum * m - self.eta_t * lr_t * grad  # velocity
            m = state_ops.assign(m, v, use_locking=self._use_locking)

            if self.nesterov:
                var_t = math_ops.sub(var,
                                     -momentum * v + self.eta_t * lr_t * grad)
            else:
                var_t = var + v
        else:
            v = -self.eta_t * lr_t * grad
            var_t = var + v

        # Weight decays
        if var.name in self.weight_decays.keys():
            var_t = _apply_weight_decays(self, var, var_t)

        var_update = state_ops.assign(var,
                                      var_t,
                                      use_locking=self._use_locking)

        # Cosine annealing
        (iteration_done, t_cur_update,
         eta_t_update) = _update_t_cur_eta_t_v2(self, lr_t, var)
        if iteration_done and not self._init_notified:
            self._init_notified = True

        updates = [var_update]
        if self._momentum:
            updates += [m]
        if iteration_done:
            updates += [t_cur_update]
        if self.use_cosine_annealing and iteration_done:
            updates += [eta_t_update]
        return control_flow_ops.group(*updates)
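A minimal NumPy sketch of the two momentum branches above, with the cosine-annealing multiplier eta_t taken as 1 for brevity:

import numpy as np

momentum, lr = 0.9, 0.01
var = np.array([1.0])
m = np.zeros(1)                                 # momentum slot
grad = np.array([0.5])

v = momentum * m - lr * grad                    # velocity
var_plain = var + v                             # classical momentum
var_nesterov = var + momentum * v - lr * grad   # Nesterov lookahead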
Example #49
  def setUp(self):
    self.a = variables.Variable(10.0, name="a")
    self.b = variables.Variable(20.0, name="b")

    self.c = math_ops.add(self.a, self.b, name="c")  # Should be 30.0.
    self.d = math_ops.sub(self.a, self.c, name="d")  # Should be -20.0.
    self.e = math_ops.mul(self.c, self.d, name="e")  # Should be -600.0.

    self.ph = array_ops.placeholder(dtypes.float32, shape=(2, 2), name="ph")
    self.f = math_ops.mul(self.e, self.ph, name="f")

    self.opt = gradient_descent.GradientDescentOptimizer(0.1).minimize(
        self.e, name="opt")

    self.sess = session.Session()

    self.sess.run(self.a.initializer)
    self.sess.run(self.b.initializer)
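The values asserted in the inline comments follow from the plain arithmetic:

a, b = 10.0, 20.0
c = a + b      # 30.0
d = a - c      # -20.0
e = c * d      # -600.0
assert (c, d, e) == (30.0, -20.0, -600.0)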
Example #50
def per_image_whitening(image):
  """Linearly scales `image` to have zero mean and unit norm.

  This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average
  of all values in image, and
  `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`.

  `stddev` is the standard deviation of all values in `image`. It is capped
  away from zero to protect against division by 0 when handling uniform images.

  Note that this implementation is limited:
  *  It only whitens based on the statistics of an individual image.
  *  It does not take into account the covariance structure.

  Args:
    image: 3-D tensor of shape `[height, width, channels]`.

  Returns:
    The whitened image with same shape as `image`.

  Raises:
    ValueError: if the shape of 'image' is incompatible with this function.
  """
  image = ops.convert_to_tensor(image, name='image')
  _Check3DImage(image, require_static=False)
  num_pixels = math_ops.reduce_prod(array_ops.shape(image))

  image = math_ops.cast(image, dtype=dtypes.float32)
  image_mean = math_ops.reduce_mean(image)

  variance = (math_ops.reduce_mean(math_ops.square(image)) -
              math_ops.square(image_mean))
  variance = gen_nn_ops.relu(variance)
  stddev = math_ops.sqrt(variance)

  # Apply a minimum normalization that protects us against uniform images.
  min_stddev = math_ops.inv(
      math_ops.sqrt(math_ops.cast(num_pixels, dtypes.float32)))
  pixel_value_scale = math_ops.maximum(stddev, min_stddev)
  pixel_value_offset = image_mean

  image = math_ops.sub(image, pixel_value_offset)
  image = math_ops.div(image, pixel_value_scale)
  return image
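The same whitening for a single height-by-width-by-channels image, sketched in NumPy:

import numpy as np

image = np.random.rand(8, 8, 3).astype(np.float32)
mean = image.mean()
stddev = image.std()
min_stddev = 1.0 / np.sqrt(image.size)   # guards against uniform images
whitened = (image - mean) / max(stddev, min_stddev)
assert abs(whitened.mean()) < 1e-4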
Example #51
def absolute_difference(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES):
  """Adds an Absolute Difference loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a `Tensor` of
  shape `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  WARNING: `weights` also supports dimensions of 1, but the broadcasting does
  not work as advertised: you'll wind up with a weighted sum instead of a
  weighted mean for any but the last dimension. This will be cleaned up soon,
  so please do not rely on the current behavior for anything but the shapes
  documented for `weights` below.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Coefficients for the loss a scalar, a tensor of shape
      `[batch_size]` or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.
  """
  with ops.name_scope(scope, "absolute_difference",
                      [predictions, labels, weights]) as scope:
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    losses = math_ops.abs(math_ops.sub(predictions, labels))
    return compute_weighted_loss(losses, weights, scope, loss_collection)
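With weights=1.0, the weighted reduction collapses to a plain mean of the element-wise absolute differences; in NumPy:

import numpy as np

predictions = np.array([1.0, 2.0, 4.0])
labels = np.array([1.5, 2.0, 3.0])
losses = np.abs(predictions - labels)   # element-wise |p - l|
loss = losses.mean()                    # 0.5 here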
Example #52
  def AddBackPropCounterLoop(self, count):
    """Add the backprop loop that controls the iterations.

    This is added to the backprop loop. It is used to control the loop
    termination and the slice index.

    The pseudocode is:
      `n = count; while (n >= 1) { n--; }`

    Args:
      count: The number of iterations for backprop.

    Returns:
      always 0.
    """
    one = constant_op.constant(1, name="b_count")
    self.Enter()
    self.AddName(count.name)
    enter_count = _Enter(count, self._name, is_constant=False,
                         parallel_iterations=self._parallel_iterations,
                         name="b_count")
    merge_count = merge([enter_count, enter_count])[0]
    self._pivot_for_pred = merge_count

    cond = math_ops.greater_equal(merge_count, one)
    self._pivot = loop_cond(cond, name="b_count")
    switch_count = switch(merge_count, self._pivot)

    # Add next_iteration right after Switch to match the gradient function.
    next_count = next_iteration(switch_count[1])
    self._pivot_for_body = next_count
    self._index = math_ops.sub(next_count, one)
    merge_count.op._update_input(1, self._index)

    exit_count = exit(switch_count[0], name="b_count")
    self.Exit()
    return exit_count
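The dataflow ops implement exactly the docstring's pseudocode; in plain Python:

def backprop_counter(count):
    n = count
    while n >= 1:
        n -= 1       # math_ops.sub(next_count, one)
    return n         # always 0 for integer count >= 0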
Example #53
File: nn.py Project: 01-/tensorflow
def sufficient_statistics(x, axes, shift=False, keep_dims=False, name=None):
  """Calculate the sufficient statistics for the mean and variance of `x`.

  These sufficient statistics are computed using the one pass algorithm on
  an input that's optionally shifted using the value of the 1st element in `x`.
  See:
  https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Computing_shifted_data
  Unfortunately, in some cases using a random individual sample as the shift
  value leads experimentally to very poor numerical stability, so it is disabled
  by default. The one-pass approach might have to be revised accordingly.

  Args:
    x: A `Tensor`.
    axes: Array of ints. Axes along which to compute mean and variance.
    shift: If true, shift the data to provide more numerically stable results.
    keep_dims: produce statistics with the same dimensionality as the input.
    name: Name used to scope the operations that compute the sufficient stats.

  Returns:
    Four `Tensor` objects of the same type as `x`:
    * the count (number of elements to average over).
    * the (possibly shifted) sum of the elements in the array.
    * the (possibly shifted) sum of squares of the elements in the array.
    * the shift by which the mean must be corrected or None if `shift` is False.
  """
  with ops.op_scope([x, axes], name, "sufficient_statistics"):
    x = ops.convert_to_tensor(x, name="x")
    x_shape = x.get_shape()
    if x_shape.is_fully_defined():
      counts = 1
      m_shape = []
      for d in xrange(x_shape.ndims):
        dim = x_shape[d].value
        if d in set(axes):
          counts *= dim
          dim = 1
        m_shape.append(dim)
      counts = constant_op.constant(counts, dtype=x.dtype)
    else:  # shape needs to be inferred at runtime.
      x_shape = array_ops.shape(x)
      select_axes = sparse_ops.sparse_to_dense(axes, array_ops.shape(x_shape),
                                               True, False)
      m_shape = math_ops.select(select_axes, array_ops.ones_like(x_shape),
                                x_shape)
      counts = math_ops.cast(
          math_ops.reduce_prod(x_shape / m_shape),
          x.dtype,
          name="count")
    if shift:
      shift_value = array_ops.slice(x, array_ops.zeros_like(m_shape), m_shape)
      m_ss = math_ops.sub(x, shift_value)
      v_ss = math_ops.squared_difference(x, shift_value)
      if keep_dims:
        shift_value = array_ops.identity(shift_value, name="shift")
      else:
        shift_value = array_ops.squeeze(shift_value,
                                        squeeze_dims=axes,
                                        name="shift")
    else:  # not shift.
      m_ss = x
      v_ss = math_ops.square(x)
      shift_value = None
    m_ss = math_ops.reduce_sum(m_ss, axes, keep_dims=keep_dims, name="mean_ss")
    v_ss = math_ops.reduce_sum(v_ss, axes, keep_dims=keep_dims, name="var_ss")
  return counts, m_ss, v_ss, shift_value
Example #54
def b1(x, y):
  nx = math_ops.sub(x, 1)
  ny = y + gen_data_flow_ops._stack_pop(h, dtypes.float32)
  return [nx, ny]
Example #55
def sum_of_pairwise_squares(predictions, targets, weight=1.0, scope=None):
  """Adds a pairwise-errors-squared loss to the training procedure.

  Unlike the sum_of_squares loss, which is a measure of the differences between
  corresponding elements of `predictions` and `targets`, sum_of_pairwise_squares
  is a measure of the differences between pairs of corresponding elements of
  `predictions` and `targets`.

  For example, if `targets`=[a, b, c] and `predictions`=[x, y, z], then three
  pairs of differences are summed to compute the loss:
    loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3

  Note that since the inputs are of size [batch_size, d0, ... dN], the
  corresponding pairs are computed within each batch sample but not across
  samples within a batch. For example, if `predictions` represents a batch of
  16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs
  is drawn from each image, but not across images.

  `weight` acts as a coefficient for the loss. If a scalar is provided, then the
  loss is simply scaled by the given value. If `weight` is a tensor of size
  [batch_size], then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weight` vector.

  Args:
    predictions: The predicted outputs, a tensor of size [batch_size, d0, .. dN]
      where N+1 is the total number of dimensions in `predictions`.
    targets: The ground truth output tensor, whose shape must match the shape of
      the `predictions` tensor.
    weight: Coefficients for the loss a scalar, a tensor of shape [batch_size]
      or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `targets` or
      if the shape of `weight` is invalid.
  """
  with ops.name_scope(scope, "sum_of_pairwise_squares_loss",
                      [predictions, targets]) as scope:
    predictions.get_shape().assert_is_compatible_with(targets.get_shape())
    if weight is None:
      raise ValueError("`weight` cannot be None")
    predictions = math_ops.to_float(predictions)
    targets = math_ops.to_float(targets)
    weight = math_ops.to_float(ops.convert_to_tensor(weight))

    diffs = math_ops.sub(predictions, targets)

    # Need to verify here since the function doesn't use compute_weighted_loss
    if diffs.get_shape().ndims is None:
      raise ValueError("diffs.get_shape().ndims cannot be None")
    if weight.get_shape().ndims is None:
      raise ValueError("weight.get_shape().ndims cannot be None")

    reduction_indices = list(range(1, diffs.get_shape().ndims))

    sum_squares_diff_per_batch = math_ops.reduce_sum(
        math_ops.square(diffs),
        reduction_indices=reduction_indices)
    num_present_per_batch = _num_present(diffs, weight, per_batch=True)

    term1 = 2.0 * _safe_div(sum_squares_diff_per_batch,
                            num_present_per_batch)

    sum_diff = math_ops.reduce_sum(diffs, reduction_indices=reduction_indices)
    term2 = 2.0 * _safe_div(math_ops.square(sum_diff),
                            math_ops.square(num_present_per_batch))

    loss = _scale_losses(term1 - term2, weight)

    mean_loss = math_ops.select(math_ops.reduce_sum(num_present_per_batch) > 0,
                                loss,
                                array_ops.zeros_like(loss),
                                name="value")
    add_loss(mean_loss)
    return mean_loss
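term1 - term2 rests on the identity that the mean of (d_i - d_j)**2 over all ordered pairs of d = predictions - targets equals 2 * Var(d); a NumPy check, taking weights as 1 so num_present is just n:

import numpy as np

d = np.array([0.5, -1.0, 2.0])            # per-element differences
n = d.size
pairwise = np.mean((d[:, None] - d[None, :]) ** 2)
term1 = 2.0 * np.sum(d ** 2) / n
term2 = 2.0 * np.sum(d) ** 2 / n ** 2
assert np.isclose(pairwise, term1 - term2)
assert np.isclose(pairwise, 2.0 * d.var())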