def testSqueezeMatrix(self):
    matrix = [[1, 2, 3]]
    matrix_squeezed = array_ops.squeeze(matrix, [0])
    self.assertEqual(matrix_squeezed.get_shape(), (3))

    with self.assertRaises(ValueError):
      matrix_squeezed = array_ops.squeeze(matrix, [1])
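
For context, a minimal NumPy sketch (not part of the test above) of the same squeeze semantics: only axes of size 1 can be removed, so squeezing axis 0 of a 1x3 matrix works while squeezing axis 1 fails.

import numpy as np

m = np.array([[1, 2, 3]])                  # shape (1, 3)
assert np.squeeze(m, axis=0).shape == (3,)
# np.squeeze(m, axis=1) raises ValueError, mirroring the assertion above.
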
Example #2
 def GetParams(self):
   """Create a graph containing multiple segments."""
   input_name = "input"
   input_dims = [2, 32, 32, 3]
   g = ops.Graph()
   with g.as_default():
     inp = array_ops.placeholder(
         dtype=dtypes.float32, shape=input_dims, name=input_name)
     with g.device("/GPU:0"):
       n = inp
       c = constant_op.constant(1.0, name="c")
       n = math_ops.add(n, c, name="add")
       n = math_ops.mul(n, n, name="mul")
       n = math_ops.add(n, n, name="add1")
       n = self.trt_incompatible_op(n, name="incompatible1")
       n = math_ops.add(n, c, name="add2")
       n = math_ops.mul(n, n, name="mul1")
       n = math_ops.add(n, n, name="add3")
     array_ops.squeeze(n, name=self.output_name)
   return trt_test.TfTrtIntegrationTestParams(
       gdef=g.as_graph_def(),
       input_names=[input_name],
       input_dims=[input_dims],
       expected_engines={
           "my_trt_op_0": ["add2", "add3", "mul1"],
            # Why segment ["add", "add1", "mul"] was assigned segment id 1
            # instead of 0: the parent node of this segment is actually the
            # const node 'c', but it is removed later because a const output
            # of the segment is not allowed.
           "my_trt_op_1": ["add", "add1", "mul"]
       },
       expected_output_dims=tuple(input_dims),
       allclose_atol=1.e-06,
       allclose_rtol=1.e-06)
def _statistics(x, axes):
  """Calculate the mean and mean square of `x`.

  Modified from the implementation of `tf.nn.moments`.

  Args:
    x: A `Tensor`.
    axes: Array of ints.  Axes along which to compute mean and
      variance.

  Returns:
    Two `Tensor` objects: `mean` and `square mean`.
  """
  # The dynamic range of fp16 is too limited to support the collection of
  # sufficient statistics. As a workaround we simply perform the operations
  # on 32-bit floats before converting the mean and variance back to fp16
  y = math_ops.cast(x, dtypes.float32) if x.dtype == dtypes.float16 else x

  # Compute true mean while keeping the dims for proper broadcasting.
  shift = array_ops.stop_gradient(math_ops.reduce_mean(y, axes, keepdims=True))

  shifted_mean = math_ops.reduce_mean(y - shift, axes, keepdims=True)
  mean = shifted_mean + shift
  mean_squared = math_ops.reduce_mean(math_ops.square(y), axes, keepdims=True)

  mean = array_ops.squeeze(mean, axes)
  mean_squared = array_ops.squeeze(mean_squared, axes)
  if x.dtype == dtypes.float16:
    return (math_ops.cast(mean, dtypes.float16),
            math_ops.cast(mean_squared, dtypes.float16))
  else:
    return (mean, mean_squared)
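
A small NumPy sketch (illustrative only, not from the original module) of the shifted-mean trick used in `_statistics`: subtracting a fixed shift before averaging does not change the result, and squeezing the kept reduction axes afterwards recovers the plain mean.

import numpy as np

x = np.random.randn(4, 5, 3).astype(np.float32)
axes = (0, 1)

shift = x.mean(axis=axes, keepdims=True)
mean = (x - shift).mean(axis=axes, keepdims=True) + shift
mean_squared = np.square(x).mean(axis=axes, keepdims=True)

# Squeezing the reduced axes mirrors array_ops.squeeze(mean, axes) above.
assert np.allclose(np.squeeze(mean, axis=axes), x.mean(axis=axes), atol=1e-5)
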
Example #4
def _test_squeeze(data, squeeze_dims=None):
    """One iteration of squeeze."""

    if squeeze_dims is None:
        squeeze_dims = []

    # see relay/frontend/tflite.py convert_squeeze for more detail on the
    # channel-first rule
    if len(data.shape) == 1 or len(data.shape) == 2:
        tvm_data = data
    elif len(data.shape) == 3:
        tvm_data = np.transpose(data, axes=(0, 2, 1))
    elif len(data.shape) == 4:
        tvm_data = np.transpose(data, axes=(0, 3, 1, 2))
    else:
        raise NotImplementedError(
            "Unsupported input shape of rank {} for squeeze".format(len(data.shape)))

    with tf.Graph().as_default():
        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)

        if squeeze_dims:
            out = array_ops.squeeze(in_data, squeeze_dims)
        else:
            out = array_ops.squeeze(in_data)

        compare_tflite_with_tvm(data, tvm_data, 'Placeholder:0', [in_data], [out])
Example #5
  def call(self, inputs):
    # There is no TF op for 1D pooling, hence we make the inputs 4D.
    if self.data_format == 'channels_last':
      # input is NWC, make it NHWC
      inputs = array_ops.expand_dims(inputs, 1)
      # pool on the W dim
      pool_shape = (1, 1) + self.pool_size + (1,)
      strides = (1, 1) + self.strides + (1,)
      data_format = 'NHWC'
    else:
      # input is NCW, make it NCHW
      inputs = array_ops.expand_dims(inputs, 2)
      # pool on the W dim
      pool_shape = (1, 1, 1) + self.pool_size
      strides = (1, 1, 1) + self.strides
      data_format = 'NCHW'

    outputs = self.pool_function(
        inputs,
        ksize=pool_shape,
        strides=strides,
        padding=self.padding.upper(),
        data_format=data_format)

    if self.data_format == 'channels_last':
      return array_ops.squeeze(outputs, 1)
    else:
      return array_ops.squeeze(outputs, 2)
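
A hedged sketch of the same expand_dims / pool / squeeze pattern for channels_last 1-D pooling, written against the public API (assumes a recent TensorFlow install; `tf.nn.max_pool2d` stands in for `self.pool_function`):

import tensorflow as tf

x = tf.random.normal([2, 10, 3])                 # NWC input
x4d = tf.expand_dims(x, 1)                       # NHWC with H == 1
pooled = tf.nn.max_pool2d(x4d, ksize=(1, 1, 2, 1), strides=(1, 1, 2, 1),
                          padding='VALID', data_format='NHWC')
y = tf.squeeze(pooled, 1)                        # back to NWC; W is halved
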
Example #6
 def GetParams(self):
   """Create a graph containing two segments."""
   input_name = "input"
   input_dims = [2, 32, 32, 3]
   output_name = "output"
   g = ops.Graph()
   with g.as_default():
     inp = array_ops.placeholder(
         dtype=dtypes.float32, shape=input_dims, name=input_name)
     with g.device("/GPU:0"):
       n = inp
       for i in range(2):
         c = constant_op.constant(1.0, name="c%d" % i)
         n = math_ops.add(n, c, name="add%d" % i)
         n = math_ops.mul(n, n, name="mul%d" % i)
       edge = self.trt_incompatible_op(n, name="incompatible")
       with g.control_dependencies([edge]):
         c = constant_op.constant(1.0, name="c2")
         n = math_ops.add(n, c, name="add2")
       n = math_ops.mul(n, n, name="mul2")
       c = constant_op.constant(1.0, name="c3")
       n = math_ops.add(n, c, name="add3")
       n = math_ops.mul(n, n, name="mul3")
     array_ops.squeeze(n, name=output_name)
   return trt_test.TfTrtIntegrationTestParams(
       gdef=g.as_graph_def(),
       input_names=[input_name],
       input_dims=[input_dims],
       output_names=[output_name],
       expected_output_dims=[tuple(input_dims)])
  def test_virtual_statistics(self):
    """Check that `_virtual_statistics` gives same result as `nn.moments`."""
    random_seed.set_random_seed(1234)

    batch_axis = 0
    partial_batch = random_ops.random_normal([4, 5, 7, 3])
    single_example = random_ops.random_normal([1, 5, 7, 3])
    full_batch = array_ops.concat([partial_batch, single_example], axis=0)

    for reduction_axis in range(1, 4):
      # Get `nn.moments` on the full batch.
      reduction_axes = list(range(4))
      del reduction_axes[reduction_axis]
      mom_mean, mom_variance = nn.moments(full_batch, reduction_axes)

      # Get virtual batch statistics.
      vb_reduction_axes = list(range(4))
      del vb_reduction_axes[reduction_axis]
      del vb_reduction_axes[batch_axis]
      vbn = virtual_batchnorm.VBN(partial_batch, reduction_axis)
      vb_mean, mean_sq = vbn._virtual_statistics(
          single_example, vb_reduction_axes)
      vb_variance = mean_sq - math_ops.square(vb_mean)
      # Remove singleton batch dim for easy comparisons.
      vb_mean = array_ops.squeeze(vb_mean, batch_axis)
      vb_variance = array_ops.squeeze(vb_variance, batch_axis)

      with self.cached_session(use_gpu=True) as sess:
        vb_mean_np, vb_var_np, mom_mean_np, mom_var_np = sess.run([
            vb_mean, vb_variance, mom_mean, mom_variance])

      self.assertAllClose(mom_mean_np, vb_mean_np)
      self.assertAllClose(mom_var_np, vb_var_np)
Example #8
 def GetParams(self):
   """Create a graph containing multiple segments."""
   input_name = "input"
   input_dims = [2, 32, 32, 3]
   output_name = "output"
   g = ops.Graph()
   with g.as_default():
     inp = array_ops.placeholder(
         dtype=dtypes.float32, shape=input_dims, name=input_name)
     with g.device("/GPU:0"):
       n = inp
       c = constant_op.constant(1.0, name="c")
       n = math_ops.add(n, c, name="add")
       n = math_ops.mul(n, n, name="mul")
       n = math_ops.add(n, n, name="add1")
       n = self.trt_incompatible_op(n, name="incompatible1")
       n = math_ops.add(n, c, name="add2")
       n = math_ops.mul(n, n, name="mul1")
       n = math_ops.add(n, n, name="add3")
     array_ops.squeeze(n, name=output_name)
   return trt_test.TfTrtIntegrationTestParams(
       gdef=g.as_graph_def(),
       input_names=[input_name],
       input_dims=[input_dims],
       output_names=[output_name],
       expected_output_dims=[tuple(input_dims)])
Example #9
 def GetParams(self):
   """Create a graph containing multiple segments."""
   input_name = "input"
   input_dims = [2, 32, 32, 3]
   output_name = "output"
   g = ops.Graph()
   with g.as_default():
     inp = array_ops.placeholder(
         dtype=dtypes.float32, shape=input_dims, name=input_name)
     with g.device("/GPU:0"):
       n = inp
       c = constant_op.constant(1.0, name="c")
       # Adds control dependency from the constant op to a trt incompatible op,
       # and adds control dependency from the trt incompatible op to all other
       # ops, to make sure the constant op cannot be contracted with any trt
       # segment that depends on it.
       with g.control_dependencies([c]):
         d = self.trt_incompatible_op(n, name="incompatible")
       with g.control_dependencies([d]):
         n = math_ops.add(n, c, name="add")
         n = math_ops.mul(n, n, name="mul")
         n = math_ops.add(n, n, name="add1")
       n = self.trt_incompatible_op(n, name="incompatible1")
       with g.control_dependencies([d]):
         n = math_ops.add(n, c, name="add2")
         n = math_ops.mul(n, n, name="mul1")
         n = math_ops.add(n, n, name="add3")
     array_ops.squeeze(n, name=output_name)
   return trt_test.TfTrtIntegrationTestParams(
       gdef=g.as_graph_def(),
       input_names=[input_name],
       input_dims=[input_dims],
       output_names=[output_name],
       expected_output_dims=[tuple(input_dims)])
 def GetParams(self):
   """Neighboring node wiring tests in TF-TRT conversion."""
   dtype = dtypes.float32
   input_name = "input"
   input_dims = [2, 3, 7, 5]
   output_name = "output"
   g = ops.Graph()
   with g.as_default():
     x = array_ops.placeholder(dtype=dtype, shape=input_dims, name=input_name)
     e = constant_op.constant(
         np.random.normal(.3, 0.05, [3, 2, 3, 4]), name="weights", dtype=dtype)
     conv = nn.conv2d(
         input=x,
         filter=e,
         data_format="NCHW",
         strides=[1, 1, 1, 1],
         padding="VALID",
         name="conv")
     b = constant_op.constant(
         np.random.normal(1.0, 1.0, [1, 4, 1, 1]), name="bias", dtype=dtype)
     t = math_ops.mul(conv, b, name="mul")
     e = self.trt_incompatible_op(conv, name="incompatible")
     t = math_ops.sub(t, e, name="sub")
     array_ops.squeeze(t, name=output_name)
   return trt_test.TfTrtIntegrationTestParams(
       gdef=g.as_graph_def(),
       input_names=[input_name],
       input_dims=[input_dims],
       output_names=[output_name],
       expected_output_dims=[(2, 4, 5, 4)])
Example #11
 def GetParams(self):
   """Create a graph containing multiple segments."""
   input_name = "input"
   input_dims = [2, 32, 32, 3]
   output_name = "output"
   g = ops.Graph()
   with g.as_default():
     inp = array_ops.placeholder(
         dtype=dtypes.float32, shape=input_dims, name=input_name)
     with g.device("/GPU:0"):
       c1 = constant_op.constant(1.0, name="c1")
       c2 = constant_op.constant(1.0, name="c2")
       d1 = constant_op.constant(1.0, name="d1")
       d2 = self.trt_incompatible_op(inp, name="d2")
       with g.control_dependencies([d1, d2]):
         add = math_ops.add(inp, c1, name="add")
       with g.control_dependencies([d1, d2]):
         mul = math_ops.mul(add, add, name="mul")
       with g.control_dependencies([d1, d2]):
         add1 = math_ops.add(mul, mul, name="add1")
       edge = self.trt_incompatible_op(add1, name="incompatible")
       with g.control_dependencies([d1, d2, add, mul]):
         add2 = math_ops.add(edge, c2, name="add2")
       with g.control_dependencies([d1, d2, add1, mul]):
         mul1 = math_ops.mul(add2, add2, name="mul1")
       with g.control_dependencies([d1, d2, add, add1]):
         add3 = math_ops.add(mul1, mul1, name="add3")
     array_ops.squeeze(add3, name=output_name)
   return trt_test.TfTrtIntegrationTestParams(
       gdef=g.as_graph_def(),
       input_names=[input_name],
       input_dims=[input_dims],
       output_names=[output_name],
       expected_output_dims=[tuple(input_dims)])
Example #12
 def GetParams(self):
   """Create a graph containing two segments."""
   input_name = "input"
   input_dims = [2, 32, 32, 3]
   g = ops.Graph()
   with g.as_default():
     inp = array_ops.placeholder(
         dtype=dtypes.float32, shape=input_dims, name=input_name)
     with g.device("/GPU:0"):
       n = inp
       for i in range(2):
         c = constant_op.constant(1.0, name="c%d" % i)
         n = math_ops.add(n, c, name="add%d" % i)
         n = math_ops.mul(n, n, name="mul%d" % i)
       edge = self.trt_incompatible_op(n, name="incompatible")
       with g.control_dependencies([edge]):
         c = constant_op.constant(1.0, name="c2")
         n = math_ops.add(n, c, name="add2")
       n = math_ops.mul(n, n, name="mul2")
       c = constant_op.constant(1.0, name="c3")
       n = math_ops.add(n, c, name="add3")
       n = math_ops.mul(n, n, name="mul3")
     array_ops.squeeze(n, name=self.output_name)
   return trt_test.TfTrtIntegrationTestParams(
       gdef=g.as_graph_def(),
       input_names=[input_name],
       input_dims=[input_dims],
       expected_engines={
           # Only the first engine is built.
           "my_trt_op_0": ["c0", "c1", "add0", "add1", "mul0", "mul1"]
       },
       expected_output_dims=tuple(input_dims),
       allclose_atol=1.e-06,
       allclose_rtol=1.e-06)
def GetMultiEngineGraphDef(dtype=dtypes.float32):
  """Create a graph containing multiple segments."""
  g = ops.Graph()
  with g.as_default():
    inp = array_ops.placeholder(
        dtype=dtype, shape=[None] + INPUT_DIMS[1:], name=INPUT_NAME)
    with g.device("/GPU:0"):
      conv_filter = constant_op.constant(
          [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]],
          name="weights",
          dtype=dtype)
      conv = nn.conv2d(
          input=inp,
          filter=conv_filter,
          strides=[1, 2, 2, 1],
          padding="SAME",
          name="conv")
      c1 = constant_op.constant(
          np.random.randn(INPUT_DIMS[0], 12, 12, 6), dtype=dtype)
      p = conv * c1
      c2 = constant_op.constant(
          np.random.randn(INPUT_DIMS[0], 12, 12, 6), dtype=dtype)
      q = conv / c2

      edge = math_ops.sin(q)
      edge /= edge
      r = edge + edge

      p -= edge
      q *= edge
      s = p + q
      s -= r
    array_ops.squeeze(s, name=OUTPUT_NAME)
  return g.as_graph_def()
 def GetParams(self):
   """Testing conversion of BatchMatMul in TF-TRT conversion."""
   dtype = dtypes.float32
   input_name = "input"
   input_dims = [2, 15, 15, 3]
   g = ops.Graph()
   with g.as_default():
     inp = array_ops.placeholder(
         dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
     with g.device("/GPU:0"):
       e1 = constant_op.constant(
           np.random.randn(1, 1, 3, 5), name="kernel_1", dtype=dtype)
       e2 = constant_op.constant(
           np.random.randn(1, 1, 5, 10), name="kernel_2", dtype=dtype)
       conv = nn.conv2d(
           input=inp,
           filter=e1,
           strides=[1, 1, 1, 1],
           padding="VALID",
           name="conv")
       out = nn.conv2d(
           input=conv,
           filter=e2,
           strides=[1, 1, 1, 1],
           padding="VALID",
           name="conv_2")
     array_ops.squeeze(out, name=self.output_name)
   return trt_test.TfTrtIntegrationTestParams(
       gdef=g.as_graph_def(),
       input_names=[input_name],
       input_dims=[input_dims],
       expected_engines=["my_trt_op_0"],
       expected_output_dims=(2, 15, 15, 10),
       allclose_atol=1.e-02,
       allclose_rtol=1.e-02)
Example #15
 def GetParams(self):
   """Single VGG layer test in TF-TRT conversion."""
   dtype = dtypes.float32
   input_name = "input"
   input_dims = [5, 8, 8, 2]
   output_name = "output"
   g = ops.Graph()
   with g.as_default():
     x = array_ops.placeholder(dtype=dtype, shape=input_dims, name=input_name)
     x, _, _ = nn_impl.fused_batch_norm(
         x, [1.0, 1.0], [0.0, 0.0],
         mean=[0.5, 0.5],
         variance=[1.0, 1.0],
         is_training=False)
     e = constant_op.constant(
         np.random.randn(1, 1, 2, 6), name="weights", dtype=dtype)
     conv = nn.conv2d(
         input=x, filter=e, strides=[1, 2, 2, 1], padding="SAME", name="conv")
     b = constant_op.constant(np.random.randn(6), name="bias", dtype=dtype)
     t = nn.bias_add(conv, b, name="biasAdd")
     relu = nn.relu(t, "relu")
     idty = array_ops.identity(relu, "ID")
     v = nn_ops.max_pool(
         idty, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool")
     array_ops.squeeze(v, name=output_name)
   return trt_test.TfTrtIntegrationTestParams(
       gdef=g.as_graph_def(),
       input_names=[input_name],
       input_dims=[input_dims],
       output_names=[output_name],
       expected_output_dims=[(5, 2, 2, 6)])
Example #16
  def average_impurity(self):
    """Constructs a TF graph for evaluating the average leaf impurity of a tree.

    If in regression mode, this is the leaf variance. If in classification mode,
    this is the gini impurity.

    Returns:
      The last op in the graph.
    """
    children = array_ops.squeeze(array_ops.slice(
        self.variables.tree, [0, 0], [-1, 1]), squeeze_dims=[1])
    is_leaf = math_ops.equal(constants.LEAF_NODE, children)
    leaves = math_ops.to_int32(array_ops.squeeze(array_ops.where(is_leaf),
                                                 squeeze_dims=[1]))
    counts = array_ops.gather(self.variables.node_sums, leaves)
    gini = self._weighted_gini(counts)
    # Guard against step 1, when there often are no leaves yet.
    def impurity():
      return gini
    # Since average impurity can be used for loss, when there's no data just
    # return a big number so that loss always decreases.
    def big():
      return array_ops.ones_like(gini, dtype=dtypes.float32) * 10000000.
    return control_flow_ops.cond(math_ops.greater(
        array_ops.shape(leaves)[0], 0), impurity, big)
 def get_simple_graph_def(self):
   """Create a simple graph and return its graph_def."""
   g = ops.Graph()
   with g.as_default():
     a = aops.placeholder(
         dtype=dtypes.float32, shape=(None, 24, 24, 2), name="input")
     e = cop.constant(
         [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]],
         name="weights",
         dtype=dtypes.float32)
     conv = nn.conv2d(
         input=a,
         filter=e,
         strides=[1, 2, 2, 1],
         padding="SAME",
         name="conv")
     b = cop.constant(
         [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtypes.float32)
     t = nn.bias_add(conv, b, name="biasAdd")
     relu = nn.relu(t, "relu")
     idty = aops.identity(relu, "ID")
     v = nn_ops.max_pool(
         idty, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool")
     aops.squeeze(v, name="output")
   return g.as_graph_def()
 def GetParams(self):
   """Neighboring node wiring tests in TF-TRT conversion."""
   dtype = dtypes.float32
   input_name = "input"
   input_dims = [2, 3, 7, 5]
   g = ops.Graph()
   with g.as_default():
     x = array_ops.placeholder(dtype=dtype, shape=input_dims, name=input_name)
     e = constant_op.constant(
         np.random.normal(.3, 0.05, [3, 2, 3, 4]), name="weights", dtype=dtype)
     conv = nn.conv2d(
         input=x,
         filter=e,
         data_format="NCHW",
         strides=[1, 1, 1, 1],
         padding="VALID",
         name="conv")
     b = constant_op.constant(
         np.random.normal(1.0, 1.0, [1, 4, 1, 1]), name="bias", dtype=dtype)
     t = conv * b
     e = gen_math_ops.tan(conv)
     t = t - e
     array_ops.squeeze(t, name=self.output_name)
   return trt_test.TfTrtIntegrationTestParams(
       gdef=g.as_graph_def(),
       input_names=[input_name],
       input_dims=[input_dims],
       num_expected_engines=2,
       expected_output_dims=(2, 4, 5, 4),
       allclose_atol=1.e-03,
       allclose_rtol=1.e-03)
Example #19
 def GetParams(self):
   """Test for rank 2 input in TF-TRT."""
   input_names = ["input", "input2"]
   # Two paths: first with rank 2 input, second with rank 4 input.
   input_dims = [[12, 5], [12, 5, 2, 2]]
   output_name = "output"
   g = ops.Graph()
   with g.as_default():
     outputs = []
     for i in range(2):
       x = array_ops.placeholder(
           dtype=dtypes.float32, shape=input_dims[i], name=input_names[i])
       c = constant_op.constant(1.0, name="c%d_1" % i)
       q = math_ops.add(x, c, name="add%d_1" % i)
       q = math_ops.abs(q, name="abs%d_1" % i)
       c = constant_op.constant(2.2, name="c%d_2" % i)
       q = math_ops.add(q, c, name="add%d_2" % i)
       q = math_ops.abs(q, name="abs%d_2" % i)
       c = constant_op.constant(3.0, name="c%d_3" % i)
       q = math_ops.add(q, c, name="add%d_3" % i)
       if i == 0:
         for j in range(2):
           q = array_ops.expand_dims(q, -1, name="expand%d_%d" % (i, j))
       q = gen_math_ops.reciprocal(q, name="reciprocal%d" % i)
       outputs.append(q)
     # Combine both paths
     q = math_ops.add(outputs[0], outputs[1], name="add")
     array_ops.squeeze(q, name=output_name)
   return trt_test.TfTrtIntegrationTestParams(
       gdef=g.as_graph_def(),
       input_names=input_names,
       input_dims=input_dims,
       output_names=[output_name],
       expected_output_dims=[tuple(input_dims[1])])
def GetSingleEngineGraphDef(dtype=dtypes.float32):
  """Create a graph containing a single segment."""
  g = ops.Graph()
  with g.as_default():
    inp = array_ops.placeholder(
        dtype=dtype, shape=[None] + INPUT_DIMS[1:], name=INPUT_NAME)
    with g.device("/GPU:0"):
      conv_filter = constant_op.constant(
          [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]],
          name="weights",
          dtype=dtype)
      conv = nn.conv2d(
          input=inp,
          filter=conv_filter,
          strides=[1, 2, 2, 1],
          padding="SAME",
          name="conv")
      bias = constant_op.constant(
          [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtype)
      added = nn.bias_add(conv, bias, name="bias_add")
      relu = nn.relu(added, "relu")
      identity = array_ops.identity(relu, "identity")
      pool = nn_ops.max_pool(
          identity, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool")
    array_ops.squeeze(pool, name=OUTPUT_NAME)
  return g.as_graph_def()
Example #21
def _recall_at_threshold(labels, predictions, weights, threshold, name=None):
  with ops.name_scope(
      name, 'recall_at_%s' % threshold,
      (predictions, labels, weights, threshold)) as scope:
    precision_tensor, update_op = metrics_lib.recall_at_thresholds(
        labels=labels, predictions=predictions, thresholds=(threshold,),
        weights=weights, name=scope)
    return array_ops.squeeze(precision_tensor), array_ops.squeeze(update_op)
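
Illustrative only: `recall_at_thresholds` reports one value per threshold, so with a single threshold the result carries a trailing axis of size 1, which the squeeze above removes. A NumPy analogue:

import numpy as np

per_threshold_recall = np.array([0.75])           # shape (1,): one threshold
scalar_recall = np.squeeze(per_threshold_recall)  # shape (): plain scalar
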
  def testSqueezeMatrix(self):
    matrix = [[1, 2, 3]]
    matrix_squeezed = array_ops.squeeze(matrix, [0])
    self.assertEqual(matrix_squeezed.get_shape(), (3))

    with self.assertRaisesRegexp(
        Exception, "Can not squeeze dim.1., expected a dimension of 1, got 3"):
      matrix_squeezed = array_ops.squeeze(matrix, [1])
def remove_squeezable_dimensions(
    labels, predictions, expected_rank_diff=0, name=None):
  """Squeeze last dim if ranks differ from expected by exactly 1.

  In the common case where we expect shapes to match, `expected_rank_diff`
  defaults to 0, and we squeeze the last dimension of the larger rank if they
  differ by 1.

  But, for example, if `labels` contains class IDs and `predictions` contains 1
  probability per class, we expect `predictions` to have 1 more dimension than
  `labels`, so `expected_rank_diff` would be 1. In this case, we'd squeeze
  `labels` if `rank(predictions) - rank(labels) == 0`, and
  `predictions` if `rank(predictions) - rank(labels) == 2`.

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    labels: Label values, a `Tensor` whose dimensions match `predictions`.
    predictions: Predicted values, a `Tensor` of arbitrary dimensions.
    expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.
    name: Name of the op.

  Returns:
    Tuple of `labels` and `predictions`, possibly with last dim squeezed.
  """
  with ops.name_scope(name, 'remove_squeezable_dimensions',
                      [labels, predictions]):
    predictions = ops.convert_to_tensor(predictions)
    labels = ops.convert_to_tensor(labels)
    predictions_shape = predictions.get_shape()
    predictions_rank = predictions_shape.ndims
    labels_shape = labels.get_shape()
    labels_rank = labels_shape.ndims
    if (labels_rank is not None) and (predictions_rank is not None):
      # Use static rank.
      rank_diff = predictions_rank - labels_rank
      if rank_diff == expected_rank_diff + 1:
        predictions = array_ops.squeeze(predictions, [-1])
      elif rank_diff == expected_rank_diff - 1:
        labels = array_ops.squeeze(labels, [-1])
      return labels, predictions

    # Use dynamic rank.
    rank_diff = array_ops.rank(predictions) - array_ops.rank(labels)
    if (predictions_rank is None) or (
        predictions_shape.dims[-1].is_compatible_with(1)):
      predictions = control_flow_ops.cond(
          math_ops.equal(expected_rank_diff + 1, rank_diff),
          lambda: array_ops.squeeze(predictions, [-1]),
          lambda: predictions)
    if (labels_rank is None) or (
        labels_shape.dims[-1].is_compatible_with(1)):
      labels = control_flow_ops.cond(
          math_ops.equal(expected_rank_diff - 1, rank_diff),
          lambda: array_ops.squeeze(labels, [-1]),
          lambda: labels)
    return labels, predictions
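
A minimal usage sketch of `remove_squeezable_dimensions` (hypothetical values, assuming the function above is importable): labels of shape [3, 1] are squeezed to [3] so they line up with rank-1 predictions.

from tensorflow.python.framework import ops

labels = ops.convert_to_tensor([[1.0], [0.0], [1.0]])   # shape [3, 1]
predictions = ops.convert_to_tensor([0.9, 0.2, 0.8])    # shape [3]
labels, predictions = remove_squeezable_dimensions(labels, predictions)
# labels now has shape [3]; predictions is returned unchanged.
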
Example #24
def crf_decode(potentials, transition_params, sequence_length):
  """Decode the highest scoring sequence of tags in TensorFlow.

  This is a function for tensors.

  Args:
    potentials: A [batch_size, max_seq_len, num_tags] tensor of
              unary potentials.
    transition_params: A [num_tags, num_tags] matrix of
              binary potentials.
    sequence_length: A [batch_size] vector of true sequence lengths.

  Returns:
    decode_tags: A [batch_size, max_seq_len] matrix, with dtype `tf.int32`.
                Contains the highest scoring tag indices.
    best_score: A [batch_size] vector, containing the score of `decode_tags`.
  """
  # For simplicity, in shape comments, denote:
  # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output).
  num_tags = potentials.get_shape()[2].value

  # Computes forward decoding. Get last score and backpointers.
  crf_fwd_cell = CrfDecodeForwardRnnCell(transition_params)
  initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1])
  initial_state = array_ops.squeeze(initial_state, axis=[1])      # [B, O]
  inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1])   # [B, T-1, O]
  backpointers, last_score = rnn.dynamic_rnn(
      crf_fwd_cell,
      inputs=inputs,
      sequence_length=sequence_length - 1,
      initial_state=initial_state,
      time_major=False,
      dtype=dtypes.int32)             # [B, T - 1, O], [B, O]
  backpointers = gen_array_ops.reverse_sequence(
      backpointers, sequence_length - 1, seq_dim=1)               # [B, T-1, O]

  # Computes backward decoding. Extract tag indices from backpointers.
  crf_bwd_cell = CrfDecodeBackwardRnnCell(num_tags)
  initial_state = math_ops.cast(math_ops.argmax(last_score, axis=1),
                                dtype=dtypes.int32)               # [B]
  initial_state = array_ops.expand_dims(initial_state, axis=-1)   # [B, 1]
  decode_tags, _ = rnn.dynamic_rnn(
      crf_bwd_cell,
      inputs=backpointers,
      sequence_length=sequence_length - 1,
      initial_state=initial_state,
      time_major=False,
      dtype=dtypes.int32)           # [B, T - 1, 1]
  decode_tags = array_ops.squeeze(decode_tags, axis=[2])           # [B, T - 1]
  decode_tags = array_ops.concat([initial_state, decode_tags], axis=1)  # [B, T]
  decode_tags = gen_array_ops.reverse_sequence(
      decode_tags, sequence_length, seq_dim=1)                     # [B, T]

  best_score = math_ops.reduce_max(last_score, axis=1)             # [B]
  return decode_tags, best_score
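
A NumPy sketch (illustrative, not the contrib implementation) of the shape bookkeeping at the start of `crf_decode`: slicing out the first time step leaves a singleton axis that squeeze removes, producing the [B, O] initial state for the forward RNN.

import numpy as np

B, T, O = 2, 5, 4
potentials = np.random.randn(B, T, O)
initial_state = potentials[:, 0:1, :]              # [B, 1, O], like array_ops.slice
initial_state = np.squeeze(initial_state, axis=1)  # [B, O]
inputs = potentials[:, 1:, :]                      # [B, T-1, O]
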
Example #25
 def lookup(self):
   """Returns cached_tree_ids, cached_node_ids, cached_logits."""
   cached_tree_ids, cached_node_ids, cached_logits = array_ops.split(
       lookup_ops.lookup_table_find_v2(
           self._table_ref, self._example_ids, default_value=[0.0, 0.0, 0.0]),
       [1, 1, self._logits_dimension],
       axis=1)
   cached_tree_ids = array_ops.squeeze(
       array_ops.bitcast(cached_tree_ids, dtypes.int32))
   cached_node_ids = array_ops.squeeze(
       array_ops.bitcast(cached_node_ids, dtypes.int32))
   return (cached_tree_ids, cached_node_ids, cached_logits)
Example #26
 def _compareSqueeze(self, x, squeeze_dims, use_gpu):
   with self.test_session(use_gpu=use_gpu):
     if squeeze_dims:
       np_ans = np.squeeze(x, axis=tuple(squeeze_dims))
       tensor = array_ops.squeeze(x, squeeze_dims)
       tf_ans = tensor.eval()
     else:
       np_ans = np.squeeze(x)
       tensor = array_ops.squeeze(x)
       tf_ans = tensor.eval()
   self.assertShapeEqual(np_ans, tensor)
   self.assertAllEqual(np_ans, tf_ans)
Example #27
  def testSqueezeWithUnknownShape(self):
    with self.test_session():
      a = array_ops.placeholder(dtypes.float32, shape=[2, None])

      squeezed = array_ops.squeeze(a, [1])
      self.assertEqual([2], squeezed.get_shape().as_list())

      squeezed = array_ops.squeeze(a)
      self.assertEqual(None, squeezed.get_shape())

      self.assertRaises(ValueError, array_ops.squeeze, a, [0])
      self.assertRaises(ValueError, array_ops.squeeze, a, [100])
Example #28
  def GetParams(self):
    """Create a graph containing multiple segments."""
    # TODO(aaroey): test graph with different dtypes.
    dtype = dtypes.float32
    input_name = "input"
    input_dims = [100, 24, 24, 2]
    g = ops.Graph()
    with g.as_default():
      inp = array_ops.placeholder(
          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
      with g.device("/GPU:0"):
        conv_filter = constant_op.constant(
            [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]],
            name="weights",
            dtype=dtype)
        conv = nn.conv2d(
            input=inp,
            filter=conv_filter,
            strides=[1, 2, 2, 1],
            padding="SAME",
            name="conv")
        c1 = constant_op.constant(
            np.random.randn(input_dims[0], 12, 12, 6), dtype=dtype, name="c1")
        p = math_ops.mul(conv, c1, name="mul")
        c2 = constant_op.constant(
            np.random.randn(input_dims[0], 12, 12, 6), dtype=dtype, name="c2")
        q = math_ops.div(conv, c2, name="div")

        edge = self.trt_incompatible_op(q, name="incompatible")
        edge = math_ops.div(edge, edge, name="div1")
        r = math_ops.add(edge, edge, name="add")

        p = math_ops.sub(p, edge, name="sub")
        q = math_ops.mul(q, edge, name="mul1")
        s = math_ops.add(p, q, name="add1")
        s = math_ops.sub(s, r, name="sub1")
      array_ops.squeeze(s, name=self.output_name)
    return trt_test.TfTrtIntegrationTestParams(
        gdef=g.as_graph_def(),
        input_names=[input_name],
        input_dims=[input_dims],
        # TODO(aaroey): LayoutOptimizer adds additional nodes to the graph which
        # breaks the connection check, fix it.
        # - my_trt_op_0 should have ["mul", "sub", "div1", "mul1", "add1",
        #   "add", "sub1"];
        # - my_trt_op_1 should have ["weights","conv", "div"]
        expected_engines=["my_trt_op_0", "my_trt_op_1"],
        expected_output_dims=(100, 12, 12, 6),
        allclose_atol=1.e-03,
        allclose_rtol=1.e-03)
  def quantiles_ready():
    """The subgraph for when the quantiles are ready."""
    quantized_feature = quantile_ops.quantiles([], [sparse_column_values], [],
                                               [quantile_buckets],
                                               [sparse_column_indices])

    quantized_feature = math_ops.cast(quantized_feature[1], dtypes.int64)
    quantized_feature = array_ops.squeeze(quantized_feature, axis=0)

    example_indices, _ = array_ops.split(
        sparse_column_indices, num_or_size_splits=2, axis=1)
    example_indices = array_ops.squeeze(example_indices, [1])
    filtered_gradients = array_ops.gather(gradients, example_indices)
    filtered_hessians = array_ops.gather(hessians, example_indices)
    filtered_partition_ids = array_ops.gather(example_partition_ids,
                                              example_indices)
    unique_partitions, mapped_partitions = array_ops.unique(
        example_partition_ids)

    # Compute aggregate stats for each partition.
    # Since unsorted_segment_sum can be numerically unstable, use a 64-bit
    # operation.
    gradients64 = math_ops.cast(gradients, dtypes.float64)
    hessians64 = math_ops.cast(hessians, dtypes.float64)
    per_partition_gradients = math_ops.unsorted_segment_sum(
        gradients64, mapped_partitions, array_ops.size(unique_partitions))
    per_partition_hessians = math_ops.unsorted_segment_sum(
        hessians64, mapped_partitions, array_ops.size(unique_partitions))
    per_partition_gradients = math_ops.cast(per_partition_gradients,
                                            dtypes.float32)
    per_partition_hessians = math_ops.cast(per_partition_hessians,
                                           dtypes.float32)
    # Prepend a bias feature per partition that accumulates the stats for all
    # examples in that partition.
    bias_feature_ids = array_ops.fill(
        array_ops.shape(unique_partitions), _BIAS_FEATURE_ID)
    bias_feature_ids = math_ops.cast(bias_feature_ids, dtypes.int64)
    zeros = array_ops.zeros_like(bias_feature_ids)
    bias_feature_ids = array_ops.stack([bias_feature_ids, zeros], axis=1)

    partition_ids = array_ops.concat(
        [unique_partitions, filtered_partition_ids], 0)
    filtered_gradients = array_ops.concat(
        [per_partition_gradients, filtered_gradients], 0)
    filtered_hessians = array_ops.concat(
        [per_partition_hessians, filtered_hessians], 0)

    bucket_ids = array_ops.concat([bias_feature_ids, quantized_feature], 0)

    return partition_ids, bucket_ids, filtered_gradients, filtered_hessians
Example #30
def _test_squeeze(data, squeeze_dims=None):
    """One iteration of squeeze."""

    if squeeze_dims is None:
        squeeze_dims = []

    with tf.Graph().as_default():
        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)

        if squeeze_dims:
            array_ops.squeeze(in_data, squeeze_dims)
        else:
            array_ops.squeeze(in_data)

        compare_tf_with_tvm(data, 'Placeholder:0', 'Squeeze:0')
Example #31
def resize_images(images,
                  new_height,
                  new_width,
                  method=ResizeMethod.BILINEAR,
                  align_corners=False):
    """Resize `images` to `new_width`, `new_height` using the specified `method`.

  Resized images will be distorted if their original aspect ratio is not
  the same as `new_width`, `new_height`.  To avoid distortions see
  [`resize_image_with_crop_or_pad`](#resize_image_with_crop_or_pad).

  `method` can be one of:

  *   <b>`ResizeMethod.BILINEAR`</b>: [Bilinear interpolation.]
      (https://en.wikipedia.org/wiki/Bilinear_interpolation)
  *   <b>`ResizeMethod.NEAREST_NEIGHBOR`</b>: [Nearest neighbor interpolation.]
      (https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
  *   <b>`ResizeMethod.BICUBIC`</b>: [Bicubic interpolation.]
      (https://en.wikipedia.org/wiki/Bicubic_interpolation)
  *   <b>`ResizeMethod.AREA`</b>: Area interpolation.

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]` or
            3-D Tensor of shape `[height, width, channels]`.
    new_height: integer.
    new_width: integer.
    method: ResizeMethod.  Defaults to `ResizeMethod.BILINEAR`.
    align_corners: bool. If true, exactly align all 4 corners of the input and
                   output. Defaults to `false`.

  Raises:
    ValueError: if the shape of `images` is incompatible with the
      shape arguments to this function
    ValueError: if an unsupported resize method is specified.

  Returns:
    If `images` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `images` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """
    images = ops.convert_to_tensor(images, name='images')
    if images.get_shape().ndims is None:
        raise ValueError('\'images\' contains no shape.')
    # TODO(shlens): Migrate this functionality to the underlying Op's.
    is_batch = True
    if len(images.get_shape()) == 3:
        is_batch = False
        images = array_ops.expand_dims(images, 0)

    _, height, width, depth = _ImageDimensions(images)

    # Handle tensor-valued sizes as well as Python integers.
    try:
        new_width = ops.convert_to_tensor(new_width,
                                          dtypes.int32,
                                          name='new_width')
        new_width.get_shape().assert_has_rank(0)
    except (TypeError, ValueError):
        raise ValueError('new_width must be a scalar integer')
    try:
        new_height = ops.convert_to_tensor(new_height,
                                           dtypes.int32,
                                           name='new_height')
        new_height.get_shape().assert_has_rank(0)
    except (TypeError, ValueError):
        raise ValueError('new_height must be a scalar integer')

    new_width_const = tensor_util.constant_value(new_width)
    new_height_const = tensor_util.constant_value(new_height)

    # If we can determine that the height and width will be unmodified by this
    # transformation, we avoid performing the resize.
    if all(x is not None
           for x in [new_width_const, width, new_height_const, height]) and (
               width == new_width_const and height == new_height_const):
        if not is_batch:
            images = array_ops.squeeze(images, squeeze_dims=[0])
        return images

    new_size = array_ops.pack([new_height, new_width])

    if method == ResizeMethod.BILINEAR:
        images = gen_image_ops.resize_bilinear(images,
                                               new_size,
                                               align_corners=align_corners)
    elif method == ResizeMethod.NEAREST_NEIGHBOR:
        images = gen_image_ops.resize_nearest_neighbor(
            images, new_size, align_corners=align_corners)
    elif method == ResizeMethod.BICUBIC:
        images = gen_image_ops.resize_bicubic(images,
                                              new_size,
                                              align_corners=align_corners)
    elif method == ResizeMethod.AREA:
        images = gen_image_ops.resize_area(images,
                                           new_size,
                                           align_corners=align_corners)
    else:
        raise ValueError('Resize method is not implemented.')

    # NOTE(mrry): The shape functions for the resize ops cannot unpack
    # the packed values in `new_size`, so set the shape here.
    images.set_shape([None, new_height_const, new_width_const, None])

    if not is_batch:
        images = array_ops.squeeze(images, squeeze_dims=[0])
    return images
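
A hedged sketch of the same expand_dims / squeeze bookkeeping using the modern public API (assumes TensorFlow 2.x; `tf.image.resize` stands in for the `gen_image_ops` calls above): a 3-D image is promoted to a singleton batch, resized, then squeezed back to 3-D.

import tensorflow as tf

image = tf.random.uniform([32, 48, 3])          # HWC
batched = tf.expand_dims(image, 0)              # [1, H, W, C]
resized = tf.image.resize(batched, [64, 96])    # bilinear by default
back = tf.squeeze(resized, axis=0)              # HWC again, now 64 x 96 x 3
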
Example #32
 def _tile(feature):
     return array_ops.squeeze(array_ops.tile(
         array_ops.expand_dims(feature, 1), [1, num_unroll, 1]),
                              axis=2)
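
A NumPy sketch (illustrative only; `num_unroll` is an assumed parameter) of what `_tile` computes for a feature of shape [batch, 1]: repeat it along a new time axis, then drop the now-redundant last axis.

import numpy as np

num_unroll = 3
feature = np.array([[7.0], [8.0]])                              # [batch, 1]
tiled = np.tile(feature[:, np.newaxis, :], (1, num_unroll, 1))  # [batch, T, 1]
out = np.squeeze(tiled, axis=2)                                 # [batch, T]
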
Example #33
def resize_images(images,
                  size,
                  method=ResizeMethod.BILINEAR,
                  align_corners=False):
  """Resize `images` to `size` using the specified `method`.

  Resized images will be distorted if their original aspect ratio is not
  the same as `size`.  To avoid distortions see
  [`resize_image_with_crop_or_pad`](#resize_image_with_crop_or_pad).

  `method` can be one of:

  *   <b>`ResizeMethod.BILINEAR`</b>: [Bilinear interpolation.](https://en.wikipedia.org/wiki/Bilinear_interpolation)
  *   <b>`ResizeMethod.NEAREST_NEIGHBOR`</b>: [Nearest neighbor interpolation.](https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
  *   <b>`ResizeMethod.BICUBIC`</b>: [Bicubic interpolation.](https://en.wikipedia.org/wiki/Bicubic_interpolation)
  *   <b>`ResizeMethod.AREA`</b>: Area interpolation.

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]` or
            3-D Tensor of shape `[height, width, channels]`.
    size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
          new size for the images.
    method: ResizeMethod.  Defaults to `ResizeMethod.BILINEAR`.
    align_corners: bool. If true, exactly align all 4 corners of the input and
                   output. Defaults to `false`.

  Raises:
    ValueError: if the shape of `images` is incompatible with the
      shape arguments to this function
    ValueError: if `size` has invalid shape or type.
    ValueError: if an unsupported resize method is specified.

  Returns:
    If `images` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `images` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """
  images = ops.convert_to_tensor(images, name='images')
  if images.get_shape().ndims is None:
    raise ValueError('\'images\' contains no shape.')
  # TODO(shlens): Migrate this functionality to the underlying Op's.
  is_batch = True
  if images.get_shape().ndims == 3:
    is_batch = False
    images = array_ops.expand_dims(images, 0)
  elif images.get_shape().ndims != 4:
    raise ValueError('\'images\' must have either 3 or 4 dimensions.')

  _, height, width, _ = images.get_shape().as_list()

  try:
    size = ops.convert_to_tensor(size, dtypes.int32, name='size')
  except (TypeError, ValueError):
    raise ValueError('\'size\' must be a 1-D int32 Tensor')
  if not size.get_shape().is_compatible_with([2]):
    raise ValueError('\'size\' must be a 1-D Tensor of 2 elements: '
                     'new_height, new_width')
  size_const_as_shape = tensor_util.constant_value_as_shape(size)
  new_height_const = size_const_as_shape[0].value
  new_width_const = size_const_as_shape[1].value

  # If we can determine that the height and width will be unmodified by this
  # transformation, we avoid performing the resize.
  if all(x is not None
         for x in [new_width_const, width, new_height_const, height]) and (
             width == new_width_const and height == new_height_const):
    if not is_batch:
      images = array_ops.squeeze(images, squeeze_dims=[0])
    return images

  if method == ResizeMethod.BILINEAR:
    images = gen_image_ops.resize_bilinear(images,
                                           size,
                                           align_corners=align_corners)
  elif method == ResizeMethod.NEAREST_NEIGHBOR:
    images = gen_image_ops.resize_nearest_neighbor(images,
                                                   size,
                                                   align_corners=align_corners)
  elif method == ResizeMethod.BICUBIC:
    images = gen_image_ops.resize_bicubic(images,
                                          size,
                                          align_corners=align_corners)
  elif method == ResizeMethod.AREA:
    images = gen_image_ops.resize_area(images,
                                       size,
                                       align_corners=align_corners)
  else:
    raise ValueError('Resize method is not implemented.')

  # NOTE(mrry): The shape functions for the resize ops cannot unpack
  # the packed values in `size`, so set the shape here.
  images.set_shape([None, new_height_const, new_width_const, None])

  if not is_batch:
    images = array_ops.squeeze(images, squeeze_dims=[0])
  return images
Example #34
def squeeze_or_expand_dimensions(y_pred, y_true, sample_weight):
    """Squeeze or expand last dimension if needed.

  1. Squeezes last dim of `y_pred` or `y_true` if their rank differs by 1
  (using `confusion_matrix.remove_squeezable_dimensions`).
  2. Squeezes or expands last dim of `sample_weight` if its rank differs by 1
  from the new rank of `y_pred`.
  If `sample_weight` is scalar, it is kept scalar.

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    y_pred: Predicted values, a `Tensor` of arbitrary dimensions.
    y_true: Optional label `Tensor` whose dimensions match `y_pred`.
    sample_weight: Optional weight scalar or `Tensor` whose dimensions match
      `y_pred`.

  Returns:
    Tuple of `y_pred`, `y_true` and `sample_weight`. Each of them possibly has
    the last dimension squeezed,
    `sample_weight` could be extended by one dimension.
  """
    y_pred_shape = y_pred.shape
    y_pred_rank = y_pred_shape.ndims
    if y_true is not None:

        # If sparse matrix is provided as `y_true`, the last dimension in `y_pred`
        # may be > 1. Eg: y_true = [0, 1, 2] (shape=(3,)),
        # y_pred = [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]] (shape=(3, 3))
        # In this case, we should not try to remove squeezable dimension.
        y_true_shape = y_true.shape
        y_true_rank = y_true_shape.ndims
        if (y_true_rank is not None) and (y_pred_rank is not None):
            # Use static rank for `y_true` and `y_pred`.
            if (y_pred_rank - y_true_rank != 1) or y_pred_shape[-1] == 1:
                y_true, y_pred = confusion_matrix.remove_squeezable_dimensions(
                    y_true, y_pred)
        else:
            # Use dynamic rank.
            rank_diff = array_ops.rank(y_pred) - array_ops.rank(y_true)
            squeeze_dims = lambda: confusion_matrix.remove_squeezable_dimensions(  # pylint: disable=g-long-lambda
                y_true, y_pred)
            is_last_dim_1 = math_ops.equal(1, array_ops.shape(y_pred)[-1])
            maybe_squeeze_dims = lambda: control_flow_ops.cond(  # pylint: disable=g-long-lambda
                is_last_dim_1, squeeze_dims, lambda: (y_true, y_pred))
            y_true, y_pred = control_flow_ops.cond(
                math_ops.equal(1, rank_diff), maybe_squeeze_dims, squeeze_dims)

    if sample_weight is None:
        return y_pred, y_true, None

    sample_weight = ops.convert_to_tensor(sample_weight)
    weights_shape = sample_weight.shape
    weights_rank = weights_shape.ndims
    if weights_rank == 0:  # If weights is scalar, do nothing.
        return y_pred, y_true, sample_weight

    if (y_pred_rank is not None) and (weights_rank is not None):
        # Use static rank.
        if weights_rank - y_pred_rank == 1:
            sample_weight = array_ops.squeeze(sample_weight, [-1])
        elif y_pred_rank - weights_rank == 1:
            sample_weight = array_ops.expand_dims(sample_weight, [-1])
        return y_pred, y_true, sample_weight

    # Use dynamic rank.
    weights_rank_tensor = array_ops.rank(sample_weight)
    rank_diff = weights_rank_tensor - array_ops.rank(y_pred)
    maybe_squeeze_weights = lambda: array_ops.squeeze(sample_weight, [-1])

    def _maybe_expand_weights():
        return control_flow_ops.cond(
            math_ops.equal(rank_diff, -1),
            lambda: array_ops.expand_dims(sample_weight, [-1]),
            lambda: sample_weight)

    def _maybe_adjust_weights():
        return control_flow_ops.cond(math_ops.equal(rank_diff,
                                                    1), maybe_squeeze_weights,
                                     _maybe_expand_weights)

    # squeeze or expand last dim of `sample_weight` if its rank differs by 1
    # from the new rank of `y_pred`.
    sample_weight = control_flow_ops.cond(
        math_ops.equal(weights_rank_tensor, 0), lambda: sample_weight,
        _maybe_adjust_weights)
    return y_pred, y_true, sample_weight
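
A minimal usage sketch of `squeeze_or_expand_dimensions` (hypothetical values, assuming the function above and its TensorFlow imports are in scope): a [batch, 1] sample weight is squeezed so it broadcasts against rank-1 predictions.

from tensorflow.python.framework import ops

y_pred = ops.convert_to_tensor([0.9, 0.2, 0.7])          # shape [3]
weights = ops.convert_to_tensor([[1.0], [0.5], [2.0]])   # shape [3, 1]
y_pred, _, weights = squeeze_or_expand_dimensions(y_pred, None, weights)
# weights is squeezed to shape [3].
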
Example #35
def model_fn(features, labels, mode, params):
  """
  Based on https://github.com/tensorflow/tpu/blob/master/models/experimental/inception/inception_v2_tpu_model.py
  :param features:
  :param labels:
  :param mode:
  :param params:
  :return:
  """
  tf.summary.image('0_input', features, max_outputs=4)

  # 224 x 224 x 3
  end_point = 'Conv2d_1a_7x7'
  net = layers.conv2d(features, 64, [7, 7], stride=2, weights_initializer=trunc_normal(1.0), scope=end_point)
  tf.summary.image('1_{}'.format(end_point), net[:, :, :, 0:3], max_outputs=4)

  # 112 x 112 x 64
  end_point = 'MaxPool_2a_3x3'
  net = layers_lib.max_pool2d(net, [3, 3], scope=end_point, stride=2, padding='SAME')
  tf.summary.image('2_{}'.format(end_point), net[:, :, :, 0:3], max_outputs=4)

  # 56 x 56 x 64
  end_point = 'Conv2d_2b_1x1'
  net = layers.conv2d(net, 64, [1, 1], scope=end_point, weights_initializer=trunc_normal(0.1))
  tf.summary.image('3_{}'.format(end_point), net[:, :, :, 0:3], max_outputs=4)

  # 56 x 56 x 64
  end_point = 'Conv2d_2c_3x3'
  net = layers.conv2d(net, 192, [3, 3], scope=end_point)
  tf.summary.image('4_{}'.format(end_point), net[:, :, :, 0:3], max_outputs=4)

  # 56 x 56 x 192
  end_point = 'MaxPool_3a_3x3'
  net = layers_lib.max_pool2d(net, [3, 3], scope=end_point, stride=2, padding='SAME')
  tf.summary.image('5_{}'.format(end_point), net[:, :, :, 0:3], max_outputs=4)

  # 28 x 28 x 192
  # Inception module.
  end_point = 'Mixed_3b'
  with variable_scope.variable_scope(end_point):
    with variable_scope.variable_scope('Branch_0'):
      branch_0 = layers.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
    with variable_scope.variable_scope('Branch_1'):
      branch_1 = layers.conv2d(net, 64, [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1')
      branch_1 = layers.conv2d(branch_1, 64, [3, 3], scope='Conv2d_0b_3x3')
    with variable_scope.variable_scope('Branch_2'):
      branch_2 = layers.conv2d(net, 64, [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1')
      branch_2 = layers.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')
      branch_2 = layers.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3')
    with variable_scope.variable_scope('Branch_3'):
      branch_3 = layers_lib.avg_pool2d(net, [3, 3], padding='SAME', stride=1, scope='AvgPool_0a_3x3')
      branch_3 = layers.conv2d(branch_3, 32, [1, 1], weights_initializer=trunc_normal(0.1), scope='Conv2d_0b_1x1')
    net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)

    # 28 x 28 x 256
    end_point = 'Mixed_3c'
    with variable_scope.variable_scope(end_point):
      with variable_scope.variable_scope('Branch_0'):
        branch_0 = layers.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
      with variable_scope.variable_scope('Branch_1'):
        branch_1 = layers.conv2d(net, 64, [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1')
        branch_1 = layers.conv2d(branch_1, 96, [3, 3], scope='Conv2d_0b_3x3')
      with variable_scope.variable_scope('Branch_2'):
        branch_2 = layers.conv2d(net, 64, [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1')
        branch_2 = layers.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')
        branch_2 = layers.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3')
      with variable_scope.variable_scope('Branch_3'):
        branch_3 = layers_lib.avg_pool2d(net, [3, 3], padding='SAME', stride=1, scope='AvgPool_0a_3x3')
        branch_3 = layers.conv2d(branch_3, 64, [1, 1], weights_initializer=trunc_normal(0.1), scope='Conv2d_0b_1x1')
      net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)

    # 28 x 28 x 320
    end_point = 'Mixed_4a'
    with variable_scope.variable_scope(end_point):
      with variable_scope.variable_scope('Branch_0'):
        branch_0 = layers.conv2d(net, 128, [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1')
        branch_0 = layers.conv2d(branch_0, 160, [3, 3], stride=2, scope='Conv2d_1a_3x3')
      with variable_scope.variable_scope('Branch_1'):
        branch_1 = layers.conv2d(net, 64, [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1')
        branch_1 = layers.conv2d(branch_1, 96, [3, 3], scope='Conv2d_0b_3x3')
        branch_1 = layers.conv2d(branch_1, 96, [3, 3], stride=2, scope='Conv2d_1a_3x3')
      with variable_scope.variable_scope('Branch_2'):
        branch_2 = layers_lib.max_pool2d(net, [3, 3], stride=2, padding='SAME', scope='MaxPool_1a_3x3')
      net = array_ops.concat([branch_0, branch_1, branch_2], 3)

    # 14 x 14 x 576
    end_point = 'Mixed_4b'
    with variable_scope.variable_scope(end_point):
      with variable_scope.variable_scope('Branch_0'):
        branch_0 = layers.conv2d(net, 224, [1, 1], scope='Conv2d_0a_1x1')
      with variable_scope.variable_scope('Branch_1'):
        branch_1 = layers.conv2d(net, 64, [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1')
        branch_1 = layers.conv2d(branch_1, 96, [3, 3], scope='Conv2d_0b_3x3')
      with variable_scope.variable_scope('Branch_2'):
        branch_2 = layers.conv2d(net, 96, [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1')
        branch_2 = layers.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0b_3x3')
        branch_2 = layers.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0c_3x3')
      with variable_scope.variable_scope('Branch_3'):
        branch_3 = layers_lib.avg_pool2d(net, [3, 3], padding='SAME', stride=1, scope='AvgPool_0a_3x3')
        branch_3 = layers.conv2d(branch_3, 128, [1, 1], weights_initializer=trunc_normal(0.1), scope='Conv2d_0b_1x1')
      net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)

    # 14 x 14 x 576
    end_point = 'Mixed_4c'
    with variable_scope.variable_scope(end_point):
      with variable_scope.variable_scope('Branch_0'):
        branch_0 = layers.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
      with variable_scope.variable_scope('Branch_1'):
        branch_1 = layers.conv2d(net, 96, [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1')
        branch_1 = layers.conv2d(branch_1, 128, [3, 3], scope='Conv2d_0b_3x3')
      with variable_scope.variable_scope('Branch_2'):
        branch_2 = layers.conv2d(net, 96, [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1')
        branch_2 = layers.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0b_3x3')
        branch_2 = layers.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0c_3x3')
      with variable_scope.variable_scope('Branch_3'):
        branch_3 = layers_lib.avg_pool2d(net, [3, 3], padding='SAME', stride=1, scope='AvgPool_0a_3x3')
        branch_3 = layers.conv2d(branch_3, 128, [1, 1], weights_initializer=trunc_normal(0.1), scope='Conv2d_0b_1x1')
      net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)

    # 14 x 14 x 576
    end_point = 'Mixed_4d'
    with variable_scope.variable_scope(end_point):
      with variable_scope.variable_scope('Branch_0'):
        branch_0 = layers.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
      with variable_scope.variable_scope('Branch_1'):
        branch_1 = layers.conv2d(net, 128, [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1')
        branch_1 = layers.conv2d(branch_1, 160, [3, 3], scope='Conv2d_0b_3x3')
      with variable_scope.variable_scope('Branch_2'):
        branch_2 = layers.conv2d(net, 128, [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1')
        branch_2 = layers.conv2d(branch_2, 160, [3, 3], scope='Conv2d_0b_3x3')
        branch_2 = layers.conv2d(branch_2, 160, [3, 3], scope='Conv2d_0c_3x3')
      with variable_scope.variable_scope('Branch_3'):
        branch_3 = layers_lib.avg_pool2d(net, [3, 3], padding='SAME', stride=1, scope='AvgPool_0a_3x3')
        branch_3 = layers.conv2d(branch_3, 96, [1, 1], weights_initializer=trunc_normal(0.1), scope='Conv2d_0b_1x1')
      net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)

    # 14 x 14 x 576
    end_point = 'Mixed_4e'
    with variable_scope.variable_scope(end_point):
      with variable_scope.variable_scope('Branch_0'):
        branch_0 = layers.conv2d(net, 96, [1, 1], scope='Conv2d_0a_1x1')
      with variable_scope.variable_scope('Branch_1'):
        branch_1 = layers.conv2d(net, 128, [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1')
        branch_1 = layers.conv2d(branch_1, 192, [3, 3], scope='Conv2d_0b_3x3')
      with variable_scope.variable_scope('Branch_2'):
        branch_2 = layers.conv2d(net, 160, [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1')
        branch_2 = layers.conv2d(branch_2, 192, [3, 3], scope='Conv2d_0b_3x3')
        branch_2 = layers.conv2d(branch_2, 192, [3, 3], scope='Conv2d_0c_3x3')
      with variable_scope.variable_scope('Branch_3'):
        branch_3 = layers_lib.avg_pool2d(net, [3, 3], padding='SAME', stride=1, scope='AvgPool_0a_3x3')
        branch_3 = layers.conv2d(branch_3, 96, [1, 1], weights_initializer=trunc_normal(0.1), scope='Conv2d_0b_1x1')
      net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)

    # 14 x 14 x 576
    end_point = 'Mixed_5a'
    with variable_scope.variable_scope(end_point):
      with variable_scope.variable_scope('Branch_0'):
        branch_0 = layers.conv2d(net, 128, [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1')
        branch_0 = layers.conv2d(branch_0, 192, [3, 3], stride=2, scope='Conv2d_1a_3x3')
      with variable_scope.variable_scope('Branch_1'):
        branch_1 = layers.conv2d(net, 192, [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1')
        branch_1 = layers.conv2d(branch_1, 256, [3, 3], scope='Conv2d_0b_3x3')
        branch_1 = layers.conv2d(branch_1, 256, [3, 3], stride=2, scope='Conv2d_1a_3x3')
      with variable_scope.variable_scope('Branch_2'):
        branch_2 = layers_lib.max_pool2d(net, [3, 3], stride=2, padding='SAME', scope='MaxPool_1a_3x3')
      net = array_ops.concat([branch_0, branch_1, branch_2], 3)

    # 7 x 7 x 1024
    end_point = 'Mixed_5b'
    with variable_scope.variable_scope(end_point):
      with variable_scope.variable_scope('Branch_0'):
        branch_0 = layers.conv2d(net, 352, [1, 1], scope='Conv2d_0a_1x1')
      with variable_scope.variable_scope('Branch_1'):
        branch_1 = layers.conv2d(net, 192, [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1')
        branch_1 = layers.conv2d(branch_1, 320, [3, 3], scope='Conv2d_0b_3x3')
      with variable_scope.variable_scope('Branch_2'):
        branch_2 = layers.conv2d(net, 160, [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1')
        branch_2 = layers.conv2d(branch_2, 224, [3, 3], scope='Conv2d_0b_3x3')
        branch_2 = layers.conv2d(branch_2, 224, [3, 3], scope='Conv2d_0c_3x3')
      with variable_scope.variable_scope('Branch_3'):
        branch_3 = layers_lib.avg_pool2d(net, [3, 3], padding='SAME', stride=1, scope='AvgPool_0a_3x3')
        branch_3 = layers.conv2d(branch_3, 128, [1, 1], weights_initializer=trunc_normal(0.1), scope='Conv2d_0b_1x1')
      net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)

    # 7 x 7 x 1024
    end_point = 'Mixed_5c'
    with variable_scope.variable_scope(end_point):
      with variable_scope.variable_scope('Branch_0'):
        branch_0 = layers.conv2d(net, 352, [1, 1], scope='Conv2d_0a_1x1')
      with variable_scope.variable_scope('Branch_1'):
        branch_1 = layers.conv2d(net, 192, [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1')
        branch_1 = layers.conv2d(branch_1, 320, [3, 3], scope='Conv2d_0b_3x3')
      with variable_scope.variable_scope('Branch_2'):
        branch_2 = layers.conv2d(net, 192, [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1')
        branch_2 = layers.conv2d(branch_2, 224, [3, 3], scope='Conv2d_0b_3x3')
        branch_2 = layers.conv2d(branch_2, 224, [3, 3], scope='Conv2d_0c_3x3')
      with variable_scope.variable_scope('Branch_3'):
        branch_3 = layers_lib.max_pool2d(net, [3, 3], padding='SAME', stride=1, scope='MaxPool_0a_3x3')
        branch_3 = layers.conv2d(branch_3, 128, [1, 1], weights_initializer=trunc_normal(0.1), scope='Conv2d_0b_1x1')
      net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)

  with variable_scope.variable_scope('Logits'):
    kernel_size = util._reduced_kernel_size_for_small_input(net, [7, 7])
    net = layers_lib.avg_pool2d(net, kernel_size, stride=1, padding='VALID', scope='AvgPool_1a_{}x{}'.format(*kernel_size))

    # 1 x 1 x 1024
    net = layers_lib.dropout(net, keep_prob=params['dropout_keep_prob'], scope='Dropout_1b')
    logits = layers.conv2d(net, params['num_classes'], [1, 1], activation_fn=None, normalizer_fn=None, scope='Conv2d_1c_1x1')
    if params['spatial_squeeze']:
      logits = array_ops.squeeze(logits, [1, 2], name='SpatialSqueeze')

  predictions = {
    'argmax': tf.argmax(logits, axis=1, name='prediction_classes'),
    'predictions': layers_lib.softmax(logits, scope='Predictions'),
  }

  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  loss = tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels)
  tf.summary.scalar('loss', loss)

  eval_metric_ops = {
    'accuracy_val': tf.metrics.accuracy(labels=labels, predictions=predictions['argmax'])
  }

  if mode == tf.estimator.ModeKeys.EVAL:
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

  optimizer = tf.train.GradientDescentOptimizer(learning_rate=params['learning_rate'])
  extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  with tf.control_dependencies(extra_update_ops):
    train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())

  tf.summary.scalar('accuracy_train', eval_metric_ops['accuracy_val'][1])
  tf.summary.histogram('labels', labels)
  tf.summary.histogram('predictions', predictions['argmax'])

  return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
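# A minimal usage sketch (TF 1.x), assuming the model_fn above is exposed as
# `inception_model_fn` (hypothetical name) and that a `train_input_fn` yielding
# (features, labels) batches exists; model_dir and the param values are
# placeholders, and only the `params` keys actually read above are shown.
import tensorflow as tf

classifier = tf.estimator.Estimator(
    model_fn=inception_model_fn,  # hypothetical name for the model_fn above
    model_dir='/tmp/inception_model',
    params={
        'num_classes': 1001,
        'dropout_keep_prob': 0.8,
        'spatial_squeeze': True,
        'learning_rate': 0.01,
    })
# classifier.train(input_fn=train_input_fn, steps=1000)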
Beispiel #36
0
def vgg_16_small_img(inputs,
                     num_classes=1000,
                     is_training=True,
                     dropout_keep_prob=0.5,
                     spatial_squeeze=True,
                     scope='vgg_16'):
    # Build the convolutional trunk (a VGG-16 layout sized for small images).

    net = layers_lib.conv2d(inputs,
                            64, [3, 3],
                            padding="SAME",
                            data_format="NHWC",
                            scope="conv1")
    net = tf.nn.relu(net, name="relu_conv1")
    net = layers_lib.conv2d(net,
                            64, [3, 3],
                            padding="SAME",
                            data_format="NHWC",
                            scope="conv2")
    net = tf.nn.relu(net, name="relu_conv2")
    net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
    net = layers_lib.conv2d(net,
                            128, [3, 3],
                            padding="SAME",
                            data_format="NHWC",
                            scope="conv3")
    net = tf.nn.relu(net, name="relu_conv3")
    net = layers_lib.conv2d(net,
                            128, [3, 3],
                            padding="SAME",
                            data_format="NHWC",
                            scope="conv4")
    net = tf.nn.relu(net, name="relu_conv4")
    net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
    net = layers_lib.conv2d(net,
                            256, [3, 3],
                            padding="SAME",
                            data_format="NHWC",
                            scope="conv5")
    net = tf.nn.relu(net, name="relu_conv5")
    net = layers_lib.conv2d(net,
                            256, [3, 3],
                            padding="SAME",
                            data_format="NHWC",
                            scope="conv6")
    net = tf.nn.relu(net, name="relu_conv6")
    net = layers_lib.conv2d(net,
                            256, [3, 3],
                            padding="SAME",
                            data_format="NHWC",
                            scope="conv7")
    net = tf.nn.relu(net, name="relu_conv7")
    net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
    net = layers_lib.conv2d(net,
                            512, [3, 3],
                            padding="SAME",
                            data_format="NHWC",
                            scope="conv8")
    net = tf.nn.relu(net, name="relu_conv8")
    net = layers_lib.conv2d(net,
                            512, [3, 3],
                            padding="SAME",
                            data_format="NHWC",
                            scope="conv9")
    net = tf.nn.relu(net, name="relu_conv9")
    net = layers_lib.conv2d(net,
                            512, [3, 3],
                            padding="SAME",
                            data_format="NHWC",
                            scope="conv10")
    net = tf.nn.relu(net, name="relu_conv10")
    net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
    net = layers_lib.conv2d(net,
                            512, [3, 3],
                            padding="SAME",
                            data_format="NHWC",
                            scope="conv11")
    net = tf.nn.relu(net, name="relu_conv11")
    net = layers_lib.conv2d(net,
                            512, [3, 3],
                            padding="SAME",
                            data_format="NHWC",
                            scope="conv12")
    net = tf.nn.relu(net, name="relu_conv12")
    net = layers_lib.conv2d(net,
                            512, [3, 3],
                            padding="SAME",
                            data_format="NHWC",
                            scope="conv13")
    net = tf.nn.relu(net, name="relu_conv13")
    net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')

    # Use conv2d instead of fully_connected layers.
    net = layers.conv2d(net, 512, [1, 1], padding='VALID', scope='fc6')
    net = tf.nn.relu(net, name="relu_fc6")
    net = layers_lib.dropout(net,
                             dropout_keep_prob,
                             is_training=is_training,
                             scope='dropout6')
    net = layers.conv2d(net,
                        num_classes, [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                        scope='fc8')
    # Squeeze the spatial dimensions so the logits have shape [batch, num_classes].
    if spatial_squeeze:
        net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
    return net
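# A minimal usage sketch (TF 1.x), assuming 32x32x3 inputs (CIFAR-sized
# images): the five 2x2 max-pools reduce 32x32 down to 1x1, so after the 1x1
# "fc" convolutions the spatial squeeze yields logits of shape
# [batch, num_classes].
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 32, 32, 3], name='images')
logits = vgg_16_small_img(images, num_classes=10, is_training=True)
# logits has static shape [None, 10]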
Beispiel #37
0
def _read_batch(cell,
                features,
                labels,
                mode,
                num_unroll,
                num_rnn_layers,
                batch_size,
                sequence_feature_columns,
                context_feature_columns=None,
                num_threads=3,
                queue_capacity=1000,
                seed=None):
    """Reads a batch from a state saving sequence queue.

  Args:
    cell: An initialized `RNNCell` to be used in the RNN.
    features: A dict of Python string to an iterable of `Tensor`, the
      `features` argument of a TF.Learn model_fn.
    labels: An iterable of `Tensor`, the `labels` argument of a
      TF.Learn model_fn.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    num_unroll: Python integer, how many time steps to unroll at a time.
      Input sequences of length `k` are split into `k / num_unroll` segments.
    num_rnn_layers: Python integer, number of layers in the RNN.
    batch_size: Python integer, the size of the minibatch produced by the SQSS.
    sequence_feature_columns: An iterable containing all the feature columns
      describing sequence features. All items in the set should be instances
      of classes derived from `FeatureColumn`.
    context_feature_columns: An iterable containing all the feature columns
      describing context features, i.e., features that apply across all time
      steps. All items in the set should be instances of classes derived from
      `FeatureColumn`.
    num_threads: The Python integer number of threads enqueuing input examples
      into a queue. Defaults to 3.
    queue_capacity: The max capacity of the queue in number of examples.
      Needs to be at least `batch_size`. Defaults to 1000. When iterating
      over the same input examples multiple times while reusing their keys,
      `queue_capacity` must be smaller than the number of examples.
    seed: Fixes the random seed used for generating input keys by the SQSS.

  Returns:
    batch: A `NextQueuedSequenceBatch` containing batch_size `SequenceExample`
      values and their saved internal states.
  """
    # Set batch_size=1 to initialize SQSS with cell's zero state.
    values = cell.zero_state(batch_size=1, dtype=dtypes.float32)

    # Set up stateful queue reader.
    states = {}
    state_names = _get_lstm_state_names(num_rnn_layers)
    for i in range(num_rnn_layers):
        states[state_names[i][0]] = array_ops.squeeze(values[i][0], axis=0)
        states[state_names[i][1]] = array_ops.squeeze(values[i][1], axis=0)

    sequences, context = _prepare_features_for_sqss(features, labels, mode,
                                                    sequence_feature_columns,
                                                    context_feature_columns)

    return sqss.batch_sequences_with_states(
        input_key='key',
        input_sequences=sequences,
        input_context=context,
        input_length=None,  # infer sequence lengths
        initial_states=states,
        num_unroll=num_unroll,
        batch_size=batch_size,
        pad=True,  # pad to a multiple of num_unroll
        make_keys_unique=True,
        make_keys_unique_seed=seed,
        num_threads=num_threads,
        capacity=queue_capacity)
Beispiel #38
0
        def _train_op_fn(loss):
            """Run one training iteration."""
            if training_state_cache:
                train_op.append(
                    training_state_cache.insert(tree_ids, node_ids, logits))
            if closed_form_grad_and_hess_fn:
                gradients, hessians = closed_form_grad_and_hess_fn(
                    logits, labels)
            else:
                gradients = gradients_impl.gradients(loss,
                                                     logits,
                                                     name='Gradients')[0]
                hessians = gradients_impl.gradients(gradients,
                                                    logits,
                                                    name='Hessians')[0]

            stats_summaries_list = []
            for i, feature_ids in enumerate(feature_ids_list):
                num_buckets = bucket_size_list[i]
                summaries = [
                    array_ops.squeeze(boosted_trees_ops.make_stats_summary(
                        node_ids=node_ids,
                        gradients=gradients,
                        hessians=hessians,
                        bucketized_features_list=[input_feature_list[f]],
                        max_splits=max_splits,
                        num_buckets=num_buckets),
                                      axis=0) for f in feature_ids
                ]
                stats_summaries_list.append(summaries)

            accumulators = []

            def grow_tree_from_stats_summaries(stats_summaries_list,
                                               feature_ids_list):
                """Updates ensemble based on the best gains from stats summaries."""
                node_ids_per_feature = []
                gains_list = []
                thresholds_list = []
                left_node_contribs_list = []
                right_node_contribs_list = []
                all_feature_ids = []

                assert len(stats_summaries_list) == len(feature_ids_list)

                for i, feature_ids in enumerate(feature_ids_list):
                    (numeric_node_ids_per_feature, numeric_gains_list,
                     numeric_thresholds_list, numeric_left_node_contribs_list,
                     numeric_right_node_contribs_list) = (
                         boosted_trees_ops.calculate_best_gains_per_feature(
                             node_id_range=last_layer_nodes_range,
                             stats_summary_list=stats_summaries_list[i],
                             l1=tree_hparams.l1,
                             l2=tree_hparams.l2,
                             tree_complexity=tree_hparams.tree_complexity,
                             min_node_weight=tree_hparams.min_node_weight,
                             max_splits=max_splits))

                    all_feature_ids += feature_ids
                    node_ids_per_feature += numeric_node_ids_per_feature
                    gains_list += numeric_gains_list
                    thresholds_list += numeric_thresholds_list
                    left_node_contribs_list += numeric_left_node_contribs_list
                    right_node_contribs_list += numeric_right_node_contribs_list

                grow_op = boosted_trees_ops.update_ensemble(
                    # Confirm if local_tree_ensemble or tree_ensemble should be used.
                    tree_ensemble.resource_handle,
                    feature_ids=all_feature_ids,
                    node_ids=node_ids_per_feature,
                    gains=gains_list,
                    thresholds=thresholds_list,
                    left_node_contribs=left_node_contribs_list,
                    right_node_contribs=right_node_contribs_list,
                    learning_rate=tree_hparams.learning_rate,
                    max_depth=tree_hparams.max_depth,
                    pruning_mode=boosted_trees_ops.PruningMode.NO_PRUNING)
                return grow_op

            if train_in_memory and is_single_machine:
                train_op.append(distribute_lib.increment_var(global_step))
                train_op.append(
                    grow_tree_from_stats_summaries(stats_summaries_list,
                                                   feature_ids_list))
            else:
                dependencies = []

                for i, feature_ids in enumerate(feature_ids_list):
                    stats_summaries = stats_summaries_list[i]
                    accumulator = data_flow_ops.ConditionalAccumulator(
                        dtype=dtypes.float32,
                        # The stats consist of grads and hessians (the last dimension).
                        shape=[
                            len(feature_ids), max_splits, bucket_size_list[i],
                            2
                        ],
                        shared_name='numeric_stats_summary_accumulator_' +
                        str(i))
                    accumulators.append(accumulator)

                    apply_grad = accumulator.apply_grad(
                        array_ops.stack(stats_summaries, axis=0), stamp_token)
                    dependencies.append(apply_grad)

                def grow_tree_from_accumulated_summaries_fn():
                    """Updates the tree with the best layer from accumulated summaries."""
                    # Take out the accumulated summaries from the accumulator and grow.
                    stats_summaries_list = [
                        array_ops.unstack(accumulator.take_grad(1), axis=0)
                        for accumulator in accumulators
                    ]

                    grow_op = grow_tree_from_stats_summaries(
                        stats_summaries_list, feature_ids_list)
                    return grow_op

                with ops.control_dependencies(dependencies):
                    train_op.append(distribute_lib.increment_var(global_step))
                    if config.is_chief:
                        min_accumulated = math_ops.reduce_min(
                            array_ops.stack([
                                acc.num_accumulated() for acc in accumulators
                            ]))

                        train_op.append(
                            control_flow_ops.cond(
                                math_ops.greater_equal(min_accumulated,
                                                       n_batches_per_layer),
                                grow_tree_from_accumulated_summaries_fn,
                                control_flow_ops.no_op,
                                name='wait_until_n_batches_accumulated'))

            return control_flow_ops.group(train_op, name='train_op')
Beispiel #39
0
def _BatchNormGrad(grad_y,
                   x,
                   scale,
                   pop_mean,
                   pop_var,
                   epsilon,
                   data_format,
                   is_training=True):
  """Returns the gradients for the 3 inputs of BatchNorm.

  Args:
    grad_y: A `Tensor` of 4 dimensions for gradient for y.
    x: A `Tensor` of 4 dimensions for x.
    scale: A `Tensor` of 1 dimension for scaling.
    pop_mean: A `Tensor` of 1 dimension for the population mean. Only used when
      is_training=False.
    pop_var: A `Tensor` of 1 dimension for the population variance. Only used
      when is_training=False.
    epsilon: A small float number added to the variance of x.
    data_format: The data format for input. Either b"NHWC" or b"NCHW".
    is_training: A bool value to indicate the operation is for training
      (default) or inference.

  Returns:
    A tuple (grad_x, grad_scale, grad_offset), where grad_x is the gradient
    for x, grad_scale the gradient for scale, and grad_offset the gradient
    for offset.
  """
  x_dtype = x.dtype.base_dtype
  if x_dtype == dtypes.float16:
    # float16 math is too imprecise, so we do the batch norm gradient
    # computations in float32.
    x = math_ops.cast(x, dtypes.float32)
    grad_y = math_ops.cast(grad_y, dtypes.float32)
  if is_training:
    if data_format == b"NHWC":
      keepdims = False
      reduce_axis = [0, 1, 2]
    else:
      keepdims = True
      reduce_axis = [0, 2, 3]
      shape = [1, array_ops.size(scale), 1, 1]
      scale = array_ops.reshape(scale, shape)
    mean_grad_y = math_ops.reduce_mean(grad_y, reduce_axis, keepdims=keepdims)
    mean_x = math_ops.reduce_mean(x, reduce_axis, keepdims=keepdims)
    var_x = math_ops.reduce_mean(
        math_ops.squared_difference(x, array_ops.stop_gradient(mean_x)),
        reduce_axis,
        keepdims=keepdims)
    grad_y_offset = grad_y - mean_grad_y
    x_offset = x - mean_x
    mean = math_ops.reduce_mean(
        grad_y * x_offset, axis=reduce_axis, keepdims=keepdims)
    grad_x = scale * math_ops.rsqrt(var_x + epsilon) * (
        grad_y_offset - math_ops.reciprocal(var_x + epsilon) * mean * x_offset)
    grad_scale = math_ops.rsqrt(var_x + epsilon) * math_ops.reduce_sum(
        grad_y * x_offset, axis=reduce_axis, keepdims=keepdims)
    if data_format == b"NCHW":
      grad_scale = array_ops.squeeze(grad_scale)
    grad_offset = math_ops.reduce_sum(grad_y, axis=reduce_axis)
    return math_ops.cast(grad_x, x_dtype), grad_scale, grad_offset
  else:
    if data_format == b"NHWC":
      reduce_axis = [0, 1, 2]
    else:
      reduce_axis = [0, 2, 3]
      shape = [1, array_ops.size(pop_mean), 1, 1]
      pop_mean = array_ops.reshape(pop_mean, shape)
      pop_var = array_ops.reshape(pop_var, shape)
      scale = array_ops.reshape(scale, shape)

    grad_offset = math_ops.reduce_sum(grad_y, axis=reduce_axis)
    var_rsqrt = math_ops.rsqrt(pop_var + epsilon)
    grad_scale = math_ops.reduce_sum(
        grad_y * (x - pop_mean) * var_rsqrt, axis=reduce_axis)
    grad_x = grad_y * scale * var_rsqrt
    return math_ops.cast(grad_x, x_dtype), grad_scale, grad_offset
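# A quick NumPy sanity sketch of the inference (is_training=False) branch
# above for NHWC data -- an assumption-level illustration, not the library's
# own test: y = scale * (x - pop_mean) * rsqrt(pop_var + eps) + offset, so
# grad_x is elementwise grad_y * scale * rsqrt(pop_var + eps), while
# grad_scale and grad_offset reduce over the N, H, W axes.
import numpy as np

np.random.seed(0)
eps = 1e-3
x = np.random.rand(2, 3, 3, 4).astype(np.float32)        # NHWC
grad_y = np.random.rand(2, 3, 3, 4).astype(np.float32)
scale = np.random.rand(4).astype(np.float32)
pop_mean = np.random.rand(4).astype(np.float32)
pop_var = np.random.rand(4).astype(np.float32) + 0.5

var_rsqrt = 1.0 / np.sqrt(pop_var + eps)
grad_x = grad_y * scale * var_rsqrt                                       # [2, 3, 3, 4]
grad_scale = np.sum(grad_y * (x - pop_mean) * var_rsqrt, axis=(0, 1, 2))  # [4]
grad_offset = np.sum(grad_y, axis=(0, 1, 2))                              # [4]
print(grad_x.shape, grad_scale.shape, grad_offset.shape)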
Beispiel #40
0
    def train(self, loss, predictions_dict, labels):
        """Grows a new tree and adds it to the ensemble.

    Args:
      loss: A scalar tensor representing average loss of examples.
      predictions_dict: Dictionary of Rank 2 `Tensor` representing information
          about predictions per example.
      labels: Rank 2 `Tensor` representing labels per example.

    Returns:
      An op that adds a new tree to the ensemble.

    Raises:
      ValueError: if inputs are not valid.
    """
        # Get the worker device from input dependencies.
        input_deps = (self._dense_floats + self._sparse_float_indices +
                      self._sparse_int_indices)
        worker_device = input_deps[0].device

        # Get tensors relevant for training and form the loss.
        predictions = predictions_dict[PREDICTIONS]
        partition_ids = predictions_dict[PARTITION_IDS]
        ensemble_stamp = predictions_dict[ENSEMBLE_STAMP]
        gradients = gradients_impl.gradients(loss,
                                             predictions,
                                             name="Gradients",
                                             colocate_gradients_with_ops=False,
                                             gate_gradients=0,
                                             aggregation_method=None)[0]
        strategy = self._learner_config.multi_class_strategy

        class_id = -1
        # Handle different multiclass strategies.
        if strategy == learner_pb2.LearnerConfig.TREE_PER_CLASS:
            # We build one vs rest trees.
            gradient_shape = tensor_shape.scalar()
            hessian_shape = tensor_shape.scalar()

            if self._logits_dimension == 1:
                # We have only 1 score; the gradients tensor has shape [batch, 1].
                hessians = gradients_impl.gradients(
                    gradients,
                    predictions,
                    name="Hessian",
                    colocate_gradients_with_ops=False,
                    gate_gradients=0,
                    aggregation_method=None)[0]

                squeezed_gradients = array_ops.squeeze(gradients, axis=[1])
                squeezed_hessians = array_ops.squeeze(hessians, axis=[1])
            else:
                hessian_list = self._diagonal_hessian(gradients, predictions)
                # Assemble hessian list into a tensor.
                hessians = array_ops.stack(hessian_list, axis=1)

                # Choose the class for which the tree is built (one vs rest).
                class_id = math_ops.to_int32(
                    predictions_dict[NUM_TREES_ATTEMPTED] %
                    self._logits_dimension)

                # Use class id tensor to get the column with that index from gradients
                # and hessians.
                squeezed_gradients = array_ops.squeeze(
                    _get_column_by_index(gradients, class_id))
                squeezed_hessians = array_ops.squeeze(
                    _get_column_by_index(hessians, class_id))
        else:
            # Other multiclass strategies.
            gradient_shape = tensor_shape.TensorShape([self._logits_dimension])

            if strategy == learner_pb2.LearnerConfig.FULL_HESSIAN:
                hessian_shape = tensor_shape.TensorShape(
                    ([self._logits_dimension, self._logits_dimension]))
                hessian_list = self._full_hessian(gradients, predictions)
            else:
                # Diagonal hessian strategy.
                hessian_shape = tensor_shape.TensorShape(
                    ([self._logits_dimension]))
                hessian_list = self._diagonal_hessian(gradients, predictions)

            squeezed_gradients = gradients
            hessians = array_ops.stack(hessian_list, axis=1)
            squeezed_hessians = hessians

        # Get the weights for each example for quantile calculation.
        weights = self._get_weights(hessian_shape, squeezed_hessians)

        regularization_config = self._learner_config.regularization
        min_node_weight = self._learner_config.constraints.min_node_weight
        # Create all handlers ensuring resources are evenly allocated across PS.
        fc_name_idx = 0
        handlers = []
        init_stamp_token = constant_op.constant(0, dtype=dtypes.int64)
        with ops.device(self._get_replica_device_setter(worker_device)):
            # Create handlers for dense float columns
            for dense_float_column_idx in range(len(self._dense_floats)):
                fc_name = self._fc_names[fc_name_idx]
                handlers.append(
                    ordinal_split_handler.DenseSplitHandler(
                        l1_regularization=regularization_config.l1,
                        l2_regularization=regularization_config.l2,
                        tree_complexity_regularization=(
                            regularization_config.tree_complexity),
                        min_node_weight=min_node_weight,
                        feature_column_group_id=dense_float_column_idx,
                        epsilon=0.01,
                        num_quantiles=100,
                        dense_float_column=self.
                        _dense_floats[dense_float_column_idx],
                        name=fc_name,
                        gradient_shape=gradient_shape,
                        hessian_shape=hessian_shape,
                        multiclass_strategy=strategy,
                        init_stamp_token=init_stamp_token))
                fc_name_idx += 1

            # Create handlers for sparse float columns.
            for sparse_float_column_idx in range(
                    len(self._sparse_float_indices)):
                fc_name = self._fc_names[fc_name_idx]
                handlers.append(
                    ordinal_split_handler.SparseSplitHandler(
                        l1_regularization=regularization_config.l1,
                        l2_regularization=regularization_config.l2,
                        tree_complexity_regularization=(
                            regularization_config.tree_complexity),
                        min_node_weight=min_node_weight,
                        feature_column_group_id=sparse_float_column_idx,
                        epsilon=0.01,
                        num_quantiles=100,
                        sparse_float_column=sparse_tensor.SparseTensor(
                            self.
                            _sparse_float_indices[sparse_float_column_idx],
                            self._sparse_float_values[sparse_float_column_idx],
                            self._sparse_float_shapes[sparse_float_column_idx]
                        ),
                        name=fc_name,
                        gradient_shape=gradient_shape,
                        hessian_shape=hessian_shape,
                        multiclass_strategy=strategy,
                        init_stamp_token=init_stamp_token))
                fc_name_idx += 1

            # Create handlers for sparse int columns.
            for sparse_int_column_idx in range(len(self._sparse_int_indices)):
                fc_name = self._fc_names[fc_name_idx]
                handlers.append(
                    categorical_split_handler.EqualitySplitHandler(
                        l1_regularization=regularization_config.l1,
                        l2_regularization=regularization_config.l2,
                        tree_complexity_regularization=(
                            regularization_config.tree_complexity),
                        min_node_weight=min_node_weight,
                        feature_column_group_id=sparse_int_column_idx,
                        sparse_int_column=sparse_tensor.SparseTensor(
                            self._sparse_int_indices[sparse_int_column_idx],
                            self._sparse_int_values[sparse_int_column_idx],
                            self._sparse_int_shapes[sparse_int_column_idx]),
                        name=fc_name,
                        gradient_shape=gradient_shape,
                        hessian_shape=hessian_shape,
                        multiclass_strategy=strategy,
                        init_stamp_token=init_stamp_token))
                fc_name_idx += 1

            # Create steps accumulator.
            steps_accumulator = stats_accumulator_ops.StatsAccumulator(
                stamp_token=0,
                gradient_shape=tensor_shape.scalar(),
                hessian_shape=tensor_shape.scalar(),
                name="StepsAccumulator")

            # Create bias stats accumulator.
            bias_stats_accumulator = stats_accumulator_ops.StatsAccumulator(
                stamp_token=0,
                gradient_shape=gradient_shape,
                hessian_shape=hessian_shape,
                name="BiasAccumulator")

            # Create ensemble stats variables.
            num_layer_examples = variables.Variable(
                initial_value=array_ops.zeros([], dtypes.int64),
                name="num_layer_examples",
                trainable=False)
            num_layer_steps = variables.Variable(initial_value=array_ops.zeros(
                [], dtypes.int64),
                                                 name="num_layer_steps",
                                                 trainable=False)
            num_layers = variables.Variable(initial_value=array_ops.zeros(
                [], dtypes.int64),
                                            name="num_layers",
                                            trainable=False)
            active_tree = variables.Variable(initial_value=array_ops.zeros(
                [], dtypes.int64),
                                             name="active_tree",
                                             trainable=False)
            active_layer = variables.Variable(initial_value=array_ops.zeros(
                [], dtypes.int64),
                                              name="active_layer",
                                              trainable=False)

        # Create ensemble stats summaries.
        summary.scalar("layer_stats/num_examples", num_layer_examples)
        summary.scalar("layer_stats/num_steps", num_layer_steps)
        summary.scalar("ensemble_stats/active_tree", active_tree)
        summary.scalar("ensemble_stats/active_layer", active_layer)

        # Update bias stats.
        stats_update_ops = []
        continue_centering = variables.Variable(
            initial_value=self._center_bias,
            name="continue_centering",
            trainable=False)
        stats_update_ops.append(
            control_flow_ops.cond(
                continue_centering,
                self._make_update_bias_stats_fn(ensemble_stamp, predictions,
                                                gradients,
                                                bias_stats_accumulator),
                control_flow_ops.no_op))

        # Update handler stats.
        handler_reads = {}
        for handler in handlers:
            handler_reads[handler] = handler.scheduled_reads()

        handler_results = batch_ops_utils.run_handler_scheduled_ops(
            handler_reads, ensemble_stamp, worker_device)
        per_handler_updates = {}
        # Two values per handler: whether the handler is active for the current
        # layer, and whether it will be active for the next layer.
        subsampling_type = self._learner_config.WhichOneof("feature_fraction")
        if subsampling_type == "feature_fraction_per_level":
            seed = predictions_dict[NUM_LAYERS_ATTEMPTED]
            active_handlers_current_layer = stateless.stateless_random_uniform(
                shape=[len(handlers)], seed=[seed, 1])
            active_handlers_next_layer = stateless.stateless_random_uniform(
                shape=[len(handlers)], seed=[seed + 1, 1])
            active_handlers = array_ops.stack(
                [active_handlers_current_layer, active_handlers_next_layer],
                axis=1)
            active_handlers = (active_handlers <
                               self._learner_config.feature_fraction_per_level)
        elif subsampling_type == "feature_fraction_per_tree":
            seed = predictions_dict[NUM_TREES_ATTEMPTED]
            active_handlers_current_layer = stateless.stateless_random_uniform(
                shape=[len(handlers)], seed=[seed, 2])
            active_handlers_current_layer = (
                active_handlers_current_layer <
                self._learner_config.feature_fraction_per_tree)
            active_handlers = array_ops.stack(
                [active_handlers_current_layer,
                 array_ops.ones([len(handlers)], dtype=dtypes.bool)],
                axis=1)
        else:
            active_handlers = array_ops.ones([len(handlers), 2],
                                             dtype=dtypes.bool)

        # Prepare empty gradients and hessians when handlers are not ready.
        empty_hess_shape = [1] + hessian_shape.as_list()
        empty_grad_shape = [1] + gradient_shape.as_list()

        empty_gradients = constant_op.constant([],
                                               dtype=dtypes.float32,
                                               shape=empty_grad_shape)
        empty_hessians = constant_op.constant([],
                                              dtype=dtypes.float32,
                                              shape=empty_hess_shape)

        for handler_idx in range(len(handlers)):
            handler = handlers[handler_idx]
            is_active = active_handlers[handler_idx]
            updates, scheduled_updates = handler.update_stats(
                ensemble_stamp, partition_ids, squeezed_gradients,
                squeezed_hessians, empty_gradients, empty_hessians, weights,
                is_active, handler_results[handler])
            stats_update_ops.append(updates)
            per_handler_updates[handler] = scheduled_updates

        update_results = batch_ops_utils.run_handler_scheduled_ops(
            per_handler_updates, ensemble_stamp, worker_device)
        for update in update_results.values():
            stats_update_ops += update
        # Accumulate a step after updating stats.
        batch_size = math_ops.cast(array_ops.shape(labels)[0], dtypes.float32)
        with ops.control_dependencies(stats_update_ops):
            add_step_op = steps_accumulator.add(ensemble_stamp, [0], [[0, 0]],
                                                [batch_size], [1.0])

        # Determine learning rate.
        learning_rate_tuner = self._learner_config.learning_rate_tuner.WhichOneof(
            "tuner")
        if learning_rate_tuner == "fixed" or learning_rate_tuner == "dropout":
            tuner = getattr(self._learner_config.learning_rate_tuner,
                            learning_rate_tuner)
            learning_rate = tuner.learning_rate
        else:
            # TODO(nponomareva, soroush) do the line search.
            raise ValueError("Line search learning rate is not yet supported.")

        # After adding the step, decide if further processing is needed.
        ensemble_update_ops = [add_step_op]
        with ops.control_dependencies([add_step_op]):
            if self._is_chief:
                dropout_seed = predictions_dict[NUM_TREES_ATTEMPTED]

                # Get accumulated steps and examples for the current layer.
                _, _, _, _, acc_examples, acc_steps = steps_accumulator.serialize(
                )
                acc_examples = math_ops.cast(acc_examples[0], dtypes.int64)
                acc_steps = math_ops.cast(acc_steps[0], dtypes.int64)
                ensemble_update_ops.append(
                    num_layer_examples.assign(acc_examples))
                ensemble_update_ops.append(num_layer_steps.assign(acc_steps))
                # Determine whether we need to update tree ensemble.
                examples_per_layer = self._examples_per_layer
                if callable(examples_per_layer):
                    examples_per_layer = examples_per_layer(active_layer)
                ensemble_update_ops.append(
                    control_flow_ops.cond(
                        acc_examples >= examples_per_layer,
                        self._make_update_ensemble_fn(
                            ensemble_stamp, steps_accumulator,
                            bias_stats_accumulator, continue_centering,
                            learning_rate, handlers, num_layers, active_tree,
                            active_layer, dropout_seed, class_id),
                        control_flow_ops.no_op))

        # Calculate the loss to be reported.
        # Note: the loss is computed from predictions that include dropout, so its
        # value can fluctuate sharply across steps when the dropout ratio is high.
        # Refer to eval_loss instead when judging convergence.
        return control_flow_ops.group(*ensemble_update_ops)
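# A tiny illustration of the one-vs-rest (TREE_PER_CLASS) rotation used above
# -- an assumption-level sketch: with logits_dimension = 3, successive trees
# target class ids 0, 1, 2, 0, 1, ... because
# class_id = num_trees_attempted % logits_dimension.
logits_dimension = 3
for num_trees_attempted in range(6):
    print(num_trees_attempted % logits_dimension)  # 0 1 2 0 1 2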
Beispiel #41
0
def squeeze_or_expand_dimensions(y_pred, y_true, sample_weight):
  """Squeeze or expand last dimension if needed.

  1. Squeezes last dim of `y_pred` or `y_true` if their rank differs by 1
  (using `confusion_matrix.remove_squeezable_dimensions`).
  2. Squeezes or expands last dim of `sample_weight` if its rank differs by 1
  from the new rank of `y_pred`.
  If `sample_weight` is scalar, it is kept scalar.

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    y_pred: Predicted values, a `Tensor` of arbitrary dimensions.
    y_true: Optional label `Tensor` whose dimensions match `y_pred`.
    sample_weight: Optional weight scalar or `Tensor` whose dimensions match
      `y_pred`.

  Returns:
    Tuple of `y_pred`, `y_true` and `sample_weight`. Each of them may have its
    last dimension squeezed; `sample_weight` may instead be expanded by one
    dimension.
  """
  if y_true is not None:
    # squeeze last dim of `y_pred` or `y_true` if their rank differs by 1
    y_true, y_pred = confusion_matrix.remove_squeezable_dimensions(
        y_true, y_pred)

  if sample_weight is None:
    return y_pred, y_true, None

  sample_weight = ops.convert_to_tensor(sample_weight)
  weights_shape = sample_weight.get_shape()
  weights_rank = weights_shape.ndims
  if weights_rank == 0:  # If weights is scalar, do nothing.
    return y_pred, y_true, sample_weight

  y_pred_shape = y_pred.get_shape()
  y_pred_rank = y_pred_shape.ndims
  if (y_pred_rank is not None) and (weights_rank is not None):
    # Use static rank.
    if weights_rank - y_pred_rank == 1:
      sample_weight = array_ops.squeeze(sample_weight, [-1])
    elif y_pred_rank - weights_rank == 1:
      sample_weight = array_ops.expand_dims(sample_weight, [-1])
    return y_pred, y_true, sample_weight

  # Use dynamic rank.
  weights_rank_tensor = array_ops.rank(sample_weight)
  rank_diff = weights_rank_tensor - array_ops.rank(y_pred)
  maybe_squeeze_weights = lambda: array_ops.squeeze(sample_weight, [-1])

  def _maybe_expand_weights():
    return control_flow_ops.cond(
        math_ops.equal(rank_diff,
                       -1), lambda: array_ops.expand_dims(sample_weight, [-1]),
        lambda: sample_weight)

  def _maybe_adjust_weights():
    return control_flow_ops.cond(
        math_ops.equal(rank_diff, 1), maybe_squeeze_weights,
        _maybe_expand_weights)

  # squeeze or expand last dim of `sample_weight` if its rank differs by 1
  # from the new rank of `y_pred`.
  sample_weight = control_flow_ops.cond(
      math_ops.equal(weights_rank_tensor, 0), lambda: sample_weight,
      _maybe_adjust_weights)
  return y_pred, y_true, sample_weight
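# A small illustration of the static-rank path above (an assumption-level
# sketch; assumes `constant_op` from tensorflow.python.framework is in scope):
# y_pred of shape [2, 1] is squeezed to match rank-1 y_true, and the [2, 1]
# sample weights are then squeezed to shape [2] as well.
y_true = constant_op.constant([0., 1.])          # shape [2]
y_pred = constant_op.constant([[0.2], [0.8]])    # shape [2, 1]
weights = constant_op.constant([[1.0], [2.0]])   # shape [2, 1]
y_pred, y_true, weights = squeeze_or_expand_dimensions(y_pred, y_true, weights)
# y_pred and y_true now both have shape [2]; weights is squeezed to [2].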
Beispiel #42
0
def RunGRU(sess,
           num_units,
           input_size,
           batch_size,
           time,
           num_layers=1,
           is_training=True,
           variable_seq_lengths=False,
           time_major=True,
           dynamic_shape_input=False,
           dropout=0.,
           num_dirs=True,
           dtype=dtypes.float32):
  # TODO(jamesqin): add multi-layer tests.
  # TODO(jamesqin): add multi-dir tests
  assert num_layers == 1
  assert num_dirs == 1
  if is_training and not np.isclose(dropout, 0):
    raise ValueError("dropout can not be 0. when test training.")

  # set graph level random seed and numpy random seed.
  random_seed.set_random_seed(0)
  np.random.seed(0)

  shape = ([time, batch_size, input_size]
           if time_major else [batch_size, time, input_size])
  inputs_np = np.random.rand(*shape).astype(dtype.as_numpy_dtype)
  inputs_static = variable_scope.get_variable(
      "inputs", initializer=inputs_np, dtype=dtype)
  inputs_dynamic = array_ops.placeholder(
      dtype, shape=[None, None, None], name="inputs")
  inputs = inputs_dynamic if dynamic_shape_input else inputs_static
  initial_h_op = variable_scope.get_variable(
      "initial_h_op",
      initializer=np.random.rand(batch_size,
                                 num_units).astype(dtype.as_numpy_dtype),
      dtype=dtype)

  if variable_seq_lengths:
    lengths_v = np.random.randint(low=1, high=time + 1, size=batch_size)
    lengths_v[0] = time  # make sure the max sequence has 'time' elems
    lengths = ops.convert_to_tensor(lengths_v.astype(np.int32))
  else:
    lengths = None

  initializer = init_ops.random_uniform_initializer(
      -0.01, 0.01, dtype=dtype, seed=19980904)
  with variable_scope.variable_scope("test", initializer=initializer):
    gate_kernel = variable_scope.get_variable(
        "rnn/cudnn_compatible_gru_cell/gates/kernel",
        shape=[input_size + num_units, num_units * 2],
        dtype=dtype)
    gate_bias = variable_scope.get_variable(
        "rnn/cudnn_compatible_gru_cell/gates/bias",
        shape=[num_units * 2],
        dtype=dtype)
    candidate_inp_kernel = variable_scope.get_variable(
        "rnn/cudnn_compatible_gru_cell/candidate/input_projection/kernel",
        shape=[input_size, num_units],
        dtype=dtype)
    candidate_inp_bias = variable_scope.get_variable(
        "rnn/cudnn_compatible_gru_cell/candidate/input_projection/bias",
        shape=[num_units],
        dtype=dtype)
    candidate_hid_kernel = variable_scope.get_variable(
        "rnn/cudnn_compatible_gru_cell/candidate/hidden_projection/kernel",
        shape=[num_units, num_units],
        dtype=dtype)
    candidate_hid_bias = variable_scope.get_variable(
        "rnn/cudnn_compatible_gru_cell/candidate/hidden_projection/bias",
        shape=[num_units],
        dtype=dtype)

    cell = cudnn_rnn_ops.CudnnCompatibleGRUCell(num_units, reuse=True)
    outputs_op, h_op = rnn.dynamic_rnn(
        cell,
        inputs_static,
        sequence_length=lengths,
        initial_state=initial_h_op,
        dtype=dtype,
        time_major=time_major,
        scope=None)

  ws = [gate_kernel, candidate_inp_kernel, candidate_hid_kernel]
  bs = [gate_bias, candidate_inp_bias, candidate_hid_bias]
  # Convert to cudnn opaque param.
  format_converter = cudnn_rnn_ops.CudnnParamsFormatConverterGRU(
      num_layers, num_units, input_size)
  opaque_params = format_converter.tf_canonical_to_opaque(ws + bs)


  cu_initial_h_op = array_ops.expand_dims(
      initial_h_op, axis=(0 if time_major else 1))
  cu_outputs_op, cu_h_op, _ = cudnn_rnn_ops._cudnn_rnn(
      inputs,
      cu_initial_h_op,
      array_ops.zeros_like(cu_initial_h_op),  # not used
      opaque_params,
      sequence_lengths=lengths,
      time_major=time_major,
      dropout=dropout,
      is_training=is_training,
      rnn_mode=cudnn_rnn_ops.CUDNN_GRU)

  if is_training:
    (inp_grad_op, hgrad_op, gk_grad_op, cik_grad_op, chk_grad_op, gb_grad_op,
     cib_grad_op, chb_grad_op) = gradients_impl.gradients(
         outputs_op, [inputs_static, initial_h_op] + ws + bs)

    (cu_inp_grad_op, cu_hgrad_op, opaque_grad_op) = gradients_impl.gradients(
        cu_outputs_op, [inputs, cu_initial_h_op, opaque_params])
    # Remove the trivial 1st dimension
    cu_hgrad_op = array_ops.squeeze(cu_hgrad_op, axis=0 if time_major else 1)

    cu_wgrad_op, cu_bgrad_op = format_converter.opaque_to_tf_canonical(
        opaque_grad_op)
    (cu_gk_grad_op, cu_cik_grad_op, cu_chk_grad_op) = cu_wgrad_op
    (cu_gb_grad_op, cu_cib_grad_op, cu_chb_grad_op) = cu_bgrad_op
    # cudnn gru has 2 biases for the reset and update gates. When converting to
    # tf canonical format, the two biases are summed into one, so the relevant
    # bias gradient must be halved here before comparing with the tf gru.
    cu_gb_grad_op *= 0.5

  init_op = variables.global_variables_initializer()
  sess.run(init_op)

  if is_training:
    outputs, h, inp_grad, hgrad, wgrad, bgrad = sess.run([
        outputs_op, h_op, inp_grad_op, hgrad_op,
        (gk_grad_op, cik_grad_op, chk_grad_op),
        (gb_grad_op, cib_grad_op, chb_grad_op)
    ])
    (cu_outputs, cu_h, cu_inp_grad, cu_hgrad, cu_wgrad, cu_bgrad) = sess.run(
        [
            cu_outputs_op, cu_h_op, cu_inp_grad_op, cu_hgrad_op,
            (cu_gk_grad_op, cu_cik_grad_op, cu_chk_grad_op),
            (cu_gb_grad_op, cu_cib_grad_op, cu_chb_grad_op)
        ],
        feed_dict={inputs: inputs_np} if dynamic_shape_input else None)
    # Remove the trivial 1st dimension
    cu_h = np.squeeze(cu_h, axis=0 if time_major else 1)

    logging.vlog(1, "outputs: %s" % outputs)
    logging.vlog(1, "cu_outputs: %s" % cu_outputs)
    logging.vlog(1, "h: %s" % h)
    logging.vlog(1, "cu_h: %s" % h)
    logging.vlog(1, "inp_grad: %s" % inp_grad)
    logging.vlog(1, "cu_inp_grad: %s" % cu_inp_grad)
    logging.vlog(1, "hgrad: %s" % hgrad)
    logging.vlog(1, "cu_hgrad: %s" % cu_hgrad)
    logging.vlog(1, "wgrad: %s" % str(wgrad))
    logging.vlog(1, "bgrad: %s" % str(bgrad))
    logging.vlog(1, "cu_wgrad: %s" % str(cu_wgrad))
    logging.vlog(1, "cu_bgrad: %s" % str(cu_bgrad))
    return (outputs, cu_outputs, h, cu_h, inp_grad, cu_inp_grad, hgrad,
            cu_hgrad, wgrad, bgrad, cu_wgrad, cu_bgrad)
  else:
    outputs, h = sess.run([outputs_op, h_op])
    cu_outputs, cu_h = sess.run([cu_outputs_op, cu_h_op],
                                feed_dict=({
                                    inputs: inputs_np
                                } if dynamic_shape_input else None))
    # Remove the trivial 1st dimension.
    cu_h = np.squeeze(cu_h, axis=0 if time_major else 1)

    logging.vlog(1, "outputs: %s" % outputs)
    logging.vlog(1, "cu_outputs: %s" % cu_outputs)
    logging.vlog(1, "h: %s" % h)
    logging.vlog(1, "cu_h: %s" % h)
  return outputs, cu_outputs, h, cu_h
Beispiel #43
0
def RunLSTM(sess,
            num_units,
            input_size,
            batch_size,
            time,
            num_layers=1,
            variable_seq_lengths=False,
            time_major=True,
            dynamic_shape_input=False,
            is_training=True,
            dropout=0.,
            num_dirs=True,
            dtype=dtypes.float32):
  # TODO(jamesqin): add multi-layer tests.
  # TODO(jamesqin): add multi-dir tests
  assert num_layers == 1
  assert num_dirs == 1
  if is_training and not np.isclose(dropout, 0):
    raise ValueError("dropout can not be 0. when test training.")

  # set graph level random seed and numpy random seed.
  random_seed.set_random_seed(0)
  np.random.seed(0)

  shape = ([time, batch_size, input_size]
           if time_major else [batch_size, time, input_size])
  inputs_np = np.random.rand(*shape).astype(dtype.as_numpy_dtype)
  inputs_static = variable_scope.get_variable(
      "inputs", initializer=inputs_np, dtype=dtype)
  inputs_dynamic = array_ops.placeholder(
      dtype, shape=[None, None, None], name="inputs")
  inputs = inputs_dynamic if dynamic_shape_input else inputs_static
  initial_h_op = variable_scope.get_variable(
      "initial_h_op",
      initializer=np.random.rand(batch_size,
                                 num_units).astype(dtype.as_numpy_dtype),
      dtype=dtype)
  initial_c_op = variable_scope.get_variable(
      "initial_c_op",
      initializer=np.random.rand(batch_size,
                                 num_units).astype(dtype.as_numpy_dtype),
      dtype=dtype)

  if variable_seq_lengths:
    lengths_v = np.random.randint(low=1, high=time + 1, size=batch_size)
    lengths_v[0] = time  # make sure the max sequence has 'time' elems
    lengths = ops.convert_to_tensor(lengths_v.astype(np.int32))
  else:
    lengths = None

  initializer = init_ops.random_uniform_initializer(
      -0.01, 0.01, dtype=dtype, seed=19980904)

  with variable_scope.variable_scope("test", initializer=initializer):
    w = variable_scope.get_variable(
        "rnn/lstm_cell/kernel",
        shape=[input_size + num_units, num_units * 4],
        dtype=dtype)
    b = variable_scope.get_variable(
        "rnn/lstm_cell/bias", shape=[num_units * 4], dtype=dtype)

    # Canonical lstm: forget_bias must be set to 0. to align with the cudnn lstm.
    cell = rnn_cell_impl.LSTMCell(num_units, forget_bias=0., reuse=True)
    outputs_op, state_tuple_op = rnn.dynamic_rnn(
        cell,
        inputs_static,
        sequence_length=lengths,
        initial_state=rnn_cell_impl.LSTMStateTuple(
            h=initial_h_op, c=initial_c_op),
        dtype=dtype,
        time_major=time_major,
        scope=None)

  # Convert to cudnn opaque param.
  format_converter = cudnn_rnn_ops.CudnnParamsFormatConverterLSTM(
      num_layers, num_units, input_size)
  opaque_params = format_converter.tf_canonical_to_opaque([w, b])

  cu_initial_h_op = array_ops.expand_dims(
      initial_h_op, axis=(0 if time_major else 1))
  cu_initial_c_op = array_ops.expand_dims(
      initial_c_op, axis=(0 if time_major else 1))
  cu_outputs_op, cu_h_op, cu_c_op = cudnn_rnn_ops._cudnn_rnn(
      inputs,
      cu_initial_h_op,
      cu_initial_c_op,
      opaque_params,
      sequence_lengths=lengths,
      time_major=time_major,
      dropout=dropout,
      is_training=is_training,
      rnn_mode=cudnn_rnn_ops.CUDNN_LSTM)
  # Remove the trivial 1st dimension.
  cu_state_tuple_op = rnn_cell_impl.LSTMStateTuple(
      c=array_ops.squeeze(cu_c_op, axis=0 if time_major else 1),
      h=array_ops.squeeze(cu_h_op, axis=0 if time_major else 1))

  if is_training:
    (inp_grad_op, hgrad_op,
     cgrad_op, wgrad_op, bgrad_op) = gradients_impl.gradients(
         outputs_op, [inputs_static, initial_h_op, initial_c_op, w, b])

    (cu_inp_grad_op, cu_hgrad_op,
     cu_cgrad_op, opaque_grad_op) = gradients_impl.gradients(
         cu_outputs_op,
         [inputs, cu_initial_h_op, cu_initial_c_op, opaque_params])
    # Remove the trivial 1st dimension
    cu_hgrad_op = array_ops.squeeze(cu_hgrad_op, axis=0 if time_major else 1)
    # Remove the trivial 1st dimension
    cu_cgrad_op = array_ops.squeeze(cu_cgrad_op, axis=0 if time_major else 1)

    cu_wgrad_op, cu_bgrad_op = format_converter.opaque_to_tf_canonical(
        opaque_grad_op)
    cu_wgrad_op = cu_wgrad_op[0]
    cu_bgrad_op = cu_bgrad_op[0]
    # cudnn lstm has 2 biases per gate. When converting to tf canonical format,
    # the two biases are summed into one, so the bias gradient must be halved
    # here before comparing with the tf lstm.
    cu_bgrad_op *= 0.5

  init_op = variables.global_variables_initializer()
  sess.run(init_op)

  if is_training:
    outputs, state_tuple, inp_grad, state_grad, wgrad, bgrad = sess.run([
        outputs_op, state_tuple_op, inp_grad_op,
        (hgrad_op, cgrad_op), wgrad_op, bgrad_op
    ])
    (cu_outputs, cu_state_tuple, cu_inp_grad, cu_state_grad, cu_wgrad,
     cu_bgrad) = sess.run(
         [
             cu_outputs_op, cu_state_tuple_op, cu_inp_grad_op,
             (cu_hgrad_op, cu_cgrad_op), cu_wgrad_op, cu_bgrad_op
         ],
         feed_dict={inputs: inputs_np} if dynamic_shape_input else None)

    logging.vlog(1, "outputs: %s" % outputs)
    logging.vlog(1, "cu_outputs: %s" % cu_outputs)
    logging.vlog(1, "state_tuple: %s" % str(state_tuple))
    logging.vlog(1, "cu_state_tuple: %s" % str(cu_state_tuple))
    logging.vlog(1, "inp_grad: %s" % inp_grad)
    logging.vlog(1, "cu_inp_grad: %s" % cu_inp_grad)
    logging.vlog(1, "state_grad: %s" % str(state_grad))
    logging.vlog(1, "cu_state_grad: %s" % str(cu_state_grad))
    logging.vlog(1, "wgrad: %s" % str(wgrad))
    logging.vlog(1, "bgrad: %s" % str(bgrad))
    logging.vlog(1, "cu_wgrad: %s" % str(cu_wgrad))
    logging.vlog(1, "cu_bgrad: %s" % str(cu_bgrad))
    return (outputs, cu_outputs, state_tuple, cu_state_tuple, inp_grad,
            cu_inp_grad, state_grad, cu_state_grad, wgrad, bgrad, cu_wgrad,
            cu_bgrad)
  else:
    outputs, state_tuple = sess.run([outputs_op, state_tuple_op])
    cu_outputs, cu_state_tuple = sess.run([cu_outputs_op, cu_state_tuple_op],
                                          feed_dict=({
                                              inputs: inputs_np
                                          } if dynamic_shape_input else None))

    logging.vlog(1, "outputs: %s" % outputs)
    logging.vlog(1, "cu_outputs: %s" % cu_outputs)
    logging.vlog(1, "state_tuple: %s" % str(state_tuple))
    logging.vlog(1, "cu_state_tuple: %s" % str(cu_state_tuple))
  return outputs, cu_outputs, state_tuple, cu_state_tuple
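# Hedged sketch (not part of the test helper above): with a single-layer CuDNN
# LSTM the state tensors carry a leading axis of size num_layers * num_dirs == 1
# (axis 0 in the time-major case), which is added with expand_dims before the
# cudnn call and squeezed away afterwards. Shapes below are assumptions.
import numpy as np

batch_size, num_units = 4, 8
h = np.zeros((batch_size, num_units), dtype=np.float32)  # TF-canonical state, (4, 8)
cu_h = np.expand_dims(h, axis=0)                          # (1, 4, 8), as cudnn expects
assert np.squeeze(cu_h, axis=0).shape == h.shape          # back to (4, 8)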
Example #44
 def loop_fn(i):
     x1 = array_ops.gather(x, i)
     return (array_ops.squeeze(x1, axis=0),
             array_ops.squeeze(x1, axis=-1), array_ops.squeeze(x1))
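# Hedged sketch of what the loop_fn above exercises, using the public
# tf.vectorized_map instead of the internal pfor test harness; the input shape
# is an illustrative assumption.
import tensorflow as tf

x = tf.random.normal([3, 1, 4, 1])            # each per-iteration slice is (1, 4, 1)

def squeeze_variants(x1):
  return (tf.squeeze(x1, axis=0),             # (4, 1): drop the leading size-1 axis
          tf.squeeze(x1, axis=-1),            # (1, 4): drop the trailing size-1 axis
          tf.squeeze(x1))                     # (4,):   drop every size-1 axis

outputs = tf.vectorized_map(squeeze_variants, x)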
def _parse_single_example_raw(serialized,
                              names=None,
                              sparse_keys=None,
                              sparse_types=None,
                              dense_keys=None,
                              dense_types=None,
                              dense_defaults=None,
                              dense_shapes=None,
                              name=None):
    """Parses a single `Example` proto.

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
      See `_parse_example_raw` documentation for more details.
    names: (Optional) A scalar string Tensor, the associated name.
      See `_parse_example_raw` documentation for more details.
    sparse_keys: See `_parse_example_raw` documentation for more details.
    sparse_types: See `_parse_example_raw` documentation for more details.
    dense_keys: See `_parse_example_raw` documentation for more details.
    dense_types: See `_parse_example_raw` documentation for more details.
    dense_defaults: See `_parse_example_raw` documentation for more details.
    dense_shapes: See `_parse_example_raw` documentation for more details.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
    with ops.name_scope(name, "ParseSingleExample", [serialized, names]):
        serialized = ops.convert_to_tensor(serialized)
        serialized_shape = serialized.get_shape()
        if serialized_shape.ndims is not None:
            if serialized_shape.ndims != 0:
                raise ValueError("Input serialized must be a scalar")
        else:
            serialized = control_flow_ops.with_dependencies(
                [
                    control_flow_ops.Assert(math_ops.equal(
                        array_ops.rank(serialized),
                        0), ["Input serialized must be a scalar"],
                                            name="SerializedIsScalar")
                ],
                serialized,
                name="SerializedDependencies")
        serialized = array_ops.expand_dims(serialized, 0)
        if names is not None:
            names = ops.convert_to_tensor(names)
            names_shape = names.get_shape()
            if names_shape.ndims is not None:
                if names_shape.ndims != 0:
                    raise ValueError("Input names must be a scalar")
            else:
                names = control_flow_ops.with_dependencies(
                    [
                        control_flow_ops.Assert(math_ops.equal(
                            array_ops.rank(names),
                            0), ["Input names must be a scalar"],
                                                name="NamesIsScalar")
                    ],
                    names,
                    name="NamesDependencies")
            names = array_ops.expand_dims(names, 0)

        outputs = _parse_example_raw(serialized,
                                     names=names,
                                     sparse_keys=sparse_keys,
                                     sparse_types=sparse_types,
                                     dense_keys=dense_keys,
                                     dense_types=dense_types,
                                     dense_defaults=dense_defaults,
                                     dense_shapes=dense_shapes,
                                     name=name)
        if dense_keys is not None:
            for d in dense_keys:
                d_name = re.sub("[^A-Za-z0-9_.\\-/]", "_", d)
                outputs[d] = array_ops.squeeze(outputs[d], [0],
                                               name="Squeeze_%s" % d_name)
        if sparse_keys is not None:
            for s in sparse_keys:
                s_name = re.sub("[^A-Za-z0-9_.\\-/]", "_", s)
                outputs[s] = sparse_tensor.SparseTensor(
                    array_ops.slice(outputs[s].indices, [0, 1], [-1, -1],
                                    name="Slice_Indices_%s" % s_name),
                    outputs[s].values,
                    array_ops.slice(outputs[s].shape, [1], [-1],
                                    name="Squeeze_Shape_%s" % s_name))
        return outputs
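# Hedged illustration of the "batch of one" trick used above: the scalar example
# is parsed as a batch of size 1, then the leading batch dimension is dropped
# from the results (squeeze for dense outputs, index/shape slicing for sparse
# outputs). The values below are assumptions, not output of the function.
import numpy as np

dense_batched = np.array([[7, 8, 9]])          # shape (1, 3) from the batched parse
dense_single = np.squeeze(dense_batched, 0)    # shape (3,)

sparse_indices = np.array([[0, 2], [0, 5]])    # (batch, position) pairs; batch is always 0
sparse_shape = np.array([1, 10])               # batched dense_shape
single_indices = sparse_indices[:, 1:]         # drop the batch column -> [[2], [5]]
single_shape = sparse_shape[1:]                # drop the batch dim -> [10]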
def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index):
  """Gradient for concat op.

  Args:
    op: An operation.
    grad: `Tensor` or `IndexedSlices` representing the gradients with respect
      to each output of the op.
    start_value_index: An integer index of the first value in the op.inputs.
    end_value_index: An integer index of the last value in the op.inputs.
    dim_index: An integer index of concat_dim or axis parameter in op.inputs.

  Returns:
    Tensors representing the partial gradients with respect to each input
    of the op.

  Raises:
    ValueError: if concat_dim/axis is not statically known.
  """

  def _CreateDenseMaskAndBegin(sizes, concat_dim):
    """Create variables for iteratively slicing a dense gradients tensor."""
    # Since shape is 1-D, shape_of_shape = [rank-of-inputs]
    shape_of_shape = array_ops.shape(sizes[dim_index])
    # Make a vector of length equal to the input's dimensions,
    # with 0's everywhere and 1 in the concat dim position.
    # Note: Can't use sparse_to_dense since it isn't GPU-capable (for now)
    mask = array_ops.concat_v2(
        [array_ops.fill(
            array_ops.expand_dims(concat_dim, 0), 0),
         [1],
         array_ops.fill(
             shape_of_shape - concat_dim - 1, 0)],
        0)
    begin = array_ops.fill(shape_of_shape, 0)
    return mask, begin

  def _ExtractInputShapes(inputs):
    """Extract the shapes of a set of input tensors."""
    sizes = []
    fully_known = True
    for x in inputs:
      input_shape = array_ops.shape(x)
      if not isinstance(input_shape,
                        ops.Tensor) or input_shape.op.type != "Const":
        fully_known = False
        break
      else:
        sizes.append(input_shape)

    if fully_known:
      return sizes
    else:
      return array_ops.shape_n(inputs)

  # Degenerate concatenation, just return grad.
  if len(op.inputs) == 2:
    return grad + [None] if end_value_index <= dim_index else [None] + grad

  concat_dim = op.inputs[dim_index]
  input_values = op.inputs[start_value_index:end_value_index]
  out_grads = []
  if isinstance(grad, ops.Tensor):
    # Get the inputs' tensor shapes
    sizes = _ExtractInputShapes(input_values)
    # The magic number of 16 was found through benchmarking a range of sizes
    # on CPUs and a Maxwell TitanX.  A speedup was seen in a large majority of
    # cases when switching implementations at N=16, but it is possible that
    # there will be a small number of performance regressions.
    # pylint: disable=protected-access
    if len(sizes) > 16:
      # extract the size of each input along the concat dimension
      sizes = array_ops.squeeze(
          array_ops.slice(
              array_ops.stack(
                  sizes, axis=1), [concat_dim, 0], [1, -1]))
      out_grads = array_ops.split(grad, sizes, concat_dim)
    else:
      offset = gen_array_ops._concat_offset(concat_dim, sizes)
      for (begin, size) in zip(offset, sizes):
        out_grads.append(array_ops.slice(grad, begin, size))
    # pylint: enable=protected-access
  elif isinstance(grad, ops.IndexedSlices):
    concat_dim_static = tensor_util.constant_value(concat_dim)
    if concat_dim_static is None:
      raise ValueError("Can only compute IndexedSlices gradient with "
                       "statically-known concat_dim")
    # Get the inputs' tensor shapes
    sizes = [array_ops.shape(x) for x in input_values]
    if concat_dim_static > 0:
      # IndexedSlices, concat_dim > 0. Each input gets IndexedSlices gradients
      # with all the indices, but with grad.values sliced accordingly. This
      # is like the Tensor case, except shape(grad.values)[0] is not equal to
      # shape(sizes[i])[0], since only a subset of the dim-0 values are stored.
      mask, begin = _CreateDenseMaskAndBegin(sizes, concat_dim)
      for size in sizes:
        new_values = array_ops.slice(
            grad.values,
            begin,
            array_ops.concat_v2(
                [[-1], array_ops.slice(size, [1], [-1])], 0))
        out_grads.append(
            ops.IndexedSlices(new_values, grad.indices, size))
        # Lint complains begin = begin + ...
        begin = math_ops.add(begin, size * mask)
    else:
      # IndexedSlices, concat_dim == 0. Each input gets IndexedSlices gradients
      # only for the relevant indices.
      start = constant_op.constant(0, dtype=grad.indices.dtype)
      for size in sizes:
        size_concat_dim = array_ops.gather(size, concat_dim)
        if size_concat_dim.dtype != grad.indices.dtype:
          size_concat_dim = math_ops.cast(size_concat_dim,
                                          dtype=grad.indices.dtype)
        end = start + size_concat_dim
        # Compute the 1-D Tensor of indices relevant for this input.
        indices_to_select = array_ops.squeeze(
            array_ops.where(math_ops.logical_and(grad.indices >= start,
                                                 grad.indices < end)),
            squeeze_dims=[1])
        new_indices = array_ops.gather(grad.indices, indices_to_select) - start
        new_values = array_ops.gather(grad.values, indices_to_select)
        out_grads.append(
            ops.IndexedSlices(new_values, new_indices, size))
        start = end
  else:
    raise TypeError("Expected Tensor or IndexedSlices, got %s" % type(grad))

  return (out_grads + [None] if end_value_index <= dim_index
          else [None] + out_grads)
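# Hedged sketch of the idea behind the dense branch above (not the TF-internal
# code): the gradient of concat is just the incoming gradient split back into
# the original input sizes along the concat dimension. Shapes are assumptions.
import numpy as np

a = np.ones((2, 3))
b = np.ones((2, 5))
grad = np.random.rand(2, 8)                      # gradient w.r.t. concat([a, b], axis=1)
grad_a, grad_b = np.split(grad, [a.shape[1]], axis=1)
assert grad_a.shape == a.shape and grad_b.shape == b.shape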
Example #47
def remove_squeezable_dimensions(labels,
                                 predictions,
                                 expected_rank_diff=0,
                                 name=None):
    """Squeeze last dim if ranks differ from expected by exactly 1.

  In the common case where we expect shapes to match, `expected_rank_diff`
  defaults to 0, and we squeeze the last dimension of the larger rank if they
  differ by 1.

  But, for example, if `labels` contains class IDs and `predictions` contains 1
  probability per class, we expect `predictions` to have 1 more dimension than
  `labels`, so `expected_rank_diff` would be 1. In this case, we'd squeeze
  `labels` if `rank(predictions) - rank(labels) == 0`, and
  `predictions` if `rank(predictions) - rank(labels) == 2`.

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    labels: Label values, a `Tensor` whose dimensions match `predictions`.
    predictions: Predicted values, a `Tensor` of arbitrary dimensions.
    expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.
    name: Name of the op.

  Returns:
    Tuple of `labels` and `predictions`, possibly with last dim squeezed.
  """
    with ops.name_scope(name, 'remove_squeezable_dimensions',
                        [labels, predictions]):
        predictions = ops.convert_to_tensor_v2_with_dispatch(predictions)
        labels = ops.convert_to_tensor_v2_with_dispatch(labels)
        predictions_shape = predictions.get_shape()
        predictions_rank = predictions_shape.ndims
        labels_shape = labels.get_shape()
        labels_rank = labels_shape.ndims
        if (labels_rank is not None) and (predictions_rank is not None):
            # Use static rank.
            rank_diff = predictions_rank - labels_rank
            if (rank_diff == expected_rank_diff + 1
                    and predictions_shape.dims[-1].is_compatible_with(1)):
                predictions = array_ops.squeeze(predictions, [-1])
            elif (rank_diff == expected_rank_diff - 1
                  and labels_shape.dims[-1].is_compatible_with(1)):
                labels = array_ops.squeeze(labels, [-1])
            return labels, predictions

        # Use dynamic rank.
        rank_diff = array_ops.rank(predictions) - array_ops.rank(labels)
        if (predictions_rank is
                None) or (predictions_shape.dims[-1].is_compatible_with(1)):
            predictions = control_flow_ops.cond(
                math_ops.equal(expected_rank_diff + 1, rank_diff),
                lambda: array_ops.squeeze(predictions, [-1]),
                lambda: predictions)
        if (labels_rank is
                None) or (labels_shape.dims[-1].is_compatible_with(1)):
            labels = control_flow_ops.cond(
                math_ops.equal(expected_rank_diff - 1, rank_diff),
                lambda: array_ops.squeeze(labels, [-1]), lambda: labels)
        return labels, predictions
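# Hedged usage sketch: when rank(predictions) == rank(labels) + 1 and the extra
# trailing dimension is 1, the helper above squeezes it away; this shows the
# equivalent effect with the public tf.squeeze. Values are assumptions.
import tensorflow as tf

labels = tf.constant([1, 0, 1])                   # shape (3,)
predictions = tf.constant([[0.9], [0.2], [0.8]])  # shape (3, 1), rank differs by 1
predictions = tf.squeeze(predictions, axis=[-1])  # shape (3,), now matching labels
assert predictions.shape == labels.shape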
Example #48
    def GraphFn(self, x):
        input_matrix_rows = 4
        input_matrix_columns = 144

        b = self._ConstOp((input_matrix_columns, 4))
        x1 = math_ops.matmul(x, b)
        b = self._ConstOp((1, 4))
        x1 = x1 + b

        b = self._ConstOp((input_matrix_rows, 144))
        x2 = self.trt_incompatible_op(x)
        x2 = math_ops.matmul(x2, b, transpose_a=True)
        x2 = gen_array_ops.reshape(x2, [4, -1])
        x2 = self.trt_incompatible_op(x2)

        b = self._ConstOp((4, input_matrix_columns))
        x3 = math_ops.matmul(x, b, transpose_b=True)

        b = self._ConstOp((16, input_matrix_rows))
        x4 = self.trt_incompatible_op(x)
        x4 = math_ops.matmul(x4, b, transpose_b=True, transpose_a=True)
        x4 = gen_array_ops.reshape(x4, [4, -1])
        x4 = self.trt_incompatible_op(x4)

        # Note that tf.nn.bias_add supports up to 5 dimensions.
        b = self._ConstOp((input_matrix_columns, 48))
        x5 = math_ops.matmul(x, b)
        b = self._ConstOp((48, ))
        x5 = nn.bias_add(x5, b)
        # TODO(b/154672994): Put the reshape back when the bug is fixed.
        # x5 = gen_array_ops.reshape(x5, [4, -1])

        x6 = gen_array_ops.reshape(x, [4, 24, 6])
        b = self._ConstOp((6, ))
        x6 = nn.bias_add(x6, b, data_format="NHWC")
        x6 = gen_array_ops.reshape(x6, [4, -1])

        x7 = gen_array_ops.reshape(x, [4, 12, 4, 3])
        b = self._ConstOp((3, ))
        x7 = nn.bias_add(x7, b, data_format="NHWC")
        x7 = gen_array_ops.reshape(x7, [4, -1])

        x8 = gen_array_ops.reshape(x, [4, 4, 3, 2, 6])
        b = self._ConstOp((6, ))
        x8 = nn.bias_add(x8, b, data_format="NHWC")
        x8 = gen_array_ops.reshape(x8, [4, -1])

        x9 = gen_array_ops.reshape(x, [4, 12, 3, 2, 2])
        b = self._ConstOp((12, ))
        x9 = nn.bias_add(x9, b, data_format="NCHW")
        x9 = gen_array_ops.reshape(x9, [4, -1])

        x10 = gen_array_ops.reshape(x, [4, 3, 4, 12])
        b = self._ConstOp((3, ))
        x10 = nn.bias_add(x10, b, data_format="NCHW")
        x10 = gen_array_ops.reshape(x10, [4, -1])

        x11 = gen_array_ops.reshape(x, [4, 6, 24])
        b = self._ConstOp((6, ))
        x11 = nn.bias_add(x11, b, data_format="NCHW")
        x11 = gen_array_ops.reshape(x11, [4, -1])

        out = array_ops.concat([x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11],
                               axis=-1)
        return array_ops.squeeze(out, name="output_0")
Example #49
  def _maybe_update_block_mask(self, weights, threshold):
    """Performs block-granular masking of the weights.

    Block pruning occurs only if the block_height or block_width is > 1 and
    if the weight tensor, when squeezed, has ndims = 2. Otherwise, elementwise
    pruning occurs.
    Args:
      weights: The weight tensor that needs to be masked.
      threshold: The current threshold value. The function will compute a new
        threshold and return the exponential moving average using the current
        value of threshold

    Returns:
      new_threshold: The new value of the threshold based on weights, and
        sparsity at the current global_step
      new_mask: A numpy array of the same size and shape as weights containing
        0 or 1 to indicate which of the values in weights falls below
        the threshold

    Raises:
      ValueError: if block pooling function is not AVG or MAX
    """
    squeezed_weights = array_ops.squeeze(weights)
    if squeezed_weights.get_shape().ndims != 2 or self._block_dim == [1, 1]:
      return self._update_mask(weights, threshold)

    if self._block_pooling_function not in ['AVG', 'MAX']:
      raise ValueError('Unknown pooling function for block sparsity: %s' %
                       self._block_pooling_function)

    with ops.name_scope(weights.op.name + '_pruning_ops'):
      abs_weights = math_ops.abs(squeezed_weights)

      pool_window = [self._block_dim[0], self._block_dim[1]]
      pool_fn = pruning_utils.factorized_pool

      if not self._spec.use_tpu:
        pool_fn = nn_ops.pool
        abs_weights = array_ops.reshape(
            abs_weights,
            [1, abs_weights.get_shape()[0],
             abs_weights.get_shape()[1], 1])

      pooled_weights = pool_fn(
          abs_weights,
          window_shape=pool_window,
          pooling_type=self._block_pooling_function,
          strides=pool_window,
          padding='SAME',
          name=weights.op.name + '_pooled')

      if pooled_weights.get_shape().ndims != 2:
        pooled_weights = array_ops.squeeze(pooled_weights)

      smoothed_threshold, new_mask = self._update_mask(pooled_weights,
                                                       threshold)
      updated_mask = pruning_utils.kronecker_product(
          new_mask, array_ops.ones(self._block_dim))
      sliced_mask = array_ops.slice(
          updated_mask, [0, 0],
          [squeezed_weights.get_shape()[0],
           squeezed_weights.get_shape()[1]])

    return smoothed_threshold, array_ops.reshape(sliced_mask,
                                                 array_ops.shape(weights))
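# Hedged sketch of the block-to-element mask expansion used above: a per-block
# mask is expanded to per-weight granularity via a Kronecker product with a
# block of ones. Values and block size are assumptions.
import numpy as np

block_mask = np.array([[1, 0],
                       [0, 1]], dtype=np.float32)     # one entry per 2x2 block
block_dim = (2, 2)
element_mask = np.kron(block_mask, np.ones(block_dim, dtype=np.float32))
# element_mask has shape (4, 4); each kept block becomes a 2x2 patch of ones.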
Example #50
 def logits_to_predictions(self, logits, proba=False):
     if self.num_label_columns == 1:
         return array_ops.squeeze(logits, squeeze_dims=[1])
     return logits
Example #51
def sparse_top_k_categorical_accuracy(y_true, y_pred, k=5):
  # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,)
  if (len(K.int_shape(y_true)) == len(K.int_shape(y_pred))):
    y_true = array_ops.squeeze(y_true, [-1])

  return K.mean(nn.in_top_k(y_pred, math_ops.cast(y_true, 'int32'), k), axis=-1)
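# Hedged sketch of the same computation with public TF ops; the label and
# prediction values are illustrative assumptions.
import tensorflow as tf

y_true = tf.constant([[1], [0]])                       # (num_samples, 1)
y_pred = tf.constant([[0.1, 0.6, 0.3],
                      [0.8, 0.1, 0.1]])
y_true = tf.squeeze(y_true, [-1])                      # (num_samples,), as in the snippet
hits = tf.math.in_top_k(tf.cast(y_true, tf.int32), y_pred, k=2)
accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))   # 1.0 for these values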
Example #52
def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index):
    """Gradient for concat op.

  Args:
    op: An operation.
    grad: `Tensor` or `IndexedSlices` representing the gradients with respect to
      each output of the op.
    start_value_index: An integer index of the first value in the op.inputs.
    end_value_index: An integer index of the last value in the op.inputs.
    dim_index: An integer index of concat_dim or axis parameter in op.inputs.

  Returns:
    Tensors representing the partial gradients with respect to each input
    of the op.

  Raises:
    ValueError: if concat_dim/axis is not statically known.
  """
    def _CreateDenseMaskAndBegin(sizes, concat_dim):
        """Create variables for iteratively slicing a dense gradients tensor."""
        # Since shape is 1-D, shape_of_shape = [rank-of-inputs]
        shape_of_shape = array_ops.shape(sizes[0])
        # Make a vector of length equal to the input's dimensions,
        # with 0's everywhere and 1 in the concat dim position.
        # Note: Can't use sparse_to_dense since it isn't GPU-capable (for now)
        mask = array_ops.concat([
            array_ops.zeros(array_ops.expand_dims(concat_dim, 0),
                            dtype=dtypes.int32), [1],
            array_ops.zeros(shape_of_shape - concat_dim - 1,
                            dtype=dtypes.int32)
        ], 0)
        begin = array_ops.zeros(shape_of_shape, dtype=dtypes.int32)
        return mask, begin

    def _ExtractInputShapes(inputs):
        """Extract the shapes of a set of input tensors."""
        if context.executing_eagerly():
            return array_ops.shape_n(inputs)
        sizes = []
        fully_known = True
        for x in inputs:
            input_shape = array_ops.shape(x)
            if not isinstance(input_shape,
                              ops.Tensor) or input_shape.op.type != "Const":
                fully_known = False
                break
            sizes.append(input_shape)

        if fully_known:
            return sizes
        else:
            return array_ops.shape_n(inputs)

    # Degenerate concatenation, just return grad.
    if len(op.inputs) == 2:
        return grad + [None] if end_value_index <= dim_index else [None] + grad

    concat_dim = op.inputs[dim_index]
    input_values = op.inputs[start_value_index:end_value_index]

    out_grads = []
    if isinstance(grad, ops.Tensor):
        if context.executing_eagerly() or isinstance(concat_dim,
                                                     ops.EagerTensor):
            # Using mod here for convenience since concat_dim is already verified
            # in concat implementation to be within the allowed [-rank, rank) range.
            non_neg_concat_dim = (concat_dim._numpy().item(0) %
                                  input_values[0]._rank())  # pylint: disable=protected-access
            # All inputs are guaranteed to be EagerTensors in eager mode
            sizes = pywrap_tfe.TFE_Py_TensorShapeSlice(input_values,
                                                       non_neg_concat_dim)
            out_grads = array_ops.split(grad, sizes, non_neg_concat_dim)
        else:
            if constant_op.is_constant(concat_dim):
                # If concat_dim is a constant defined in a different context,
                # then we duplicate it in the current context to avoid passing it
                # through an Enter node.
                # This is a small optimization in general, but it is required when
                # compiling with XLA, as XLA needs the concat input to be folded into a
                # constant.
                grad_context = control_flow_util.GetOutputContext(grad.op)
                dim_context = control_flow_util.GetOutputContext(concat_dim.op)
                if dim_context != grad_context:
                    value = tensor_util.constant_value(concat_dim)
                    concat_dim = constant_op.constant(value=value,
                                                      dtype=concat_dim.dtype)

            # Using mod here for convenience since concat_dim is already verified
            # in concat implementation to be within the allowed [-rank, rank) range.
            non_neg_concat_dim = concat_dim % array_ops.rank(input_values[0])

            # Get the inputs' tensor shapes
            sizes = _ExtractInputShapes(input_values)
            # The magic number of 16 was found through benchmarking a range of sizes
            # on CPUs and a Maxwell TitanX.  A speedup was seen in a large majority of
            # cases when switching implementations at N=16, but it is possible that
            # there will be a small number of performance regressions.
            if len(sizes) > 16:
                # extract the size of each input along the concat dimension
                sizes = array_ops.squeeze(
                    array_ops.slice(array_ops.stack(sizes, axis=1),
                                    [non_neg_concat_dim, 0], [1, -1]))
                out_grads = array_ops.split(grad, sizes, non_neg_concat_dim)
            else:
                offset = gen_array_ops.concat_offset(non_neg_concat_dim, sizes)
                for (begin, size) in zip(offset, sizes):
                    out_grads.append(array_ops.slice(grad, begin, size))
    elif isinstance(grad, ops.IndexedSlices):
        # Using mod here for convenience since concat_dim is already verified
        # in concat implementation to be within the allowed [-rank, rank) range.
        non_neg_concat_dim = concat_dim % array_ops.rank(input_values[0])
        concat_dim_static = tensor_util.constant_value(concat_dim)
        if concat_dim_static is None:
            raise ValueError("Can only compute IndexedSlices gradient with "
                             "statically-known concat_dim")
        if concat_dim_static < 0:
            rank = tensor_util.constant_value(array_ops.rank(input_values[0]))
            if rank is None:
                raise ValueError(
                    "Can only compute IndexedSlices gradient with "
                    "negative concat_dim when first value rank is "
                    "statically-known.")
            concat_dim_static %= rank
        # Get the inputs' tensor shapes
        sizes = [array_ops.shape(x) for x in input_values]
        if concat_dim_static > 0:
            # IndexedSlices, non_neg_concat_dim > 0. Each input gets IndexedSlices
            # gradients with all the indices, but with grad.values sliced accordingly.
            # This is like the Tensor case, except shape(grad.values)[0] is not equal
            # to shape(sizes[i])[0], since only a subset of the dim-0 values are
            # stored.
            mask, begin = _CreateDenseMaskAndBegin(sizes, non_neg_concat_dim)
            for size in sizes:
                new_values = array_ops.slice(
                    grad.values, begin,
                    array_ops.concat(
                        [[-1], array_ops.slice(size, [1], [-1])], 0))
                out_grads.append(
                    ops.IndexedSlices(new_values, grad.indices, size))
                # Lint complains begin = begin + ...
                begin = math_ops.add(begin, size * mask)
        else:
            # IndexedSlices, concat_dim == 0. Each input gets IndexedSlices gradients
            # only for the relevant indices.
            start = constant_op.constant(0, dtype=grad.indices.dtype)
            for size in sizes:
                size_concat_dim = array_ops.gather(size, non_neg_concat_dim)
                if size_concat_dim.dtype != grad.indices.dtype:
                    size_concat_dim = math_ops.cast(size_concat_dim,
                                                    dtype=grad.indices.dtype)
                end = start + size_concat_dim
                # Compute the 1-D Tensor of indices relevant for this input.
                indices_to_select = array_ops.squeeze(array_ops.where(
                    math_ops.logical_and(grad.indices >= start,
                                         grad.indices < end)),
                                                      axis=[1])
                new_indices = array_ops.gather(grad.indices,
                                               indices_to_select) - start
                new_values = array_ops.gather(grad.values, indices_to_select)
                out_grads.append(
                    ops.IndexedSlices(new_values, new_indices, size))
                start = end
    else:
        raise TypeError("Expected Tensor or IndexedSlices, got %s" %
                        type(grad))

    return (out_grads + [None] if end_value_index <= dim_index else [None] +
            out_grads)
Example #53
 def select_dataset(seed):
     return array_ops.squeeze(stateless.stateless_multinomial([logits],
                                                              1,
                                                              seed=seed),
                              axis=[0, 1])
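# Hedged sketch using the public tf.random.stateless_categorical (the modern
# counterpart of stateless_multinomial); the logits and seed are assumptions.
import tensorflow as tf

logits = [0.0, 0.0, 0.0]                               # uniform over three choices
sample = tf.random.stateless_categorical([logits], num_samples=1, seed=[1, 2])
index = tf.squeeze(sample, axis=[0, 1])                # scalar index, as in the snippet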
Example #54
    def solvevec(self, rhs, adjoint=False, name="solve"):
        """Solve single equation with best effort: `A X = rhs`.

    The returned `Tensor` will be close to an exact solution if `A` is well
    conditioned. Otherwise closeness will vary. See class docstring for details.

    Examples:

    ```python
    # Make an operator acting like batch matrix A.  Assume A.shape = [..., M, N]
    operator = LinearOperator(...)
    operator.shape = [..., M, N]

    # Solve one linear system for every member of the batch.
    RHS = ... # shape [..., M]

    X = operator.solvevec(RHS)
    # X is the solution to the linear system
    # sum_j A[..., :, j] X[..., j] = RHS[..., :]

    operator.matvec(X)
    ==> RHS
    ```

    Args:
      rhs: `Tensor` with same `dtype` as this operator, or list of `Tensor`s
        (for blockwise operators). `Tensor`s are treated as [batch] vectors,
        meaning for every set of leading dimensions, the last dimension defines
        a vector.  See class docstring for definition of compatibility regarding
        batch dimensions.
      adjoint: Python `bool`.  If `True`, solve the system involving the adjoint
        of this `LinearOperator`:  `A^H X = rhs`.
      name:  A name scope to use for ops added by this method.

    Returns:
      `Tensor` with shape `[...,N]` and same `dtype` as `rhs`.

    Raises:
      NotImplementedError:  If `self.is_non_singular` or `is_square` is False.
    """
        with self._name_scope(name):
            block_dimensions = (self._block_domain_dimensions()
                                if adjoint else self._block_range_dimensions())
            if linear_operator_util.arg_is_blockwise(block_dimensions, rhs,
                                                     -1):
                for i, block in enumerate(rhs):
                    if not isinstance(block, linear_operator.LinearOperator):
                        block = ops.convert_to_tensor_v2_with_dispatch(block)
                        self._check_input_dtype(block)
                        block_dimensions[i].assert_is_compatible_with(
                            block.shape[-1])
                        rhs[i] = block
                rhs_mat = [
                    array_ops.expand_dims(block, axis=-1) for block in rhs
                ]
                solution_mat = self.solve(rhs_mat, adjoint=adjoint)
                return [array_ops.squeeze(x, axis=-1) for x in solution_mat]

            rhs = ops.convert_to_tensor_v2_with_dispatch(rhs, name="rhs")
            self._check_input_dtype(rhs)
            op_dimension = (self.domain_dimension
                            if adjoint else self.range_dimension)
            op_dimension.assert_is_compatible_with(rhs.shape[-1])
            rhs_mat = array_ops.expand_dims(rhs, axis=-1)
            solution_mat = self.solve(rhs_mat, adjoint=adjoint)
            return array_ops.squeeze(solution_mat, axis=-1)
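# Hedged usage sketch with the public tf.linalg API: solvevec amounts to
# expand_dims, a matrix solve, and a squeeze of the trailing dimension. The
# matrix and right-hand side are assumptions.
import tensorflow as tf

matrix = tf.constant([[2.0, 0.0],
                      [0.0, 4.0]])
operator = tf.linalg.LinearOperatorFullMatrix(matrix, is_non_singular=True)
rhs = tf.constant([2.0, 8.0])          # a [batch] vector of shape (2,)
x = operator.solvevec(rhs)             # shape (2,); here x == [1.0, 2.0]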
Example #55
def _resize_image(image, height, width):
    image = array_ops.expand_dims(image, 0)
    image = image_ops.resize_bilinear(image, [height, width])
    return array_ops.squeeze(image, [0])
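# Hedged TF2 sketch of the _resize_image pattern above: add a batch dimension,
# resize (bilinear by default in tf.image.resize), then squeeze the batch
# dimension away again. The image shape and target size are assumptions.
import tensorflow as tf

image = tf.zeros([32, 32, 3])                 # a single HWC image
batched = tf.expand_dims(image, 0)            # (1, 32, 32, 3)
resized = tf.image.resize(batched, [64, 64])  # (1, 64, 64, 3)
image = tf.squeeze(resized, [0])              # (64, 64, 3)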
    def training_graph(self,
                       input_data,
                       input_labels,
                       random_seed,
                       data_spec,
                       input_weights=None):
        """Constructs a TF graph for training a random tree.

    Args:
      input_data: A tensor or SparseTensor or placeholder for input data.
      input_labels: A tensor or placeholder for labels associated with
        input_data.
      random_seed: The random number generator seed to use for this tree.  0
        means use the current time as the seed.
      data_spec: A list of tf.dtype values specifying the original types of
        each column.
      input_weights: A float tensor or placeholder holding per-input weights,
        or None if all inputs are to be weighted equally.

    Returns:
      The last op in the random tree training graph.
    """
        epoch = math_ops.to_int32(get_epoch_variable())

        if input_weights is None:
            input_weights = []

        sparse_indices = []
        sparse_values = []
        sparse_shape = []
        if isinstance(input_data, sparse_tensor.SparseTensor):
            sparse_indices = input_data.indices
            sparse_values = input_data.values
            sparse_shape = input_data.shape
            input_data = []

        # Count extremely random stats.
        (node_sums, node_squares, splits_indices, splits_sums, splits_squares,
         totals_indices, totals_sums, totals_squares,
         input_leaves) = (self.training_ops.count_extremely_random_stats(
             input_data,
             sparse_indices,
             sparse_values,
             sparse_shape,
             data_spec,
             input_labels,
             input_weights,
             self.variables.tree,
             self.variables.tree_thresholds,
             self.variables.node_to_accumulator_map,
             self.variables.candidate_split_features,
             self.variables.candidate_split_thresholds,
             self.variables.start_epoch,
             epoch,
             num_classes=self.params.num_output_columns,
             regression=self.params.regression))
        node_update_ops = []
        node_update_ops.append(
            state_ops.assign_add(self.variables.node_sums, node_sums))

        splits_update_ops = []
        splits_update_ops.append(
            self.training_ops.scatter_add_ndim(
                self.variables.candidate_split_sums, splits_indices,
                splits_sums))
        splits_update_ops.append(
            self.training_ops.scatter_add_ndim(self.variables.accumulator_sums,
                                               totals_indices, totals_sums))

        if self.params.regression:
            node_update_ops.append(
                state_ops.assign_add(self.variables.node_squares,
                                     node_squares))
            splits_update_ops.append(
                self.training_ops.scatter_add_ndim(
                    self.variables.candidate_split_squares, splits_indices,
                    splits_squares))
            splits_update_ops.append(
                self.training_ops.scatter_add_ndim(
                    self.variables.accumulator_squares, totals_indices,
                    totals_squares))

        # Sample inputs.
        update_indices, feature_updates, threshold_updates = (
            self.training_ops.sample_inputs(
                input_data,
                sparse_indices,
                sparse_values,
                sparse_shape,
                input_weights,
                self.variables.node_to_accumulator_map,
                input_leaves,
                self.variables.candidate_split_features,
                self.variables.candidate_split_thresholds,
                split_initializations_per_input=(
                    self.params.split_initializations_per_input),
                split_sampling_random_seed=random_seed))
        update_features_op = state_ops.scatter_update(
            self.variables.candidate_split_features, update_indices,
            feature_updates)
        update_thresholds_op = state_ops.scatter_update(
            self.variables.candidate_split_thresholds, update_indices,
            threshold_updates)

        # Calculate finished nodes.
        with ops.control_dependencies(splits_update_ops):
            # Passing input_leaves to finished nodes here means that nodes that
            # have become stale won't be deallocated until an input reaches them,
            # because we're trying to avoid considering every fertile node for
            # performance reasons.
            finished, stale = self.training_ops.finished_nodes(
                input_leaves,
                self.variables.node_to_accumulator_map,
                self.variables.candidate_split_sums,
                self.variables.candidate_split_squares,
                self.variables.accumulator_sums,
                self.variables.accumulator_squares,
                self.variables.start_epoch,
                epoch,
                num_split_after_samples=self.params.split_after_samples,
                min_split_samples=self.params.min_split_samples,
                dominate_method=self.params.dominate_method,
                dominate_fraction=self.params.dominate_fraction)

        # Update leaf scores.
        # TODO(thomaswc): Store the leaf scores in a TopN and only update the
        # scores of the leaves that were touched by this batch of input.
        children = array_ops.squeeze(array_ops.slice(self.variables.tree,
                                                     [0, 0], [-1, 1]),
                                     squeeze_dims=[1])
        is_leaf = math_ops.equal(constants.LEAF_NODE, children)
        leaves = math_ops.to_int32(
            array_ops.squeeze(array_ops.where(is_leaf), squeeze_dims=[1]))
        non_fertile_leaves = array_ops.boolean_mask(
            leaves,
            math_ops.less(
                array_ops.gather(self.variables.node_to_accumulator_map,
                                 leaves), 0))

        # TODO(gilberth): It should be possible to limit the number of non
        # fertile leaves we calculate scores for, especially since we can only take
        # at most array_ops.shape(finished)[0] of them.
        with ops.control_dependencies(node_update_ops):
            sums = array_ops.gather(self.variables.node_sums,
                                    non_fertile_leaves)
            if self.params.regression:
                squares = array_ops.gather(self.variables.node_squares,
                                           non_fertile_leaves)
                non_fertile_leaf_scores = self._variance(sums, squares)
            else:
                non_fertile_leaf_scores = self._weighted_gini(sums)

        # Calculate best splits.
        with ops.control_dependencies(splits_update_ops):
            split_indices = self.training_ops.best_splits(
                finished,
                self.variables.node_to_accumulator_map,
                self.variables.candidate_split_sums,
                self.variables.candidate_split_squares,
                self.variables.accumulator_sums,
                self.variables.accumulator_squares,
                regression=self.params.regression)

        # Grow tree.
        with ops.control_dependencies(
            [update_features_op, update_thresholds_op]):
            (tree_update_indices, tree_children_updates,
             tree_threshold_updates, new_eot) = (self.training_ops.grow_tree(
                 self.variables.end_of_tree,
                 self.variables.node_to_accumulator_map, finished,
                 split_indices, self.variables.candidate_split_features,
                 self.variables.candidate_split_thresholds))
            tree_update_op = state_ops.scatter_update(self.variables.tree,
                                                      tree_update_indices,
                                                      tree_children_updates)
            thresholds_update_op = state_ops.scatter_update(
                self.variables.tree_thresholds, tree_update_indices,
                tree_threshold_updates)
            # TODO(thomaswc): Only update the epoch on the new leaves.
            new_epoch_updates = epoch * array_ops.ones_like(
                tree_threshold_updates, dtype=dtypes.int32)
            epoch_update_op = state_ops.scatter_update(
                self.variables.start_epoch, tree_update_indices,
                new_epoch_updates)

        # Update fertile slots.
        with ops.control_dependencies([tree_update_op]):
            (n2a_map_updates, a2n_map_updates, accumulators_cleared,
             accumulators_allocated) = (self.training_ops.update_fertile_slots(
                 finished,
                 non_fertile_leaves,
                 non_fertile_leaf_scores,
                 self.variables.end_of_tree,
                 self.variables.accumulator_sums,
                 self.variables.node_to_accumulator_map,
                 stale,
                 self.variables.node_sums,
                 regression=self.params.regression))

        # Ensure end_of_tree doesn't get updated until UpdateFertileSlots has
        # used it to calculate new leaves.
        gated_new_eot, = control_flow_ops.tuple(
            [new_eot], control_inputs=[n2a_map_updates])
        eot_update_op = state_ops.assign(self.variables.end_of_tree,
                                         gated_new_eot)

        updates = []
        updates.append(eot_update_op)
        updates.append(tree_update_op)
        updates.append(thresholds_update_op)
        updates.append(epoch_update_op)

        updates.append(
            state_ops.scatter_update(self.variables.node_to_accumulator_map,
                                     n2a_map_updates[0], n2a_map_updates[1]))

        updates.append(
            state_ops.scatter_update(self.variables.accumulator_to_node_map,
                                     a2n_map_updates[0], a2n_map_updates[1]))

        cleared_and_allocated_accumulators = array_ops.concat(
            0, [accumulators_cleared, accumulators_allocated])

        # Calculate values to put into scatter update for candidate counts.
        # Candidate split counts are always reset back to 0 for both cleared
        # and allocated accumulators. This means some accumulators might be doubly
        # reset to 0 if they were released and not allocated, then later allocated.
        split_values = array_ops.tile(
            array_ops.expand_dims(
                array_ops.expand_dims(
                    array_ops.zeros_like(cleared_and_allocated_accumulators,
                                         dtype=dtypes.float32), 1), 2),
            [
                1, self.params.num_splits_to_consider,
                self.params.num_output_columns
            ])
        updates.append(
            state_ops.scatter_update(self.variables.candidate_split_sums,
                                     cleared_and_allocated_accumulators,
                                     split_values))
        if self.params.regression:
            updates.append(
                state_ops.scatter_update(
                    self.variables.candidate_split_squares,
                    cleared_and_allocated_accumulators, split_values))

        # Calculate values to put into scatter update for total counts.
        total_cleared = array_ops.tile(
            array_ops.expand_dims(
                math_ops.neg(
                    array_ops.ones_like(accumulators_cleared,
                                        dtype=dtypes.float32)), 1),
            [1, self.params.num_output_columns])
        total_reset = array_ops.tile(
            array_ops.expand_dims(
                array_ops.zeros_like(accumulators_allocated,
                                     dtype=dtypes.float32), 1),
            [1, self.params.num_output_columns])
        accumulator_updates = array_ops.concat(0, [total_cleared, total_reset])
        updates.append(
            state_ops.scatter_update(self.variables.accumulator_sums,
                                     cleared_and_allocated_accumulators,
                                     accumulator_updates))
        if self.params.regression:
            updates.append(
                state_ops.scatter_update(self.variables.accumulator_squares,
                                         cleared_and_allocated_accumulators,
                                         accumulator_updates))

        # Calculate values to put into scatter update for candidate splits.
        split_features_updates = array_ops.tile(
            array_ops.expand_dims(
                math_ops.neg(
                    array_ops.ones_like(cleared_and_allocated_accumulators)),
                1), [1, self.params.num_splits_to_consider])
        updates.append(
            state_ops.scatter_update(self.variables.candidate_split_features,
                                     cleared_and_allocated_accumulators,
                                     split_features_updates))

        updates += self.finish_iteration()

        return control_flow_ops.group(*updates)
Example #57
def inception_v3(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 min_depth=16,
                 depth_multiplier=1.0,
                 prediction_fn=layers_lib.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV3'):
    """Inception model from http://arxiv.org/abs/1512.00567.

  "Rethinking the Inception Architecture for Computer Vision"

  Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
  Zbigniew Wojna.

  With the default arguments this method constructs the exact model defined in
  the paper. However, one can experiment with variations of the inception_v3
  network by changing arguments dropout_keep_prob, min_depth and
  depth_multiplier.

  The default image size used to train this network is 299x299.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether is training or not.
    dropout_keep_prob: the percentage of activation values that are retained.
    min_depth: Minimum depth value (number of channels) for all convolution ops.
      Enforced when depth_multiplier < 1, and not an active constraint when
      depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    prediction_fn: a function to get predictions out of logits.
    spatial_squeeze: if True, logits is of shape [B, C], if False logits is
        of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    logits: the pre-softmax activations, a tensor of size
      [batch_size, num_classes]
    end_points: a dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: if 'depth_multiplier' is less than or equal to zero.
  """
    if depth_multiplier <= 0:
        raise ValueError('depth_multiplier is not greater than zero.')
    depth = lambda d: max(int(d * depth_multiplier), min_depth)

    with variable_scope.variable_scope(scope,
                                       'InceptionV3', [inputs, num_classes],
                                       reuse=reuse) as scope:
        with arg_scope([layers_lib.batch_norm, layers_lib.dropout],
                       is_training=is_training):
            net, end_points = inception_v3_base(
                inputs,
                scope=scope,
                min_depth=min_depth,
                depth_multiplier=depth_multiplier)

            # Auxiliary Head logits
            with arg_scope(
                [layers.conv2d, layers_lib.max_pool2d, layers_lib.avg_pool2d],
                    stride=1,
                    padding='SAME'):
                aux_logits = end_points['Mixed_6e']
                with variable_scope.variable_scope('AuxLogits'):
                    aux_logits = layers_lib.avg_pool2d(aux_logits, [5, 5],
                                                       stride=3,
                                                       padding='VALID',
                                                       scope='AvgPool_1a_5x5')
                    aux_logits = layers.conv2d(aux_logits,
                                               depth(128), [1, 1],
                                               scope='Conv2d_1b_1x1')

                    # Shape of feature map before the final layer.
                    kernel_size = _reduced_kernel_size_for_small_input(
                        aux_logits, [5, 5])
                    aux_logits = layers.conv2d(
                        aux_logits,
                        depth(768),
                        kernel_size,
                        weights_initializer=trunc_normal(0.01),
                        padding='VALID',
                        scope='Conv2d_2a_{}x{}'.format(*kernel_size))
                    aux_logits = layers.conv2d(
                        aux_logits,
                        num_classes, [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                        weights_initializer=trunc_normal(0.001),
                        scope='Conv2d_2b_1x1')
                    if spatial_squeeze:
                        aux_logits = array_ops.squeeze(aux_logits, [1, 2],
                                                       name='SpatialSqueeze')
                    end_points['AuxLogits'] = aux_logits

            # Final pooling and prediction
            with variable_scope.variable_scope('Logits'):
                kernel_size = _reduced_kernel_size_for_small_input(net, [8, 8])
                net = layers_lib.avg_pool2d(
                    net,
                    kernel_size,
                    padding='VALID',
                    scope='AvgPool_1a_{}x{}'.format(*kernel_size))
                # 1 x 1 x 2048
                net = layers_lib.dropout(net,
                                         keep_prob=dropout_keep_prob,
                                         scope='Dropout_1b')
                end_points['PreLogits'] = net
                # 2048
                logits = layers.conv2d(net,
                                       num_classes, [1, 1],
                                       activation_fn=None,
                                       normalizer_fn=None,
                                       scope='Conv2d_1c_1x1')
                if spatial_squeeze:
                    logits = array_ops.squeeze(logits, [1, 2],
                                               name='SpatialSqueeze')
                # 1000
            end_points['Logits'] = logits
            end_points['Predictions'] = prediction_fn(logits,
                                                      scope='Predictions')
    return logits, end_points
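# Hedged sketch of the spatial squeeze at the end of the network above: 1x1
# spatial logits of shape [B, 1, 1, C] are reduced to [B, C]. The batch size
# and class count below are assumptions.
import tensorflow as tf

logits_4d = tf.zeros([8, 1, 1, 1000])
logits = tf.squeeze(logits_4d, [1, 2], name='SpatialSqueeze')  # shape (8, 1000)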
        def _train_op_fn(loss):
            """Run one training iteration."""
            train_op = []
            if cache:
                train_op.append(cache.insert(tree_ids, node_ids, logits))
            if closed_form_grad_and_hess_fn:
                gradients, hessians = closed_form_grad_and_hess_fn(
                    logits, labels)
            else:
                gradients = gradients_impl.gradients(loss,
                                                     logits,
                                                     name='Gradients')[0]
                hessians = gradients_impl.gradients(gradients,
                                                    logits,
                                                    name='Hessians')[0]
            stats_summary_list = [
                array_ops.squeeze(boosted_trees_ops.make_stats_summary(
                    node_ids=node_ids,
                    gradients=gradients,
                    hessians=hessians,
                    bucketized_features_list=[input_feature_list[f]],
                    max_splits=max_splits,
                    num_buckets=num_buckets),
                                  axis=0) for f in range(num_features)
            ]

            def grow_tree_from_stats_summaries(stats_summary_list):
                """Updates ensemble based on the best gains from stats summaries."""
                (node_ids_per_feature, gains_list, thresholds_list,
                 left_node_contribs_list, right_node_contribs_list) = (
                     boosted_trees_ops.calculate_best_gains_per_feature(
                         node_id_range=array_ops.stack([
                             math_ops.reduce_min(node_ids),
                             math_ops.reduce_max(node_ids)
                         ]),
                         stats_summary_list=stats_summary_list,
                         l1=tree_hparams.l1,
                         l2=tree_hparams.l2,
                         tree_complexity=tree_hparams.tree_complexity,
                         max_splits=max_splits))
                grow_op = boosted_trees_ops.update_ensemble(
                    # Confirm if local_tree_ensemble or tree_ensemble should be used.
                    tree_ensemble.resource_handle,
                    feature_ids=math_ops.range(0,
                                               num_features,
                                               dtype=dtypes.int32),
                    node_ids=node_ids_per_feature,
                    gains=gains_list,
                    thresholds=thresholds_list,
                    left_node_contribs=left_node_contribs_list,
                    right_node_contribs=right_node_contribs_list,
                    learning_rate=tree_hparams.learning_rate,
                    max_depth=tree_hparams.max_depth,
                    pruning_mode=boosted_trees_ops.PruningMode.NO_PRUNING)
                return grow_op

            if train_in_memory and is_single_machine:
                train_op.append(distribute_lib.increment_var(global_step))
                train_op.append(
                    grow_tree_from_stats_summaries(stats_summary_list))
            else:
                summary_accumulator = data_flow_ops.ConditionalAccumulator(
                    dtype=dtypes.float32,
                    # The stats consist of gradients and hessians (the last dimension).
                    shape=[num_features, max_splits, num_buckets, 2],
                    shared_name='stats_summary_accumulator')
                apply_grad = summary_accumulator.apply_grad(
                    array_ops.stack(stats_summary_list, axis=0), stamp_token)

                def grow_tree_from_accumulated_summaries_fn():
                    """Updates the tree with the best layer from accumulated summaries."""
                    # Take out the accumulated summaries from the accumulator and grow.
                    stats_summary_list = array_ops.unstack(
                        summary_accumulator.take_grad(1), axis=0)
                    grow_op = grow_tree_from_stats_summaries(
                        stats_summary_list)
                    return grow_op

                with ops.control_dependencies([apply_grad]):
                    train_op.append(distribute_lib.increment_var(global_step))
                    if config.is_chief:
                        train_op.append(
                            control_flow_ops.cond(
                                math_ops.greater_equal(
                                    summary_accumulator.num_accumulated(),
                                    n_batches_per_layer),
                                grow_tree_from_accumulated_summaries_fn,
                                control_flow_ops.no_op,
                                name='wait_until_n_batches_accumulated'))

            return control_flow_ops.group(train_op, name='train_op')
Example #59
def _list_mle_loss(labels,
                   logits,
                   weights=None,
                   lambda_weight=None,
                   reduction=core_losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
                   name=None,
                   seed=None):
  """Computes the ListMLE loss [Xia et al.

  2008] for a list.

  Given the labels of graded relevance l_i and the logits s_i, we calculate
  the ListMLE loss for the given list.

  The `lambda_weight` re-weights examples based on l_i and r_i.
  The recommended weighting scheme is the formulation presented in the
  "Position-Aware ListMLE" paper (Lan et. al) and available using
  create_p_list_mle_lambda_weight() factory function above.

  Args:
    labels: A `Tensor` of the same shape as `logits` representing graded
      relevance.
    logits: A `Tensor` with shape [batch_size, list_size]. Each value is the
      ranking score of the corresponding item.
    weights: A scalar, a `Tensor` with shape [batch_size, 1] for list-wise
      weights, or a `Tensor` with shape [batch_size, list_size] for item-wise
      weights.
    lambda_weight: A `DCGLambdaWeight` instance.
    reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
      reduce training loss over batch.
    name: A string used as the name for this loss.
    seed: A randomization seed used when shuffling ground truth permutations.

  Returns:
    An op for the ListMLE loss.
  """
  with ops.name_scope(name, 'list_mle_loss', (labels, logits, weights)):
    is_label_valid = utils.is_label_valid(labels)
    # Reset invalid labels to 0 and invalid logits to log(_EPSILON), so their
    # exp() contributes ~0 to the loss.
    labels = array_ops.where(is_label_valid, labels,
                             array_ops.zeros_like(labels))
    logits = array_ops.where(
        is_label_valid, logits,
        math_ops.log(_EPSILON) * array_ops.ones_like(logits))
    weights = 1.0 if weights is None else ops.convert_to_tensor(weights)
    weights = array_ops.squeeze(weights)

    # Shuffle labels and logits to add randomness to sort.
    shuffled_indices = utils.shuffle_valid_indices(is_label_valid, seed)
    shuffled_labels = array_ops.gather_nd(labels, shuffled_indices)
    shuffled_logits = array_ops.gather_nd(logits, shuffled_indices)

    sorted_labels, sorted_logits = utils.sort_by_scores(
        shuffled_labels, [shuffled_labels, shuffled_logits])

    # Numerically stable log-sum-exp: subtract the per-list max before
    # exponentiating, then take a reverse cumulative sum so that
    # sums[i] = log(sum_{j >= i} exp(s_j)) - s_i.
    raw_max = math_ops.reduce_max(sorted_logits, axis=1, keepdims=True)
    sorted_logits = sorted_logits - raw_max
    sums = math_ops.cumsum(math_ops.exp(sorted_logits), axis=1, reverse=True)
    sums = math_ops.log(sums) - sorted_logits

    if lambda_weight is not None and isinstance(lambda_weight,
                                                ListMLELambdaWeight):
      sums *= lambda_weight.individual_weights(sorted_labels)

    negative_log_likelihood = math_ops.reduce_sum(sums, 1)

    return core_losses.compute_weighted_loss(
        negative_log_likelihood, weights=weights, reduction=reduction)
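
# A NumPy sketch (an illustration, not part of the library) of the core
# ListMLE negative log-likelihood computed above: after sorting logits by
# decreasing ground-truth relevance, the loss is
#   sum_i [ log(sum_{j >= i} exp(s_j)) - s_i ],
# evaluated with the usual max-subtraction trick for numerical stability.
import numpy as np

def list_mle_nll_sketch(sorted_logits):
  s = np.asarray(sorted_logits, dtype=np.float64)
  s = s - s.max()                       # stabilize exp()
  # Reverse cumulative sum of exp(s): sums[i] = sum_{j >= i} exp(s[j]).
  sums = np.cumsum(np.exp(s)[::-1])[::-1]
  return np.sum(np.log(sums) - s)

# A list scored in the correct order incurs a smaller loss than the same
# scores in reverse order.
print(list_mle_nll_sketch([3.0, 2.0, 1.0]))   # ~0.72
print(list_mle_nll_sketch([1.0, 2.0, 3.0]))   # ~3.72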
Example #60
def norm(tensor,
         ord='euclidean',
         axis=None,
         keepdims=None,
         name=None,
         keep_dims=None):
    r"""Computes the norm of vectors, matrices, and tensors.

  This function can compute several different vector norms (the 1-norm, the
  Euclidean or 2-norm, the inf-norm, and in general the p-norm for p > 0) and
  matrix norms (Frobenius, 1-norm, 2-norm and inf-norm).

  Args:
    tensor: `Tensor` of types `float32`, `float64`, `complex64`, `complex128`
    ord: Order of the norm. Supported values are 'fro', 'euclidean',
      `1`, `2`, `np.inf` and any positive real number yielding the corresponding
      p-norm. Default is 'euclidean' which is equivalent to Frobenius norm if
      `tensor` is a matrix and equivalent to 2-norm for vectors.
      Some restrictions apply:
        a) The Frobenius norm `fro` is not defined for vectors,
        b) If axis is a 2-tuple (matrix norm), only 'euclidean', 'fro', `1`,
           `2`, `np.inf` are supported.
      See the description of `axis` on how to compute norms for a batch of
      vectors or matrices stored in a tensor.
    axis: If `axis` is `None` (the default), the input is considered a vector
      and a single vector norm is computed over the entire set of values in the
      tensor, i.e. `norm(tensor, ord=ord)` is equivalent to
      `norm(reshape(tensor, [-1]), ord=ord)`.
      If `axis` is a Python integer, the input is considered a batch of vectors,
      and `axis` determines the axis in `tensor` over which to compute vector
      norms.
      If `axis` is a 2-tuple of Python integers it is considered a batch of
      matrices and `axis` determines the axes in `tensor` over which to compute
      a matrix norm.
      Negative indices are supported. Example: If you are passing a tensor that
      can be either a matrix or a batch of matrices at runtime, pass
      `axis=[-2,-1]` instead of `axis=None` to make sure that matrix norms are
      computed.
    keepdims: If True, the axes indicated in `axis` are kept with size 1.
      Otherwise, the dimensions in `axis` are removed from the output shape.
    name: The name of the op.
    keep_dims: Deprecated alias for `keepdims`.

  Returns:
    output: A `Tensor` of the same type as tensor, containing the vector or
      matrix norms. If `keepdims` is True then the rank of output is equal to
      the rank of `tensor`. Otherwise, if `axis` is `None` the output is a scalar,
      if `axis` is an integer, the rank of `output` is one less than the rank
      of `tensor`, if `axis` is a 2-tuple the rank of `output` is two less
      than the rank of `tensor`.

  Raises:
    ValueError: If `ord` or `axis` is invalid.

  @compatibility(numpy)
  Mostly equivalent to numpy.linalg.norm.
  Not supported: ord <= 0, 2-norm for matrices, nuclear norm.
  Other differences:
    a) If axis is `None`, treats the flattened `tensor` as a vector
     regardless of rank.
    b) Explicitly supports 'euclidean' norm as the default, including for
     higher order tensors.
  @end_compatibility
  """
    keepdims = deprecation.deprecated_argument_lookup('keepdims', keepdims,
                                                      'keep_dims', keep_dims)
    if keepdims is None:
        keepdims = False

    is_matrix_norm = ((isinstance(axis, tuple) or isinstance(axis, list))
                      and len(axis) == 2)
    if is_matrix_norm:
        axis = tuple(axis)
        if (not isinstance(axis[0], int) or not isinstance(axis[1], int)
                or axis[0] == axis[1]):
            raise ValueError(
                "'axis' must be None, an integer, or a tuple of 2 unique integers"
            )
        supported_matrix_norms = ['euclidean', 'fro', 1, 2, np.inf]
        if ord not in supported_matrix_norms:
            raise ValueError(
                "'ord' must be a supported matrix norm in %s, got %s" %
                (supported_matrix_norms, ord))
    else:
        if not (isinstance(axis, int) or axis is None):
            raise ValueError(
                "'axis' must be None, an integer, or a tuple of 2 unique integers"
            )

        supported_vector_norms = ['euclidean', 1, 2, np.inf]
        if (not np.isreal(ord)
                or ord <= 0) and ord not in supported_vector_norms:
            raise ValueError("'ord' must be a supported vector norm, got %s" %
                             ord)
        if axis is not None:
            axis = (axis, )

    with ops.name_scope(name, 'norm', [tensor]):
        tensor = ops.convert_to_tensor(tensor)

        if ord in ['fro', 'euclidean', 2, 2.0]:
            if is_matrix_norm and ord in [2, 2.0]:
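                # The matrix 2-norm is the largest singular value: move the two
                # matrix axes to the end, run SVD without computing U/V, and
                # take the maximum singular value of each matrix in the batch.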
                rank = array_ops.rank(tensor)
                positive_axis = functional_ops.map_fn(
                    lambda i: control_flow_ops.cond(
                        i >= 0, lambda: i, lambda: i + rank),
                    ops.convert_to_tensor(axis))
                axes = math_ops.range(rank)
                perm_before = array_ops.concat(
                    [array_ops.setdiff1d(axes, positive_axis)[0], positive_axis],
                    axis=0)
                perm_after = functional_ops.map_fn(
                    lambda i: math_ops.cast(
                        array_ops.squeeze(
                            array_ops.where(math_ops.equal(perm_before, i))),
                        dtype=dtypes.int32),
                    axes)
                permed = array_ops.transpose(tensor, perm=perm_before)
                matrix_2_norm = array_ops.expand_dims(
                    math_ops.reduce_max(
                        math_ops.abs(
                            gen_linalg_ops.svd(permed, compute_uv=False)[0]),
                        axis=-1,
                        keepdims=True),
                    axis=-1)
                result = array_ops.transpose(matrix_2_norm, perm=perm_after)
            else:
                result = math_ops.sqrt(
                    math_ops.reduce_sum(tensor * math_ops.conj(tensor),
                                        axis,
                                        keepdims=True))
        else:
            result = math_ops.abs(tensor)
            if ord == 1:
                sum_axis = None if axis is None else axis[0]
                result = math_ops.reduce_sum(result, sum_axis, keepdims=True)
                if is_matrix_norm:
                    result = math_ops.reduce_max(
                        result, axis[-1], keepdims=True)
            elif ord == np.inf:
                if is_matrix_norm:
                    result = math_ops.reduce_sum(
                        result, axis[1], keepdims=True)
                max_axis = None if axis is None else axis[0]
                result = math_ops.reduce_max(result, max_axis, keepdims=True)
            else:
                # General p-norms (positive p only)
                result = math_ops.pow(
                    math_ops.reduce_sum(math_ops.pow(result, ord),
                                        axis,
                                        keepdims=True), 1.0 / ord)
        if not keepdims:
            result = array_ops.squeeze(result, axis)
        return result
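
# Usage sketch (assuming TensorFlow 1.x and the public `tf.norm` wrapper of
# the op above): the results mirror `numpy.linalg.norm` for the supported
# orders.
import numpy as np
import tensorflow as tf

x = tf.constant([[3.0, 4.0], [6.0, 8.0]])
frob = tf.norm(x, ord='euclidean')               # sqrt(9 + 16 + 36 + 64) ~ 11.18
row_l1 = tf.norm(x, ord=1, axis=1)               # per-row 1-norms: [7., 14.]
mat_inf = tf.norm(x, ord=np.inf, axis=[-2, -1])  # max absolute row sum: 14.
with tf.Session() as sess:
  print(sess.run([frob, row_l1, mat_inf]))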