def build_graph(device, input_shape, filter_shape, strides, padding, num_iters):
  """builds a graph containing a sequence of conv2d operations.

  Args:
    device: String, the device to run on.
    input_shape: Shape of the input tensor.
    filter_shape: Shape of the filter tensor.
    strides: A list of ints. 1-D of length 4. The stride of sliding
             window for each dimension of input.
    padding: A string from: "SAME", "VALID". The type of padding
             algorithm to use.
    num_iters: number of iterations to run conv2d.

  Returns:
    A grouped op that runs the generated conv2d operations.
  """
  with ops.device("/%s:0" % device):
    inp = variables.Variable(random_ops.truncated_normal(input_shape))
    filt = variables.Variable(random_ops.truncated_normal(filter_shape))

    outputs = []
    conv2d_op = nn_ops.conv2d(inp, filt, strides, padding, data_format="NHWC")
    outputs.append(conv2d_op)
    for _ in range(1, num_iters):
      with ops.control_dependencies([conv2d_op]):
        conv2d_op = nn_ops.conv2d(
            inp, filt, strides, padding, data_format="NHWC")
        outputs.append(conv2d_op)
    return control_flow_ops.group(*outputs)
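
A minimal usage sketch for build_graph, assuming a TF1-style graph/session
runtime; the shapes and iteration count in the commented invocation are
illustrative, not taken from the original benchmark:

import time
import tensorflow.compat.v1 as tf

def run_graph(device, input_shape, filter_shape, strides, padding, num_iters):
  """Runs and times the grouped conv2d ops returned by build_graph (sketch)."""
  with tf.Graph().as_default():
    target = build_graph(device, input_shape, filter_shape, strides, padding,
                         num_iters)
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      start = time.time()
      sess.run(target)
      print("%d chained conv2d ops took %.3f s" % (num_iters,
                                                   time.time() - start))

# Hypothetical invocation:
# run_graph("cpu", [4, 32, 32, 16], [3, 3, 16, 32], [1, 1, 1, 1], "SAME", 10)
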
  def testSmallNetwork(self):
    image = array_ops.placeholder(dtypes.float32, shape=[1, 28, 28, 1])
    label = array_ops.placeholder(dtypes.float32, shape=[1, 10])
    w = variables.Variable(
        random_ops.truncated_normal([5, 5, 1, 32], stddev=0.1))
    b = variables.Variable(random_ops.truncated_normal([32], stddev=0.1))
    conv = nn_ops.conv2d(image, w, strides=[1, 1, 1, 1], padding="SAME")
    h_conv = nn_ops.relu(conv + b)
    h_conv_flat = array_ops.reshape(h_conv, [1, -1])

    w_fc = variables.Variable(
        random_ops.truncated_normal([25088, 10], stddev=0.1))
    b_fc = variables.Variable(random_ops.truncated_normal([10], stddev=0.1))
    y_conv = nn_ops.softmax(math_ops.matmul(h_conv_flat, w_fc) + b_fc)

    cross_entropy = math_ops.reduce_mean(-math_ops.reduce_sum(
        label * math_ops.log(y_conv), reduction_indices=[1]))
    _ = adam.AdamOptimizer(1e-4).minimize(cross_entropy)

    mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph())
    report = cost_analyzer.GenerateCostReport(mg)

    self.assertTrue(b"MatMul" in report)
    self.assertTrue(b"ApplyAdam" in report)
    self.assertTrue(b"Conv2D" in report)
    self.assertTrue(b"Conv2DBackpropInput" in report)
    self.assertTrue(b"Conv2DBackpropFilter" in report)
    self.assertTrue(b"Softmax" in report)

    # Also print the report to make it easier to debug
    print("{}".format(report))
 def testNoCSE(self):
   with self.test_session(use_gpu=True):
     shape = [2, 3, 4]
     rnd1 = random_ops.truncated_normal(shape, 0.0, 1.0, dtypes.float32)
     rnd2 = random_ops.truncated_normal(shape, 0.0, 1.0, dtypes.float32)
     diff = rnd2 - rnd1
     self.assertTrue(np.linalg.norm(diff.eval()) > 0.1)
def loop():
  random_seed.set_random_seed(0)
  x1 = random_ops.truncated_normal([1, 784], seed=0)
  x2 = random_ops.truncated_normal([1, 784], seed=0)
  x3 = random_ops.truncated_normal([1, 784], seed=0)
  x4 = random_ops.truncated_normal([1, 784], seed=0)
  elems = (x1, x2, x3, x4)
  outputs = functional_ops.map_fn(two_layer_model, elems, dtype=dtypes.float32)
  return outputs
def _loop_with_vec_and_4d():
  random_seed.set_random_seed(0)
  x1 = random_ops.truncated_normal([1, 784], seed=0)
  x2 = random_ops.truncated_normal([1, 784], seed=0)
  x3 = random_ops.truncated_normal([1, 784], seed=0)
  x4 = random_ops.truncated_normal([1, 784], seed=0)
  elems = (x1, x2, x3, x4)
  outputs = functional_ops.map_fn(
      _model_with_vec_and_4d, elems, dtype=dtypes.float32)
  return outputs
Example #6
 def testTruncatedNormal(self):
   # Fully known shape.
   rnd1 = random_ops.truncated_normal([1, 2, 3])
   self.assertEqual([1, 2, 3], rnd1.get_shape())
   # Partially known shape.
   rnd2 = random_ops.truncated_normal(
       array_ops.placeholder(dtypes.int32, shape=(3,)))
   self.assertEqual([None, None, None], rnd2.get_shape().as_list())
   # Unknown shape.
   rnd3 = random_ops.truncated_normal(array_ops.placeholder(dtypes.int32))
   self.assertIs(None, rnd3.get_shape().ndims)
def build_fused_conv_bias_relu_graph(device, input_shape, filter_shape, strides,
                                     padding, num_iters, data_format):
  """builds a graph containing a sequence of conv2d operations.

  Args:
    device: String, the device to run on.
    input_shape: Shape of the input tensor.
    filter_shape: Shape of the filter tensor.
    strides: A list of ints. 1-D of length 4. The stride of sliding
             window for each dimension of input.
    padding: A string from: "SAME", "VALID". The type of padding
             algorithm to use.
    num_iters: number of iterations to run conv2d.
    data_format: data format string of input, 'NHWC' and 'NCHW' are
                 supported.

  Returns:
    A grouped op that runs the generated fused conv2d/bias/relu operations.
  """
  if data_format == "NCHW":
    input_shape = [
        input_shape[0], input_shape[3], input_shape[1], input_shape[2]
    ]
  with ops.device("/%s:0" % device):
    inp = variables.Variable(random_ops.truncated_normal(input_shape))
    filt = variables.Variable(random_ops.truncated_normal(filter_shape))
    bias_shape = [filter_shape[-1]]
    bias = variables.Variable(random_ops.truncated_normal(bias_shape))

    outputs = []
    fused_out = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
        inp,
        filt,
        bias,
        strides,
        padding,
        data_format=data_format,
        activation_mode="Relu")
    outputs.append(fused_out)
    for _ in range(1, num_iters):
      with ops.control_dependencies([fused_out]):
        # pylint: disable=g-line-too-long
        fused_out = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(  # pylint: disable=line-too-long
            inp,
            filt,
            bias,
            strides,
            padding,
            data_format=data_format,
            activation_mode="Relu")
        outputs.append(fused_out)
    return control_flow_ops.group(*outputs)
  def testSmallNetwork(self):
    image = array_ops.placeholder(dtypes.float32, shape=[1, 28, 28, 1])
    label = array_ops.placeholder(dtypes.float32, shape=[1, 10])
    w = variables.Variable(
        random_ops.truncated_normal([5, 5, 1, 32], stddev=0.1))
    b = variables.Variable(random_ops.truncated_normal([32], stddev=0.1))
    conv = nn_ops.conv2d(image, w, strides=[1, 1, 1, 1], padding="SAME")
    h_conv = nn_ops.relu(conv + b)
    h_conv_flat = array_ops.reshape(h_conv, [1, -1])

    w_fc = variables.Variable(
        random_ops.truncated_normal([25088, 10], stddev=0.1))
    b_fc = variables.Variable(random_ops.truncated_normal([10], stddev=0.1))
    y_conv = nn_ops.softmax(math_ops.matmul(h_conv_flat, w_fc) + b_fc)

    cross_entropy = math_ops.reduce_mean(-math_ops.reduce_sum(
        label * math_ops.log(y_conv), reduction_indices=[1]))
    _ = adam.AdamOptimizer(1e-4).minimize(cross_entropy)

    mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph())
    report = cost_analyzer.GenerateCostReport(mg)

    # Print the report to make it easier to debug
    print("{}".format(report))

    self.assertTrue(b"MatMul" in report)
    self.assertTrue(b"ApplyAdam" in report)
    self.assertTrue(b"Conv2D" in report)
    self.assertTrue(b"Conv2DBackpropInput" in report)
    self.assertTrue(b"Conv2DBackpropFilter" in report)
    self.assertTrue(b"Softmax" in report)

    for op_type in [
        b"MatMul", b"Conv2D", b"Conv2DBackpropInput", b"Conv2DBackpropFilter"
    ]:
      matcher = re.compile(
          br"\s+" + op_type + br",\s*(\d+),\s*(\d+),\s*([\d\.eE+-]+)%,\s*" +
          br"([\d\.eE+-]+)%,\s*(-?\d+),\s*(\d+),", re.MULTILINE)
      m = matcher.search(report)

      op_count = int(m.group(1))
      # upper = int(m.group(5))
      lower = int(m.group(6))
      if op_type == b"MatMul":
        self.assertEqual(3, op_count)
      else:
        self.assertEqual(1, op_count)
      self.assertTrue(0 <= lower)
Example #9
def sequence_softmax(inputs, noutput, scope=None, name=None, linear_name=None):
  """Run a softmax layer over all the time steps of an input sequence.

  Args:
    inputs: (length, batch_size, depth) tensor
    noutput: output depth
    scope: optional scope name
    name: optional name for output tensor
    linear_name: name for linear (pre-softmax) output

  Returns:
    A tensor of size (length, batch_size, noutput).

  """
  length, _, ninputs = _shape(inputs)
  inputs_u = array_ops.unstack(inputs)
  output_u = []
  with variable_scope.variable_scope(scope, "SequenceSoftmax", [inputs]):
    initial_w = random_ops.truncated_normal([0 + ninputs, noutput], stddev=0.1)
    initial_b = constant_op.constant(0.1, shape=[noutput])
    w = variables.model_variable("weights", initializer=initial_w)
    b = variables.model_variable("biases", initializer=initial_b)
    for i in xrange(length):
      with variable_scope.variable_scope(scope, "SequenceSoftmaxStep",
                                         [inputs_u[i]]):
        # TODO(tmb) consider using slim.fully_connected(...,
        # activation_fn=tf.nn.softmax)
        linear = nn_ops.xw_plus_b(inputs_u[i], w, b, name=linear_name)
        output = nn_ops.softmax(linear)
        output_u += [output]
    outputs = array_ops.stack(output_u, name=name)
  return outputs
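
A minimal usage sketch for sequence_softmax, assuming the same internal TF1
modules imported above plus the public compat.v1 session API; the shapes and
class count are illustrative:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# Hypothetical input: a (length=20, batch_size=8, depth=16) sequence mapped to
# per-timestep distributions over noutput=5 classes.
inputs = random_ops.truncated_normal([20, 8, 16], stddev=0.1)
probs = sequence_softmax(inputs, noutput=5)  # -> shape (20, 8, 5)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  print(sess.run(probs).shape)  # (20, 8, 5)
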
  def testTwoConvLayers(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      output = two_layer_model(x)

      with session.Session() as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if node.name.startswith('LayoutOptimizerTranspose'):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Reshape-0',
                    nodes)
      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-Relu_1-MaxPool_1',
                    nodes)

      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
  def testStridedSliceWithMask1011(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      # This will generate a StridedSlice op with begin mask and
      # end mask 11(1011).
      s = conv[:, :, 1:-1, :]
      output = array_ops.identity(s)

      with session.Session() as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('strided_slice-0-0', nodes)
      self.assertIn('strided_slice-1-LayoutOptimizer', nodes)
      self.assertIn('strided_slice-2-LayoutOptimizer', nodes)
      self.assertIn('strided_slice-3-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
  def testTernaryOp(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      add = math_ops.add(conv, conv)
      mean = math_ops.reduce_mean(conv)
      condition = math_ops.less(conv, mean)
      select = gen_math_ops._select(condition, conv, add)
      output = array_ops.identity(select)

      with session.Session() as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if node.name.startswith('LayoutOptimizerTranspose'):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 3
      self.assertEqual(expected_num_transposes, num_transposes)
      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', nodes)
      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-Select-0-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
  def testPadWithNonConstPaddings(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      paddings = array_ops.placeholder(dtype='int32')
      pad = array_ops.pad(conv, paddings)
      output = array_ops.identity(pad)

      paddings_val = [[1, 2], [3, 4], [5, 6], [7, 8]]
      with session.Session() as sess:
        output_val_ref = sess.run(output, feed_dict={paddings: paddings_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={
                paddings: paddings_val
            })

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if node.name.startswith('LayoutOptimizerTranspose'):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', nodes)
      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-Pad-0-0', nodes)
      self.assertIn('LayoutOptimizerVecPermuteNHWCToNCHW_Pad_1', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
Example #14
 def __call__(self, shape, dtype=None, partition_info=None):
   if dtype is None:
     dtype = self.dtype
   scale = self.scale
   scale_shape = shape
   if partition_info is not None:
     scale_shape = partition_info.full_shape
   fan_in, fan_out = _compute_fans(scale_shape)
   if self.mode == "fan_in":
     scale /= max(1., fan_in)
   elif self.mode == "fan_out":
     scale /= max(1., fan_out)
   else:
     scale /= max(1., (fan_in + fan_out) / 2.)
   if self.distribution == "normal" or self.distribution == "truncated_normal":
     # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
     stddev = math.sqrt(scale) / .87962566103423978
     return random_ops.truncated_normal(
         shape, 0.0, stddev, dtype, seed=self.seed)
   elif self.distribution == "untruncated_normal":
     stddev = math.sqrt(scale)
     return random_ops.random_normal(
         shape, 0.0, stddev, dtype, seed=self.seed)
   else:
     limit = math.sqrt(3.0 * scale)
     return random_ops.random_uniform(
         shape, -limit, limit, dtype, seed=self.seed)
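
The .87962566103423978 constant in the truncated-normal branch above can be
checked directly; a small sketch, assuming SciPy is available:

import math
from scipy.stats import truncnorm

# Standard deviation of a unit normal truncated to [-2, 2]:
std_trunc = truncnorm.std(a=-2, b=2, loc=0., scale=1.)
print(std_trunc)  # ~0.87962566103423978

# Dividing the target stddev by this constant compensates for the variance lost
# to truncation, so the truncated samples end up with stddev ~= sqrt(scale).
scale = 0.5
stddev = math.sqrt(scale) / std_trunc
print(stddev * std_trunc, math.sqrt(scale))  # equal by construction
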
  def testGradient(self):
    if not test.is_gpu_available(cuda_only=True):
      self.skipTest('GPU required')

    random_seed.set_random_seed(0)
    x = random_ops.truncated_normal([1, 200, 200, 3], seed=0)
    y = conv_layers.conv2d(x, 32, [3, 3])
    z = conv_layers.conv2d(y, 32, [3, 3])
    optimizer = gradient_descent.GradientDescentOptimizer(1e-4)
    loss = math_ops.reduce_mean(z)
    train_op = optimizer.minimize(loss)
    graph = ops.get_default_graph()
    graph.add_to_collection('train_op', train_op)
    meta_graph = saver_lib.export_meta_graph(graph_def=graph.as_graph_def())

    rewrite_options = rewriter_config_pb2.RewriterConfig(
        optimize_tensor_layout=True)
    optimized_graph = tf_optimizer.OptimizeGraph(rewrite_options, meta_graph)

    found = 0
    for node in optimized_graph.node:
      if node.op in ['Conv2D', 'Conv2DBackpropFilter', 'Conv2DBackpropInput']:
        found += 1
        self.assertEqual(node.attr['data_format'].s, 'NCHW')
    self.assertEqual(found, 5)
Example #16
  def testGradient(self):
    with ops.Graph().as_default() as g:
      inputs = array_ops.placeholder(
          dtypes.float32, shape=[None, 100], name="input")
      weights = array_ops.placeholder(
          dtypes.float32, shape=[100, 10], name="weights")
      biases = array_ops.placeholder(dtypes.float32, shape=[10], name="biases")
      activations = nn_ops.relu(
          math_ops.matmul(inputs, weights) + biases, name="activations")
      loss = math_ops.reduce_mean(activations, name="loss")
    gdef = g.as_graph_def()

    with ops.Graph().as_default() as g:
      input_placeholder = array_ops.placeholder(dtypes.float32, shape=[32, 100])
      weights_var = variables.Variable(
          random_ops.truncated_normal([100, 10]), name="weights")
      biases_var = variables.Variable(array_ops.zeros([10]), name="biases")
      activations, loss = importer.import_graph_def(
          gdef,
          input_map={
              "input:0": input_placeholder,
              "weights:0": weights_var,
              "biases:0": biases_var
          },
          return_elements=["activations:0", "loss:0"])
      self.assertEqual([32, 10], activations.get_shape())
      self.assertEqual([], loss.get_shape())
      weights_grad, biases_grad = gradients_impl.gradients(
          loss, [weights_var, biases_var])
      self.assertEqual([100, 10], weights_grad.get_shape())
      self.assertEqual([10], biases_grad.get_shape())
  def testSliceWithNonConstAxis(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      size = array_ops.placeholder(dtype='int32')
      s = array_ops.slice(conv, [0, 0, 0, 0], size)
      output = array_ops.identity(s)

      size_val = [1, 2, 3, 4]
      with session.Session() as sess:
        output_val_ref = sess.run(output, feed_dict={size: size_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={
                size: size_val
            })

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Slice-0-0', nodes)
      self._assert_vec_nhwc_to_nchw('Slice-2', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
  def testSelectOpScalarCondition(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      add = math_ops.add(conv, conv)
      condition = constant_op.constant(True)
      select = gen_math_ops._select(condition, conv, add)
      output = array_ops.identity(select)

      with session.Session() as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Select-0-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
  def testSplitWithNonConstAxis(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      dim = array_ops.placeholder(dtype='int32')
      split = array_ops.split(conv, 2, axis=dim)
      scale = constant_op.constant(0.1, shape=[32])
      offset = constant_op.constant(0.3, shape=[32])
      bn0 = nn.fused_batch_norm(split[0], scale, offset)
      bn1 = nn.fused_batch_norm(split[1], scale, offset)
      add = bn0[0] + bn1[0]
      output = array_ops.identity(add)

      with session.Session() as sess:
        output_val_ref = sess.run(output, feed_dict={dim: 3})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata, feed_dict={dim: 3})

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('add_2-0-0', nodes)
      self._assert_map_nhwc_to_nchw('split-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
  def testConcatWithControlDependency(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      axis = constant_op.constant(3)
      var = variables.Variable(3)
      assign = state_ops.assign(var, 6)
      with ops.control_dependencies([assign]):
        concat = array_ops.concat([conv, conv], axis)
      output = array_ops.identity(concat)

      with session.Session() as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('concat-0-0', nodes)
      self.assertIn('concat-2-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
  def testReduceSumAlongC(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv, axis=[3])
      output = array_ops.identity(reduce_sum)

      with session.Session() as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Three transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 1
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
  def testSelectOpConditionUnknownShape(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      add = math_ops.add(conv, conv)
      condition = array_ops.placeholder(dtype='bool')
      select = gen_math_ops._select(condition, conv, add)
      output = array_ops.identity(select)

      condition_val = np.zeros((1, 7, 7, 64))
      with session.Session() as sess:
        output_val_ref = sess.run(output, feed_dict={condition: condition_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={condition: condition_val})

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 3
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
  def testReverseWithConstDims(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      dims = constant_op.constant([3, 1], name='DimsConst')
      reverse = array_ops.reverse(conv, dims)
      output = array_ops.identity(reverse)

      with session.Session() as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('ReverseV2-0-0', nodes)
      self.assertIn('ReverseV2-1-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
Example #24
 def _initializer(shape, dtype=dtype, partition_info=None):
   """Initializer function."""
   if not dtype.is_floating:
     raise TypeError('Cannot create initializer for non-floating point type.')
   # Estimating fan_in and fan_out is not possible to do perfectly, but we try.
   # This is the right thing for matrix multiply and convolutions.
   if shape:
     fan_in = float(shape[-2]) if len(shape) > 1 else float(shape[-1])
     fan_out = float(shape[-1])
   else:
     fan_in = 1.0
     fan_out = 1.0
   for dim in shape[:-2]:
     fan_in *= float(dim)
     fan_out *= float(dim)
   if mode == 'FAN_IN':
     # Count only number of input connections.
     n = fan_in
   elif mode == 'FAN_OUT':
     # Count only number of output connections.
     n = fan_out
   elif mode == 'FAN_AVG':
     # Average number of inputs and output connections.
     n = (fan_in + fan_out) / 2.0
   if uniform:
     # To get stddev = math.sqrt(factor / n) need to adjust for uniform.
     limit = math.sqrt(3.0 * factor / n)
     return random_ops.random_uniform(shape, -limit, limit,
                                      dtype, seed=seed)
   else:
     # To get stddev = math.sqrt(factor / n) need to adjust for truncated.
     trunc_stddev = math.sqrt(1.3 * factor / n)
     return random_ops.truncated_normal(shape, 0.0, trunc_stddev, dtype,
                                        seed=seed)
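
A hedged usage sketch for an initializer of this form; the enclosing factory
that binds factor, mode, uniform, seed and dtype is not shown here, so
my_initializer below stands in for a hypothetical closure it would return:

# Hypothetical: my_initializer was built with factor=2.0, mode='FAN_IN',
# uniform=False. It can be handed to get_variable like any other initializer.
w = variable_scope.get_variable(
    "conv_weights", shape=[3, 3, 64, 128], initializer=my_initializer)
# fan_in = 3 * 3 * 64 = 576, so weights are drawn from a truncated normal with
# stddev ~= sqrt(1.3 * 2.0 / 576).
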
  def testSplitWithNonConstAxis(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      dim = array_ops.placeholder(dtype='int32')
      split = array_ops.split(conv, 2, axis=dim)
      output = math_ops.reduce_sum(split[0])

      with session.Session() as sess:
        output_val_ref = sess.run(output, feed_dict={dim: 3})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata, feed_dict={dim: 3})

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if node.name.startswith('LayoutOptimizerTranspose'):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Reshape-0',
                    nodes)
      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-split-Sum-0', nodes)
      self.assertIn('LayoutOptimizerDim-split', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
  def testSplitVWithNonConstAxis(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      dim = array_ops.placeholder(dtype='int32')
      sizes = constant_op.constant([50, 10, 4], shape=[3])
      split = gen_array_ops._split_v(
          value=conv, size_splits=sizes, axis=dim, num_split=3)
      output = math_ops.reduce_sum(split[0])

      with session.Session() as sess:
        output_val_ref = sess.run(output, feed_dict={dim: 3})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata, feed_dict={dim: 3})

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('SplitV-0-0', nodes)
      self._assert_map_nhwc_to_nchw('SplitV-2', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
  def testMaxPoolV2(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      ksize = constant_op.constant([1, 2, 3, 1], shape=[4])
      strides = array_ops.placeholder(dtype='int32', shape=[4])
      max_pool = gen_nn_ops._max_pool_v2(conv, ksize, strides, 'VALID')
      output = array_ops.identity(max_pool)

      strides_val = [1, 3, 2, 1]
      with session.Session() as sess:
        output_val_ref = sess.run(output, feed_dict={strides: strides_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={
                strides: strides_val
            })

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('MaxPoolV2-0-0', nodes)
      self._assert_vec_nhwc_to_nchw('MaxPoolV2-2', nodes)
      self.assertIn('MaxPoolV2-1-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
Example #28
 def func():
   with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
     rng = random_ops.truncated_normal(
         [num], mean=mu, stddev=sigma, dtype=dtype, seed=seed)
     ret = np.empty([10, num])
     for i in xrange(10):
       ret[i, :] = sess.run(rng)
   return ret
def _model_with_second_port():
  random_seed.set_random_seed(0)
  x = random_ops.truncated_normal([2, 5, 5, 4], seed=0)
  scale = constant_op.constant(0.1, shape=[4])
  offset = constant_op.constant(0.3, shape=[4])
  y, mean, _ = nn.fused_batch_norm(x, scale, offset)
  mul = math_ops.add(y, mean)
  output = array_ops.identity(mul)
  return output
 def testConv2DTransposeShapeInference(self):
   # Test case for 8972
   initializer = random_ops.truncated_normal(
       [3, 3, 5, 1], mean=0.0, stddev=0.01, dtype=dtypes.float32)
   x = variables.Variable(random_ops.random_normal([3, 10, 5, 1]))
   f = variable_scope.get_variable("f", initializer=initializer)
   f_shape = array_ops.stack([array_ops.shape(x)[0], 10, 5, 5])
   output = nn_ops.conv2d_transpose(
       x, f, f_shape, strides=[1, 1, 1, 1], padding="SAME")
   self.assertEqual(output.get_shape().as_list(), [None, 10, 5, 5])
def _add_scaled_noise_to_gradients(grads_and_vars, gradient_noise_scale):
  """Adds scaled noise from a 0-mean normal distribution to gradients."""
  gradients, variables = zip(*grads_and_vars)
  noisy_gradients = []
  for gradient in gradients:
    if gradient is None:
      noisy_gradients.append(None)
      continue
    if isinstance(gradient, ops.IndexedSlices):
      gradient_shape = gradient.dense_shape
    else:
      gradient_shape = gradient.get_shape()
    noise = random_ops.truncated_normal(gradient_shape) * gradient_noise_scale
    noisy_gradients.append(gradient + noise)
  return list(zip(noisy_gradients, variables))
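
A minimal sketch of how _add_scaled_noise_to_gradients might be wired into a
training step; the optimizer, loss tensor and noise scale are illustrative:

# Hypothetical usage with a plain SGD optimizer and an existing `loss` tensor.
opt = gradient_descent.GradientDescentOptimizer(0.1)
grads_and_vars = opt.compute_gradients(loss)
noisy_grads_and_vars = _add_scaled_noise_to_gradients(
    grads_and_vars, gradient_noise_scale=0.01)
train_op = opt.apply_gradients(noisy_grads_and_vars)
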
Example #32
def build_graph(device, input_shape, axes, num_layers, mode, scale, train):
    """Build a graph containing a sequence of batch normalizations.

  Args:
    device: string, the device to run on.
    input_shape: shape of the input tensor.
    axes: axes that are to be normalized across.
    num_layers: number of batch normalization layers in the graph.
    mode: "op", "py" or "slow" depending on the implementation.
    scale: scale after normalization.
    train: if true, also run backprop.

  Returns:
    An array of tensors to run()
  """
    moment_shape = []
    keep_dims = mode == "py" or mode == "slow"
    if keep_dims:
        for axis in range(len(input_shape)):
            if axis in axes:
                moment_shape.append(1)
            else:
                moment_shape.append(input_shape[axis])
    else:
        for axis in range(len(input_shape)):
            if axis not in axes:
                moment_shape.append(input_shape[axis])
    with ops.device("/%s:0" % device):
        tensor = variables.Variable(random_ops.truncated_normal(input_shape))
        for _ in range(num_layers):
            mean, variance = nn_impl.moments(tensor, axes, keep_dims=keep_dims)
            beta = variables.Variable(array_ops.zeros(moment_shape))
            gamma = variables.Variable(
                constant_op.constant(1.0, shape=moment_shape))
            if mode == "py":
                tensor = batch_norm_py(tensor, mean, variance, beta, gamma,
                                       scale)
            elif mode == "op":
                tensor = batch_norm_op(tensor, mean, variance, beta, gamma,
                                       scale)
            elif mode == "slow":
                tensor = batch_norm_slow(tensor, mean, variance, beta, gamma,
                                         scale)
        if train:
            return gradients_impl.gradients([tensor],
                                            variables.trainable_variables())
        else:
            return [tensor]
Example #33
  def testSecondGradient(self):
    images_placeholder = array_ops.placeholder(dtypes.float32, shape=(3, 2))
    labels_placeholder = array_ops.placeholder(dtypes.int32, shape=(3))
    weights = variables.Variable(random_ops.truncated_normal([2], stddev=1.0))
    weights_with_zeros = array_ops.stack([array_ops.zeros([2]), weights],
                                         axis=1)
    logits = math_ops.matmul(images_placeholder, weights_with_zeros)
    cross_entropy = nn_ops.sparse_softmax_cross_entropy_with_logits(
        labels=labels_placeholder, logits=logits)
    loss = math_ops.reduce_mean(cross_entropy)

    # Taking the second gradient should fail, since it is not
    # yet supported.
    with self.assertRaisesRegexp(LookupError,
                                 "explicitly disabled"):
      _ = gradients_impl.hessians(loss, [weights])
  def testTruncatedNormalIsInRange(self):
    count = 10000000
    # TODO(b/34339814): make this test work with 16 bit float types.
    for dtype in self._random_types() & {dtypes.float32, dtypes.float64}:
      with self.cached_session() as sess:
        with self.test_scope():
          x = random_ops.truncated_normal(shape=[count], dtype=dtype)
        y = self.evaluate(x)

        def normal_cdf(x):
          return .5 * math.erfc(-x / math.sqrt(2))

        def normal_pdf(x):
          return math.exp(-(x**2) / 2.) / math.sqrt(2 * math.pi)

        def probit(x, sess=sess):
          return self.evaluate(special_math.ndtri(x))

        a = -2.
        b = 2.
        mu = 0.
        sigma = 1.

        alpha = (a - mu) / sigma
        beta = (b - mu) / sigma
        z = normal_cdf(beta) - normal_cdf(alpha)

        self.assertTrue((y >= a).sum() == count)
        self.assertTrue((y <= b).sum() == count)

        # For more information on these calculations, see:
        # Burkardt, John. "The Truncated Normal Distribution".
        # Department of Scientific Computing website. Florida State University.
        expected_mean = mu + (normal_pdf(alpha) - normal_pdf(beta)) / z * sigma
        actual_mean = np.mean(y)
        self.assertAllClose(actual_mean, expected_mean, atol=2e-3)

        expected_median = mu + probit(
            (normal_cdf(alpha) + normal_cdf(beta)) / 2.) * sigma
        actual_median = np.median(y)
        self.assertAllClose(actual_median, expected_median, atol=1e-2)

        expected_variance = sigma**2 * (1 + (
            (alpha * normal_pdf(alpha) - beta * normal_pdf(beta)) / z) - (
                (normal_pdf(alpha) - normal_pdf(beta)) / z)**2)
        actual_variance = np.var(y)
        self.assertAllClose(actual_variance, expected_variance, rtol=2*1e-3)
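
The closed-form moments asserted above can be evaluated on their own; a small
standalone sketch following the same Burkardt formulas (pure Python, no TF):

import math

def normal_pdf(x):
  return math.exp(-(x ** 2) / 2.) / math.sqrt(2 * math.pi)

def normal_cdf(x):
  return .5 * math.erfc(-x / math.sqrt(2))

a, b, mu, sigma = -2., 2., 0., 1.
alpha, beta = (a - mu) / sigma, (b - mu) / sigma
z = normal_cdf(beta) - normal_cdf(alpha)

expected_mean = mu + (normal_pdf(alpha) - normal_pdf(beta)) / z * sigma
expected_variance = sigma**2 * (
    1 + (alpha * normal_pdf(alpha) - beta * normal_pdf(beta)) / z
    - ((normal_pdf(alpha) - normal_pdf(beta)) / z)**2)

print(expected_mean)                 # 0.0 by symmetry
print(math.sqrt(expected_variance))  # ~0.8796, the truncnorm std constant
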
Example #35
    def __call__(self, shape, dtype=dtypes.float32):
        """Returns a tensor object initialized as specified by the initializer.

    Args:
      shape: Shape of the tensor.
      dtype: Optional dtype of the tensor. Only floating point types are
       supported.

    Raises:
      ValueError: If the dtype is not floating point
    """
        dtype = _assert_float_dtype(dtype)
        return random_ops.truncated_normal(shape,
                                           self.mean,
                                           self.stddev,
                                           dtype,
                                           seed=self.seed)
Example #36
    def _initializer(shape, dtype=dtype, partition_info=None):

        # total number of basis filters
        Q = shape[0]*shape[1]
        if mode == 'FAN_IN':
            fan_in = shape[-2]
            C = fan_in
            # count number of input connections.
        elif mode == 'FAN_OUT':
            fan_out = shape[-2]
            # count number of output connections.
            C = fan_out
        n = C*Q
        # to get stddev = math.sqrt(factor / n) need to adjust for truncated.
        trunc_stddev = math.sqrt(factor / n) / .87962566103423978
        return random_ops.truncated_normal(shape, 0.0, trunc_stddev, dtype,
                                           seed=seed)
Example #37
    def _initializer(shape, dtype=dtype, partition_info=None):
        """Initializer function."""
        if not dtype.is_floating:
            raise TypeError(
                'Cannot create initializer for non-floating point type.')
        # Estimating fan_in and fan_out is not possible to do perfectly, but we try.
        # This is the right thing for matrix multiply and convolutions.
        if shape:
            fan_in = float(shape[-2]) if len(shape) > 1 else float(shape[-1])
            fan_out = float(shape[-1])
        else:
            fan_in = 1.0
            fan_out = 1.0
        for dim in shape[:-2]:
            fan_in *= float(dim)
            fan_out *= float(dim)
        if mode == 'FAN_IN':
            # Count only number of input connections.
            n = fan_in
        elif mode == 'FAN_OUT':
            # Count only number of output connections.
            n = fan_out
        elif mode == 'FAN_AVG':
            # Average number of inputs and output connections.
            n = (fan_in + fan_out) / 2.0
        if other:
            w = scale / np.sqrt(fan_in + fan_out)
            logging.info('in xavier, use other type, scale is {}'.format(scale))
            return w * random_ops.random_normal(shape, seed=seed)

        if uniform:
            # To get stddev = math.sqrt(factor / n) need to adjust for uniform.
            limit = math.sqrt(3.0 * factor / n)
            return random_ops.random_uniform(shape,
                                             -limit,
                                             limit,
                                             dtype,
                                             seed=seed)
        else:
            # To get stddev = math.sqrt(factor / n) need to adjust for truncated.
            trunc_stddev = math.sqrt(1.3 * factor / n)
            return random_ops.truncated_normal(shape,
                                               0.0,
                                               trunc_stddev,
                                               dtype,
                                               seed=seed)
Example #38
    def __call__(self, shape, dtype=dtypes.float32):
        """Returns a tensor object initialized as specified by the initializer.

    Args:
      shape: Shape of the tensor.
      dtype: Optional dtype of the tensor. Only floating point types are
       supported.

    Raises:
      ValueError: If the dtype is not floating point
    """
        partition_info = None  # Kept so the logic can be re-added if necessary
        dtype = _assert_float_dtype(dtype)
        scale = self.scale
        scale_shape = shape
        if partition_info is not None:
            scale_shape = partition_info.full_shape
        fan_in, fan_out = _compute_fans(scale_shape)
        if self.mode == "fan_in":
            scale /= max(1., fan_in)
        elif self.mode == "fan_out":
            scale /= max(1., fan_out)
        else:
            scale /= max(1., (fan_in + fan_out) / 2.)
        if self.distribution == "truncated_normal":
            # constant from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
            stddev = math.sqrt(scale) / .87962566103423978
            return random_ops.truncated_normal(shape,
                                               0.0,
                                               stddev,
                                               dtype,
                                               seed=self.seed)
        elif self.distribution == "untruncated_normal":
            stddev = math.sqrt(scale)
            return random_ops.random_normal(shape,
                                            0.0,
                                            stddev,
                                            dtype,
                                            seed=self.seed)
        else:
            limit = math.sqrt(3.0 * scale)
            return random_ops.random_uniform(shape,
                                             -limit,
                                             limit,
                                             dtype,
                                             seed=self.seed)
  def testStridedSliceGradWithNonConstAxis(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      end = array_ops.placeholder(dtype='int32')
      shape = array_ops.shape(conv)
      end_val = [1, 2, 3, 4]
      s = array_ops.strided_slice(
          conv, [0, 0, 0, 0], end_val, strides=[1, 2, 3, 1])
      s_grad = array_ops.strided_slice_grad(shape, [0, 0, 0, 0], end,
                                            [1, 2, 3, 1], s)
      output = array_ops.identity(s_grad)

      with session.Session() as sess:
        output_val_ref = sess.run(output, feed_dict={end: end_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={
                end: end_val
            })

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if node.name.startswith('LayoutOptimizerTranspose'):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', nodes)
      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-StridedSliceGrad-0-0',
                    nodes)
      self.assertIn('LayoutOptimizerVecPermuteNHWCToNCHW_StridedSliceGrad_2',
                    nodes)
      self.assertIn('LayoutOptimizer-StridedSlice-StridedSliceGrad/begin',
                    nodes)
      self.assertIn('LayoutOptimizer-StridedSlice-StridedSliceGrad/strides',
                    nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
Example #40
def _xavier(n_inputs, n_outputs, shape, uniform, seed, dtype):
    if uniform:
        # 6 was used in the paper.
        init_range = math.sqrt(6.0 / (n_inputs + n_outputs))
        return random_ops.random_uniform(shape,
                                         -init_range,
                                         init_range,
                                         dtype,
                                         seed=seed)
    else:
        # 3 gives us approximately the same limits as above since this repicks
        # values greater than 2 standard deviations from the mean.
        stddev = math.sqrt(3.0 / (n_inputs + n_outputs))
        return random_ops.truncated_normal(shape,
                                           0.0,
                                           stddev,
                                           dtype,
                                           seed=seed)
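
The two branches of _xavier aim for a comparable spread; a rough numeric sketch
with hypothetical layer sizes:

import math

n_inputs, n_outputs = 256, 128
n = n_inputs + n_outputs

init_range = math.sqrt(6.0 / n)  # uniform branch: limit of U(-r, r)
stddev = math.sqrt(3.0 / n)      # truncated-normal branch: pre-truncation stddev

uniform_std = init_range / math.sqrt(3.)    # std of U(-r, r) is r / sqrt(3)
trunc_std = 0.87962566103423978 * stddev    # std after truncating at 2 * stddev

print(uniform_std, trunc_std)  # ~0.072 vs ~0.078: comparable spreads
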
Example #41
 def _initializer(shape, dtype=dtype, partition_info=None):
     """Initializer function."""
     if not dtype.is_floating:
         raise TypeError(
             'Cannot create initializer for non-floating point type.')
     # Estimating fan_in and fan_out is not possible to do perfectly, but we try.
     # This is the right thing for matrix multiply and convolutions.
     if shape:
         f1 = lambda: tf.cast(shape[-2], dtype)
         f2 = lambda: tf.cast(shape[-1], dtype)
         fan_in = tf.case([(tf.greater(len(shape), 1), f1)], default=f2)
         fan_out = tf.cast(shape[-1], dtype)
     else:
         fan_in = 1.0
         fan_out = 1.0
     for dim in shape[:-2]:
         fan_in = tf.multiply(dim, fan_in)
         fan_out = tf.multiply(dim, fan_out)
     if mode == 'FAN_IN':
         # Count only number of input connections.
         n = fan_in
     elif mode == 'FAN_OUT':
         # Count only number of output connections.
         n = fan_out
     elif mode == 'FAN_AVG':
         # Average number of inputs and output connections.
         n = tf.divide(tf.add(fan_in, fan_out), 2.0)
     if uniform:
         # To get stddev = math.sqrt(factor / n) need to adjust for uniform.
         limit = tf.sqrt(tf.divide(tf.multiply(3.0, factor), n))
         return random_ops.random_uniform(shape,
                                          -limit,
                                          limit,
                                          dtype,
                                          seed=seed)
     else:
         # To get stddev = math.sqrt(factor / n) need to adjust for truncated.
         trunc_stddev = tf.sqrt(tf.divide(tf.multiply(1.3, factor), n))
         return random_ops.truncated_normal(shape,
                                            0.0,
                                            trunc_stddev,
                                            dtype,
                                            seed=seed)
Example #42
    def _initializer(shape, dtype=dtype, partition_info=None):
        """Initializer function."""
        if not dtype.is_floating:
            raise TypeError(
                'Cannot create initializer for non-floating point type.')

        if shape:
            fan_in = float(shape[-2]) if len(shape) > 1 else float(shape[-1])
            fan_out = float(shape[-1])
        else:
            fan_in = 1.0
            fan_out = 1.0
        for dim in shape[:-2]:
            fan_in *= float(dim)
            fan_out *= float(dim)
        if mode == 'FAN_IN':
            # Count only number of input connections.
            n = fan_in
        elif mode == 'FAN_OUT':
            # Count only number of output connections.
            n = fan_out
        elif mode == 'FAN_AVG':
            # Average number of inputs and output connections.
            n = (fan_in + fan_out) / 2.0
        if uniform:
            # To get stddev = math.sqrt(factor / n) need to adjust for uniform.
            limit = math.sqrt(3.0 * factor / n)
            return random_ops.random_uniform(shape,
                                             -limit,
                                             limit,
                                             dtype,
                                             seed=seed)
        else:
            # To get stddev = math.sqrt(factor / n) need to adjust for truncated.
            trunc_stddev = math.sqrt(1.3 * factor / n)
            return random_ops.truncated_normal(shape,
                                               0.0,
                                               trunc_stddev,
                                               dtype,
                                               seed=seed)

    return _initializer
Example #43
    def model_fn(features, labels, mode, params):
        """Model function defining an inpainting estimator."""
        batch_size = params['batch_size']
        z_shape = [batch_size] + params['z_shape']
        add_summaries = params['add_summaries']
        input_clip = params['input_clip']

        z = variable_scope.get_variable(
            name=INPUT_NAME,
            initializer=random_ops.truncated_normal(z_shape),
            constraint=lambda x: clip_ops.clip_by_value(
                x, -input_clip, input_clip))

        generator = functools.partial(generator_fn, mode=mode)
        discriminator = functools.partial(discriminator_fn, mode=mode)
        gan_model = tfgan_train.gan_model(generator_fn=generator,
                                          discriminator_fn=discriminator,
                                          real_data=labels,
                                          generator_inputs=z,
                                          check_shapes=False)

        loss = loss_fn(gan_model, features, labels, add_summaries)

        # Use a variable scope to make sure that estimator variables don't
        # cause save/load problems when restoring from ckpts.
        with variable_scope.variable_scope(OPTIMIZER_NAME):
            opt = optimizer(learning_rate=params['learning_rate'],
                            **params['opt_kwargs'])
            train_op = opt.minimize(
                loss=loss,
                global_step=training_util.get_or_create_global_step(),
                var_list=[z])

        if add_summaries:
            z_grads = gradients_impl.gradients(loss, z)
            summary.scalar('z_loss/z_grads', clip_ops.global_norm(z_grads))
            summary.scalar('z_loss/loss', loss)

        return model_fn_lib.EstimatorSpec(mode=mode,
                                          predictions=gan_model.generated_data,
                                          loss=loss,
                                          train_op=train_op)
Example #44
 def _initializer(shape, dtype=_assert_float_dtype(dtype),
                  partition_info=None):
   scale = scale_
   scale_shape = shape
   if partition_info is not None:
     scale_shape = partition_info.full_shape
   fan_in, fan_out = _compute_fans(scale_shape)
   if mode == "fan_in":
     scale /= max(1., fan_in)
   elif mode == "fan_out":
     scale /= max(1., fan_out)
   else:
     scale /= max(1., (fan_in + fan_out) / 2.)
   if distribution == "normal":
     stddev = math.sqrt(scale)
     return random_ops.truncated_normal(shape, 0.0, stddev, dtype, seed=seed)
   else:
     limit = math.sqrt(3.0 * scale)
     return random_ops.random_uniform(shape, -limit, limit,
                                      dtype, seed=seed)
Example #45
def __call__for_keras_init_v1(self, shape, dtype=None, partition_info=None):
    """ Making keras VarianceScaling initializers v1 support dynamic shape.
  """
    if dtype is None:
        dtype = self.dtype
    scale = self.scale
    scale_shape = shape
    if partition_info is not None:
        scale_shape = partition_info.full_shape
    fan_in, fan_out = _compute_fans_for_keras_init_v1_v2(scale_shape)
    fan_in = math_ops.cast(fan_in, dtype=dtype)
    fan_out = math_ops.cast(fan_out, dtype=dtype)
    if self.mode == "fan_in":
        scale /= math_ops.maximum(1., fan_in)
    elif self.mode == "fan_out":
        scale /= math_ops.maximum(1., fan_out)
    else:
        scale /= math_ops.maximum(1., (fan_in + fan_out) / 2.)
    if self.distribution == "normal" or self.distribution == "truncated_normal":
        # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
        stddev = math_ops.sqrt(scale) / .87962566103423978
        return random_ops.truncated_normal(shape,
                                           0.0,
                                           stddev,
                                           dtype,
                                           seed=self.seed)
    elif self.distribution == "untruncated_normal":
        stddev = math_ops.sqrt(scale)
        return random_ops.random_normal(shape,
                                        0.0,
                                        stddev,
                                        dtype,
                                        seed=self.seed)
    else:
        limit = math_ops.sqrt(3.0 * scale)
        return random_ops.random_uniform(shape,
                                         -limit,
                                         limit,
                                         dtype,
                                         seed=self.seed)
    def testSplitVWithNonConstAxis(self):
        if test.is_gpu_available(cuda_only=True):
            random_seed.set_random_seed(0)
            x = random_ops.truncated_normal([1, 784], seed=0)
            conv = _two_layer_model(x)
            dim = array_ops.placeholder(dtype='int32')
            sizes = constant_op.constant([50, 10, 4], shape=[3])
            split = gen_array_ops._split_v(value=conv,
                                           size_splits=sizes,
                                           axis=dim,
                                           num_split=3)
            output = math_ops.reduce_sum(split[0])

            with session.Session() as sess:
                output_val_ref = sess.run(output, feed_dict={dim: 3})

            with session.Session(config=_get_config()) as sess:
                metadata = config_pb2.RunMetadata()
                output_val = sess.run(output,
                                      run_metadata=metadata,
                                      feed_dict={dim: 3})

            nodes = []
            num_transposes = 0
            for node in metadata.cost_graph.node:
                if node.name.startswith('LayoutOptimizerTranspose'):
                    num_transposes += 1
                nodes.append(node.name)

            # Four transposes were initially added in the Expand phase of
            # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
            expected_num_transposes = 2
            self.assertEqual(expected_num_transposes, num_transposes)
            self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', nodes)
            self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-SplitV-0-0',
                          nodes)
            self.assertIn('LayoutOptimizerVecPermuteNHWCToNCHW_SplitV_2',
                          nodes)
            self.assertAllClose(output_val_ref, output_val, atol=1e-3)
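The layout-optimizer tests in this excerpt call a `_get_config` helper that is not shown here. A plausible sketch of such a helper follows; the exact rewriter options are an assumption, not the tests' actual code:

from tensorflow.core.protobuf import config_pb2
from tensorflow.core.protobuf import rewriter_config_pb2

def _get_config_sketch(layout_optimizer=True):
    # Enable (or disable) Grappler's layout optimizer and request a cost
    # graph so the test can inspect the inserted transpose nodes.
    rewrite_options = rewriter_config_pb2.RewriterConfig(
        layout_optimizer=rewriter_config_pb2.RewriterConfig.ON
        if layout_optimizer else rewriter_config_pb2.RewriterConfig.OFF)
    graph_options = config_pb2.GraphOptions(
        rewrite_options=rewrite_options, build_cost_model=1)
    return config_pb2.ConfigProto(graph_options=graph_options)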
def parameterized_vs_naive(shape, num_iters, use_gpu=False):
    np.random.seed(1618)  # Make it reproducible.

    # No CSE/CF.
    optimizer_options = tf.OptimizerOptions(opt_level=tf.OptimizerOptions.L0)
    config = tf.ConfigProto(graph_options=tf.GraphOptions(
        optimizer_options=optimizer_options))

    with tf.Session(config=config) as sess:
        with tf.device("/cpu:0" if not use_gpu else None):
            param_op = tf.group(
                random_ops.parameterized_truncated_normal(shape))
            naive_op = tf.group(random_ops.truncated_normal(shape))

        # Burn-in to avoid session setup costs in the timing.
        sess.run(param_op)
        sess.run(param_op)
        param_dt = timeit.timeit(lambda: sess.run(param_op), number=num_iters)
        sess.run(naive_op)
        sess.run(naive_op)
        naive_dt = timeit.timeit(lambda: sess.run(naive_op), number=num_iters)
        return param_dt, naive_dt
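A hedged usage sketch for the benchmark above (shape and iteration count are arbitrary; assumes the same TF1-style environment as the function itself):

shape = [64, 1024]
param_dt, naive_dt = parameterized_vs_naive(shape, num_iters=50, use_gpu=False)
print("parameterized: %.4f s, naive: %.4f s (%d iterations, shape=%s)"
      % (param_dt, naive_dt, 50, shape))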
Example #48
 def __call__(self, shape, dtype=None, partition_info=None):
   if dtype is None:
     dtype = self.dtype
   scale = self.scale
   scale_shape = shape
   if partition_info is not None:
     scale_shape = partition_info.full_shape
   fan_in, fan_out = _compute_fans(scale_shape)
   if self.mode == "fan_in":
     scale /= max(1., fan_in)
   elif self.mode == "fan_out":
     scale /= max(1., fan_out)
   else:
     scale /= max(1., (fan_in + fan_out) / 2.)
   if self.distribution == "normal":
     stddev = math.sqrt(scale)
     return random_ops.truncated_normal(shape, 0.0, stddev,
                                        dtype, seed=self.seed)
   else:
     limit = math.sqrt(3.0 * scale)
     return random_ops.random_uniform(shape, -limit, limit,
                                      dtype, seed=self.seed)
Example #49
    def testGradient(self):
        with ops.Graph().as_default() as g:
            inputs = array_ops.placeholder(dtypes.float32,
                                           shape=[None, 100],
                                           name="input")
            weights = array_ops.placeholder(dtypes.float32,
                                            shape=[100, 10],
                                            name="weights")
            biases = array_ops.placeholder(dtypes.float32,
                                           shape=[10],
                                           name="biases")
            activations = nn_ops.relu(math_ops.matmul(inputs, weights) +
                                      biases,
                                      name="activations")
            loss = math_ops.reduce_mean(activations, name="loss")
        gdef = g.as_graph_def()

        with ops.Graph().as_default() as g:
            input_placeholder = array_ops.placeholder(dtypes.float32,
                                                      shape=[32, 100])
            weights_var = variables.Variable(random_ops.truncated_normal(
                [100, 10]),
                                             name="weights")
            biases_var = variables.Variable(array_ops.zeros([10]),
                                            name="biases")
            activations, loss = importer.import_graph_def(
                gdef,
                input_map={
                    "input:0": input_placeholder,
                    "weights:0": weights_var,
                    "biases:0": biases_var
                },
                return_elements=["activations:0", "loss:0"])
            self.assertEqual([32, 10], activations.get_shape())
            self.assertEqual([], loss.get_shape())
            weights_grad, biases_grad = gradients_impl.gradients(
                loss, [weights_var, biases_var])
            self.assertEqual([100, 10], weights_grad.get_shape())
            self.assertEqual([10], biases_grad.get_shape())
  def testMaxPoolGradV2(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      ksize = constant_op.constant([1, 2, 3, 1], shape=[4])
      strides = array_ops.placeholder(dtype='int32', shape=[4])
      max_pool_grad = gen_nn_ops.max_pool_grad_v2(conv, conv, conv, ksize,
                                                  strides, 'VALID')
      output = array_ops.identity(max_pool_grad)

      strides_val = [1, 3, 2, 1]
      with session.Session() as sess:
        output_val_ref = sess.run(output, feed_dict={strides: strides_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={
                strides: strides_val
            })

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if node.name.startswith('LayoutOptimizerTranspose'):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', nodes)
      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-MaxPoolGradV2-0-0',
                    nodes)
      self.assertIn('LayoutOptimizerVecPermuteNHWCToNCHW_MaxPoolGradV2_4',
                    nodes)
      self.assertIn('LayoutOptimizer-MaxPoolGradV2-Const_2', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
  def testStridedSliceWithNonConstAxis(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      end = array_ops.placeholder(dtype='int32')
      s = array_ops.strided_slice(conv, [0, 0, 0, 0], end, strides=[1, 2, 3, 1])
      output = array_ops.identity(s)

      end_val = [1, 2, 3, 4]
      with session.Session() as sess:
        output_val_ref = sess.run(output, feed_dict={end: end_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={
                end: end_val
            })

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('StridedSlice-0-0', nodes)
      self._assert_vec_nhwc_to_nchw('StridedSlice-2', nodes)
      self.assertIn('StridedSlice-1-LayoutOptimizer', nodes)
      self.assertIn('StridedSlice-3-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
Example #52
def sequence_softmax(inputs,
                     noutputs,
                     scope=None,
                     name=None,
                     linear_name=None):
    """Run a softmax layer over all time_steps of an input sequence

    Args:
        inputs: (seq_length, batch_size, depth) tensor
        noutputs: output_depth
        scope: optional scope name
        name: optional name for output tensor
        linear_name: optional name for linear (pre-softmax) output

    Returns:
        A tensor of size (seq_length, batch_size, noutputs)
    """
    seq_length, _, ninputs = _shape(inputs)
    inputs_u = array_ops.unstack(inputs)
    outputs_u = []
    with variable_scope.variable_scope(scope, "Sequential_Softmax", [inputs]):
        initial_w = random_ops.truncated_normal([0 + ninputs, noutputs],
                                                stddev=0.1)
        initial_b = constant_op.constant(0.1, shape=[noutputs])
        w = variables.model_variable("weights", initializer=initial_w)
        b = variables.model_variable("biases", initializer=initial_b)
        for i in xrange(seq_length):
            with variable_scope.variable_scope(scope, "Sequence_Softmax_Step",
                                               [inputs_u[i]]):
                linear = nn_ops.xw_plus_b_v1(inputs_u[i],
                                             w,
                                             b,
                                             name=linear_name)
                output = nn_ops.softmax(linear)
                outputs_u += [output]
        outputs = array_ops.stack(outputs_u, name=name)
    return outputs
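A minimal usage sketch for `sequence_softmax` (shapes are illustrative; assumes the same module-level imports as the function and the usual TF1 variable initialization before evaluation):

# (seq_length=20, batch_size=8, depth=16) -> (20, 8, 5)
inputs = random_ops.truncated_normal([20, 8, 16], stddev=0.1)
outputs = sequence_softmax(inputs, noutputs=5, name="step_probs")
# `outputs` holds one softmax distribution over 5 classes per time step and
# batch element; the model variables still need to be initialized in a session.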
Example #53
 def __call__(self, shape, dtype=None, partition_info=None):
     if dtype is None:
         dtype = self.dtype
     scale = self.scale
     scale_shape = shape
     if partition_info is not None:
         scale_shape = partition_info.full_shape
     fan_in, fan_out = _compute_fans(scale_shape)
     if self.mode == "fan_in":
         scale /= max(1., fan_in)
     elif self.mode == "fan_out":
         scale /= max(1., fan_out)
     else:
         scale /= max(1., (fan_in + fan_out) / 2.)
     if self.distribution == "normal" or self.distribution == "truncated_normal":
         # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
         stddev = math.sqrt(scale) / .87962566103423978
         return random_ops.truncated_normal(shape,
                                            0.0,
                                            stddev,
                                            dtype,
                                            seed=self.seed)
     elif self.distribution == "untruncated_normal":
         stddev = math.sqrt(scale)
         return random_ops.random_normal(shape,
                                         0.0,
                                         stddev,
                                         dtype,
                                         seed=self.seed)
     else:
         limit = math.sqrt(3.0 * scale)
         return random_ops.random_uniform(shape,
                                          -limit,
                                          limit,
                                          dtype,
                                          seed=self.seed)
  def testStridedSliceWithMask(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      # This will generate a StridedSlice op with begin mask and end mask.
      s = conv[:, :, 1:-1, :]
      output = array_ops.identity(s)

      with session.Session() as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if node.name.startswith('LayoutOptimizerTranspose'):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0', nodes)
      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-strided_slice-0-0',
                    nodes)
      self.assertIn('LayoutOptimizer-strided_slice-strided_slice/stack', nodes)
      self.assertIn('LayoutOptimizer-strided_slice-strided_slice/stack_1',
                    nodes)
      self.assertIn('LayoutOptimizer-strided_slice-strided_slice/stack_2',
                    nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
Example #55
    def _initializer(shape, dtype=dtype, partition_info=None):
        del partition_info

        if shape:
            fan_in = float(shape[-2]) if len(shape) > 1 else float(shape[-1])
            fan_out = float(shape[-1])
        else:
            fan_in = 1.
            fan_out = 1.

        for dim in shape[:-2]:
            fan_in *= float(dim)
            fan_out *= float(dim)

        if mode == 'FAN_IN':
            n = fan_in
        elif mode == 'FAN_OUT':
            n = fan_out
        else:  # mode == 'FAN_AVG':
            n = (fan_in + fan_out) / 2.

        if uniform:
            limit = math.sqrt(3.0 * factor / n)
            _init = random_ops.random_uniform(shape,
                                              -limit,
                                              limit,
                                              dtype,
                                              seed=seed)
        else:
            trunc_stddev = math.sqrt(1.3 * factor / n)
            _init = random_ops.truncated_normal(shape,
                                                0.,
                                                trunc_stddev,
                                                dtype,
                                                seed=seed)
        return _init * scale_factor
Example #56
 def __call__(self, shape, dtype=None, partition_info=None):
   if dtype is None:
     dtype = self.dtype
   return random_ops.truncated_normal(
       shape, self.mean, self.stddev, dtype, seed=self.seed)
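For comparison, the same draw without the initializer wrapper looks like this (a minimal sketch; the mean/stddev values are arbitrary):

init_value = random_ops.truncated_normal(
    [3, 3, 16, 32], mean=0.0, stddev=0.05, dtype=dtypes.float32, seed=42)
w = variables.Variable(init_value, name="conv_kernel")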
 def testLargeShape(self):
     with self.test_session(use_gpu=True):
         v = variables.Variable(
             array_ops.zeros(dtype=dtypes.float32, shape=[2**33, 1]))
         n = random_ops.truncated_normal(v.shape)
         self.assertEqual([8589934592, 1], n.shape.as_list())
    def _testScopedExport(self, test_dir, exported_filenames):
        graph = ops.Graph()
        with graph.as_default():
            # Creates an inference graph.
            # Hidden 1
            colocate_constraint = constant_op.constant(1.2, name="constraint")
            images = constant_op.constant(1.2,
                                          dtypes.float32,
                                          shape=[100, 28],
                                          name="images")
            with ops.name_scope("hidden1"):
                with graph.colocate_with(colocate_constraint.op):
                    weights1 = variables.Variable(random_ops.truncated_normal(
                        [28, 128], stddev=1.0 / math.sqrt(float(28))),
                                                  name="weights")
                # The use of control_flow_ops.cond here is purely for adding test
                # coverage of the save and restore of a control flow context (which
                # doesn't make any sense here from a machine learning perspective).
                # Typical biases would be a simple Variable without the condition.
                biases1 = variables.Variable(control_flow_ops.cond(
                    math_ops.less(random.random(),
                                  0.5), lambda: array_ops.ones([128]),
                    lambda: array_ops.zeros([128])),
                                             name="biases")
                hidden1 = nn_ops.relu(
                    math_ops.matmul(images, weights1) + biases1)

            # Hidden 2
            with ops.name_scope("hidden2"):
                weights2 = variables.Variable(random_ops.truncated_normal(
                    [128, 32], stddev=1.0 / math.sqrt(float(128))),
                                              name="weights")

                # The use of control_flow_ops.while_loop here is purely for adding test
                # coverage of the save and restore of a control flow context (which
                # doesn't make any sense here from a machine learning perspective).
                # Typical biases would be a simple Variable without the loop.
                def loop_cond(it, _):
                    return it < 2

                def loop_body(it, biases2):
                    biases2 += constant_op.constant(0.1, shape=[32])
                    return it + 1, biases2

                _, biases2 = control_flow_ops.while_loop(
                    loop_cond, loop_body, [
                        constant_op.constant(0),
                        variables.Variable(array_ops.zeros([32]),
                                           name="biases")
                    ])
                hidden2 = nn_ops.relu(
                    math_ops.matmul(hidden1, weights2) + biases2)
            # Linear
            with ops.name_scope("softmax_linear"):
                weights3 = variables.Variable(random_ops.truncated_normal(
                    [32, 10], stddev=1.0 / math.sqrt(float(32))),
                                              name="weights")
                biases3 = variables.Variable(array_ops.zeros([10]),
                                             name="biases")
                logits = math_ops.matmul(hidden2, weights3) + biases3
                ops.add_to_collection("logits", logits)

            # Exports each sub-graph.
            # Exports the first one with unbound_inputs_col_name set to default.
            orig_meta_graph1, var_list = meta_graph.export_scoped_meta_graph(
                filename=os.path.join(test_dir, exported_filenames[0]),
                graph=ops.get_default_graph(),
                export_scope="hidden1")
            self.assertEqual(["biases:0", "weights:0"],
                             sorted(var_list.keys()))
            var_names = [v.name for _, v in var_list.items()]
            self.assertEqual(["hidden1/biases:0", "hidden1/weights:0"],
                             sorted(var_names))

            # Exports the rest with no unbound_inputs_col_name.
            orig_meta_graph2, _ = meta_graph.export_scoped_meta_graph(
                filename=os.path.join(test_dir, exported_filenames[1]),
                graph=ops.get_default_graph(),
                export_scope="hidden2",
                unbound_inputs_col_name=None)
            orig_meta_graph3, _ = meta_graph.export_scoped_meta_graph(
                filename=os.path.join(test_dir, exported_filenames[2]),
                graph=ops.get_default_graph(),
                export_scope="softmax_linear",
                unbound_inputs_col_name=None)

        return [orig_meta_graph1, orig_meta_graph2, orig_meta_graph3]
Example #59
    def __call__(self, shape, dtype=None, partition_info=None):
        if dtype is None:
            dtype = self.dtype

        v = random_ops.truncated_normal(shape, 0, 1.0, dtype, seed=self.seed)
        return unit(v, self.eps)
Example #60
def variance_scaling_initializer(shape,
                                 factor=2.0,
                                 mode='FAN_IN',
                                 uniform=False,
                                 seed=None,
                                 dtype=dtypes.float32,
                                 mask=None):
    """Returns an initializer that generates tensors without scaling variance.

    When initializing a deep network, it is in principle advantageous to keep
    the scale of the input variance constant, so that it does not explode or
    vanish by the time it reaches the final layer. This initializer uses the
    following formula:

    ```python
    if mode='FAN_IN': # Count only number of input connections.
      n = fan_in
    elif mode='FAN_OUT': # Count only number of output connections.
      n = fan_out
    elif mode='FAN_AVG': # Average number of inputs and output connections.
      n = (fan_in + fan_out)/2.0

    truncated_normal(shape, 0.0, stddev=sqrt(factor / n))
    ```

    * To get [Delving Deep into Rectifiers](
     http://arxiv.org/pdf/1502.01852v1.pdf), use (Default):<br/>
    `factor=2.0 mode='FAN_IN' uniform=False`
    * To get [Convolutional Architecture for Fast Feature Embedding](
     http://arxiv.org/abs/1408.5093), use:<br/>
    `factor=1.0 mode='FAN_IN' uniform=True`
    * To get [Understanding the difficulty of training deep feedforward neural
    networks](http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf),
    use:<br/>
    `factor=1.0 mode='FAN_AVG' uniform=True.`
    * To get `xavier_initializer` use either:<br/>
    `factor=1.0 mode='FAN_AVG' uniform=True`, or<br/>
    `factor=1.0 mode='FAN_AVG' uniform=False`.

    Args:
      shape: The shape of the tensor to initialize.
      factor: Float.  A multiplicative factor.
      mode: String.  'FAN_IN', 'FAN_OUT', 'FAN_AVG'.
      uniform: Whether to use uniform or normally distributed random
               initialization.
      seed: A Python integer. Used to create random seeds. See
            @{tf.set_random_seed} for behavior.
      dtype: The data type. Only floating point types are supported.
      mask: An optional tensor that, if provided, is multiplied element-wise
            with the generated initial value.

    Returns:
      An initializer that generates tensors with unit variance.

    Raises:
      TypeError: if `dtype` is not a floating point type.
      TypeError: if `mode` is not in ['FAN_IN', 'FAN_OUT', 'FAN_AVG'].
    """
    if not dtype.is_floating:
        raise TypeError(
            'Cannot create initializer for non-floating point type.')
    if mode not in ['FAN_IN', 'FAN_OUT', 'FAN_AVG']:
        raise TypeError('Unknown mode %s [FAN_IN, FAN_OUT, FAN_AVG]' % mode)

    # Estimating fan_in and fan_out is not perfect, but we try.
    # This is the right thing for matrix multiply and convolutions.
    if shape:
        fan_in = float(shape[-2]) if len(shape) > 1 else float(shape[-1])
        fan_out = float(shape[-1])
    else:
        fan_in = 1.0
        fan_out = 1.0
    for dim in shape[:-2]:
        fan_in *= float(dim)
        fan_out *= float(dim)
    if mode == 'FAN_IN':
        # Count only number of input connections.
        n = fan_in
    elif mode == 'FAN_OUT':
        # Count only number of output connections.
        n = fan_out
    elif mode == 'FAN_AVG':
        # Average number of inputs and output connections.
        n = (fan_in + fan_out) / 2.0
    if uniform:
        # To get stddev = math.sqrt(factor / n) need to adjust for uniform.
        limit = math.sqrt(3.0 * factor / n)
        init = random_ops.random_uniform(shape,
                                         -limit,
                                         limit,
                                         dtype,
                                         seed=seed)
    else:
        # To get stddev = math.sqrt(factor / n) adjust for truncated.
        trunc_stddev = math.sqrt(1.3 * factor / n)
        init = random_ops.truncated_normal(shape,
                                           0.0,
                                           trunc_stddev,
                                           dtype,
                                           seed=seed)
    if mask is not None:
        return mask * init
    else:
        return init
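A hedged usage sketch for `variance_scaling_initializer` (the shape and session setup are illustrative and follow the conventions of the other examples here):

# He-style initialization for a 3x3 conv with 64 input and 128 output channels.
shape = [3, 3, 64, 128]
init = variance_scaling_initializer(shape, factor=2.0, mode='FAN_IN',
                                    uniform=False, seed=0,
                                    dtype=dtypes.float32)
w = variables.Variable(init, name="conv_weights")

with session.Session() as sess:
    sess.run(variables.global_variables_initializer())
    # fan_in = 3 * 3 * 64 = 576, so the empirical stddev should be close to
    # sqrt(2.0 / 576) ~= 0.059 after the truncation adjustment.
    print(sess.run(w).std())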