Example #1
0
  def testSelectEverthingDetail(self):
    """Profiles a small model with every column selected and checks the dump.

    Runs `lib.BuildSmallModel()` once with FULL_TRACE, writes the profile to a
    file under the test temp dir, then verifies the header row and the metrics
    printed on the `Conv2D` and `DW` rows.
    """
    ops.reset_default_graph()
    # Queried once and reused below instead of re-querying for each output row.
    has_gpu = test.is_gpu_available()
    dev = '/gpu:0' if has_gpu else '/cpu:0'
    outfile = os.path.join(test.get_temp_dir(), 'dump')
    opts = (builder(builder.trainable_variables_parameter())
            .with_file_output(outfile)
            .with_accounted_types(['.*'])
            .select(['micros', 'bytes', 'params', 'float_ops', 'occurrence',
                     'device', 'op_types', 'input_shapes']).build())

    config = config_pb2.ConfigProto()
    with session.Session(config=config) as sess, ops.device(dev):
      x = lib.BuildSmallModel()

      sess.run(variables.global_variables_initializer())
      run_meta = config_pb2.RunMetadata()
      _ = sess.run(x,
                   options=config_pb2.RunOptions(
                       trace_level=config_pb2.RunOptions.FULL_TRACE),
                   run_metadata=run_meta)

      model_analyzer.profile(
          sess.graph, run_meta, options=opts)

      with gfile.Open(outfile, 'r') as f:
        # pylint: disable=line-too-long
        outputs = f.read().split('\n')

        self.assertEqual(outputs[0],
                         'node name | # parameters | # float_ops | requested bytes | total execution time | accelerator execution time | cpu execution time | assigned devices | op types | op count (run|defined) | input shapes')
        for o in outputs[1:]:
          if o.find('Conv2D ') > 0:
            # The comma-separated metrics sit between the first '(' and ')'.
            metrics = o[o.find('(') + 1:o.find(')')].split(',')
            # Make sure time is profiled. With a GPU, columns 3, 4 and 5 are
            # checked; without one only 3 and 5 (presumably column 4 is the
            # accelerator time -- confirm against the header order).
            gap = 1 if has_gpu else 2
            for i in range(3, 6, gap):
              mat = re.search('(.*)[um]s/(.*)[um]s', metrics[i])
              # Fail with a readable assertion instead of an AttributeError
              # on `None.group` when the column has an unexpected format.
              self.assertIsNotNone(mat)
              self.assertGreater(float(mat.group(1)), 0.0)
              self.assertGreater(float(mat.group(2)), 0.0)
            # Make sure device is profiled.
            if has_gpu:
              self.assertTrue(metrics[6].find('gpu') > 0)
              self.assertFalse(metrics[6].find('cpu') > 0)
            else:
              self.assertFalse(metrics[6].find('gpu') > 0)
              self.assertTrue(metrics[6].find('cpu') > 0)
            # Make sure float_ops is profiled.
            mat = re.search('(.*)k/(.*)k flops', metrics[1].strip())
            self.assertIsNotNone(mat)
            self.assertGreater(float(mat.group(1)), 0.0)
            self.assertGreater(float(mat.group(2)), 0.0)
            # Make sure op_count is profiled.
            self.assertEqual(metrics[8].strip(), '1/1|1/1')
            # Make sure input_shapes is profiled.
            self.assertEqual(metrics[9].strip(), '0:2x6x6x3|1:3x3x3x6')

          if o.find('DW (3x3x3x6') > 0:
            metrics = o[o.find('(') + 1:o.find(')')].split(',')
            # Make sure the parameter count is profiled.
            mat = re.search('(.*)/(.*) params', metrics[1].strip())
            self.assertIsNotNone(mat)
            self.assertGreater(float(mat.group(1)), 0.0)
            self.assertGreater(float(mat.group(2)), 0.0)
  def testTraining(self):
    """Runs `_test_training` over several input shapes, dtypes and layouts.

    GPU variants run only when a CUDA GPU is available; the CPU variants
    (always NHWC here) run unconditionally.
    """
    def check(shape, dtype, scale_shape, on_gpu, layout):
      # The fourth positional argument is np.float32 for every case.
      self._test_training(
          shape, dtype, scale_shape, np.float32,
          use_gpu=on_gpu, data_format=layout)

    float_types = (np.float16, np.float32)

    input_shape = [1, 1, 6, 1]
    for dtype in float_types:
      if test.is_gpu_available(cuda_only=True):
        check(input_shape, dtype, [1], True, 'NHWC')
        check(input_shape, dtype, [1], True, 'NCHW')
      check(input_shape, dtype, [1], False, 'NHWC')

    input_shape = [1, 1, 6, 2]
    for dtype in float_types:
      if test.is_gpu_available(cuda_only=True):
        check(input_shape, dtype, [2], True, 'NHWC')
      check(input_shape, dtype, [2], False, 'NHWC')

    # This shape is only exercised on GPU in NCHW.
    input_shape = [1, 2, 1, 6]
    if test.is_gpu_available(cuda_only=True):
      for dtype in float_types:
        check(input_shape, dtype, [2], True, 'NCHW')

    input_shape = [27, 131, 127, 6]
    for dtype in float_types:
      if test.is_gpu_available(cuda_only=True):
        check(input_shape, dtype, [131], True, 'NCHW')
        check(input_shape, dtype, [6], True, 'NHWC')
      check(input_shape, dtype, [6], False, 'NHWC')
  def testBatchNormGrad(self):
    """Runs `_test_gradient` for several shapes and layouts.

    Every case is run with `is_training` True and then False. GPU variants
    run only when a CUDA GPU is available.
    """
    def check(shape, scale_shape, on_gpu, layout, training):
      self._test_gradient(
          shape, scale_shape,
          use_gpu=on_gpu,
          data_format=layout,
          is_training=training)

    for is_training in (True, False):
      input_shape = [1, 1, 6, 1]
      if test.is_gpu_available(cuda_only=True):
        check(input_shape, [1], True, 'NHWC', is_training)
        check(input_shape, [1], True, 'NCHW', is_training)
      check(input_shape, [1], False, 'NHWC', is_training)

      input_shape = [1, 1, 6, 2]
      if test.is_gpu_available(cuda_only=True):
        check(input_shape, [2], True, 'NHWC', is_training)
      check(input_shape, [2], False, 'NHWC', is_training)

      # This shape is only exercised on GPU in NCHW.
      input_shape = [1, 2, 1, 6]
      if test.is_gpu_available(cuda_only=True):
        check(input_shape, [2], True, 'NCHW', is_training)

      input_shape = [7, 9, 13, 6]
      if test.is_gpu_available(cuda_only=True):
        check(input_shape, [9], True, 'NCHW', is_training)
        check(input_shape, [6], True, 'NHWC', is_training)
      check(input_shape, [6], False, 'NHWC', is_training)
  def testMaxPoolV2(self):
    """Checks a MaxPoolV2 graph run under the `_get_config()` session config.

    The graph is evaluated once in a default session for a reference value and
    once in a session built with `_get_config()`; the test then inspects the
    cost-graph node names and compares the two outputs. Does nothing without
    a CUDA GPU.
    """
    if not test.is_gpu_available(cuda_only=True):
      return

    random_seed.set_random_seed(0)
    images = random_ops.truncated_normal([1, 784], seed=0)
    conv = _two_layer_model(images)
    ksize = constant_op.constant([1, 2, 3, 1], shape=[4])
    # Strides are fed at run time rather than baked in as a constant.
    strides = array_ops.placeholder(dtype='int32', shape=[4])
    pooled = gen_nn_ops._max_pool_v2(conv, ksize, strides, 'VALID')
    output = array_ops.identity(pooled)

    feed = {strides: [1, 3, 2, 1]}
    with session.Session() as sess:
      expected = sess.run(output, feed_dict=feed)

    with session.Session(config=_get_config()) as sess:
      metadata = config_pb2.RunMetadata()
      actual = sess.run(output, run_metadata=metadata, feed_dict=feed)

    node_names = [node.name for node in metadata.cost_graph.node]
    transpose_count = sum(1 for name in node_names if _is_transpose(name))

    self.assertEqual(2, transpose_count)
    self._assert_trans_nhwc_to_nchw('Conv2D-0', node_names)
    self._assert_trans_nchw_to_nhwc('MaxPoolV2-0-0', node_names)
    self._assert_vec_nhwc_to_nchw('MaxPoolV2-2', node_names)
    self.assertIn('MaxPoolV2-1-LayoutOptimizer', node_names)
    self.assertAllClose(expected, actual, atol=1e-3)
  def benchmarkMatrixExponentialOp(self):
    """Benchmarks `linalg_impl.matrix_exponential` for each of `self.shapes`.

    Every shape is benchmarked on "/cpu:0" and, when a GPU is available, on
    "/gpu:0" as well. Each run uses a fresh graph and session.
    """
    def run_on(device, shape, name_template):
      with ops.Graph().as_default():
        with session.Session() as sess:
          with ops.device(device):
            matrix = self._GenerateMatrix(shape)
            expm = linalg_impl.matrix_exponential(matrix)
            variables.global_variables_initializer().run()
            self.run_op_benchmark(
                sess,
                control_flow_ops.group(expm),
                min_iters=25,
                name=name_template.format(shape=shape))

    for shape in self.shapes:
      run_on("/cpu:0", shape, "matrix_exponential_cpu_{shape}")
      if test.is_gpu_available(True):
        run_on("/gpu:0", shape, "matrix_exponential_gpu_{shape}")
  def testStridedSliceWithMask1011(self):
    """Checks a strided-slice graph run under the `_get_config()` session config.

    The graph is evaluated once in a default session for a reference value and
    once in a session built with `_get_config()`; the test then inspects the
    cost-graph node names and compares the two outputs. Does nothing without
    a CUDA GPU.
    """
    if not test.is_gpu_available(cuda_only=True):
      return

    random_seed.set_random_seed(0)
    images = random_ops.truncated_normal([1, 784], seed=0)
    conv = _two_layer_model(images)
    # This will generate a StridedSlice op with begin mask and
    # end mask 11(1011).
    sliced = conv[:, :, 1:-1, :]
    output = array_ops.identity(sliced)

    with session.Session() as sess:
      expected = sess.run(output)

    with session.Session(config=_get_config()) as sess:
      metadata = config_pb2.RunMetadata()
      actual = sess.run(output, run_metadata=metadata)

    node_names = [node.name for node in metadata.cost_graph.node]
    transpose_count = sum(1 for name in node_names if _is_transpose(name))

    # Four transposes were initially added in the Expand phase of
    # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    self.assertEqual(2, transpose_count)
    self._assert_trans_nhwc_to_nchw('Conv2D-0', node_names)
    self._assert_trans_nchw_to_nhwc('strided_slice-0-0', node_names)
    for input_index in (1, 2, 3):
      self.assertIn(
          'strided_slice-%d-LayoutOptimizer' % input_index, node_names)
    self.assertAllClose(expected, actual, atol=1e-3)
Example #7
0
  def testLoopGPU(self):
    """Checks that loop-node timings are aggregated correctly on GPU.

    Sums `op_end_rel_micros` over the MatMul node's entries on the GPU device
    and on the GPU stream, and compares the sums with the execution counters
    of the matching tfprof node. Skipped when no GPU is available.
    """
    if not test.is_gpu_available():
      # Report a skip rather than a silent pass when the test cannot run.
      self.skipTest('GPU required')

    ops.reset_default_graph()
    with ops.device('/gpu:0'):
      tfprof_node, run_meta = _run_loop_model()
      # The while-loop caused a node to appear 4 times in scheduling.
      ret = _extract_node(run_meta,
                          'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul')
      device_execs = ret['/job:localhost/replica:0/task:0/gpu:0']
      self.assertEqual(len(device_execs), 4)
      total_cpu_execs = sum(node.op_end_rel_micros for node in device_execs)

      ret = _extract_node(
          run_meta,
          'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul:MatMul')
      stream_execs = ret['/gpu:0/stream:all']
      self.assertGreaterEqual(len(stream_execs), 4)
      total_accelerator_execs = sum(
          node.op_end_rel_micros for node in stream_execs)

      mm_node = lib.SearchTFProfNode(
          tfprof_node,
          'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul')

      self.assertEqual(mm_node.run_count, 4)
      self.assertEqual(mm_node.accelerator_exec_micros, total_accelerator_execs)
      self.assertEqual(mm_node.cpu_exec_micros, total_cpu_execs)
      self.assertEqual(mm_node.exec_micros,
                       total_cpu_execs + total_accelerator_execs)
  def testConcatLargeNumberOfTensors(self):
    """Concatenates many [7, 13] placeholders along each axis and checks slices.

    Uses 5000 tensors when a GPU is available and 500 otherwise. For each
    concat axis, every input is compared with the matching slice of the
    evaluated result.
    """
    with self.session(use_gpu=True):
      for concat_dim in range(2):
        params = {}
        p = []
        shape = np.array([7, 13])
        num_tensors = 5000 if test.is_gpu_available() else 500
        for _ in range(num_tensors):
          placeholder = array_ops.placeholder(dtypes.float32, shape=shape)
          p.append(placeholder)
          params[placeholder] = np.random.rand(*shape).astype(np.float32)

        c = array_ops.concat(p, concat_dim)
        result = c.eval(feed_dict=params)

        self.assertEqual(result.shape, c.get_shape())
        cur_offset = 0

        for placeholder in p:
          expected = params[placeholder]
          # The index into the result is the ':' along all dimensions
          # except the concat_dim. slice(0, size) is used for ':'.
          index = [slice(0, size) for size in expected.shape]
          extent = expected.shape[concat_dim]
          index[concat_dim] = slice(cur_offset, cur_offset + extent)
          cur_offset += extent
          # NumPy needs a tuple here: indexing with a *list* of slices is
          # deprecated and rejected by newer releases.
          self.assertAllEqual(result[tuple(index)], expected)
 def testGradientDilatedConv(self):
   """Gradient-checks `conv2d_backprop_filter` with dilations=[1, 2, 2, 1].

   Runs for both "SAME" and "VALID" padding and strides 1 and 2, asserting
   the gradient error stays below 2e-3. Does nothing without a CUDA GPU.
   """
   if not test.is_gpu_available(cuda_only=True):
     return

   max_err = 2e-3
   with self.test_session(use_gpu=True):
     for padding in ("SAME", "VALID"):
       for stride in (1, 2):
         # Re-seed so every configuration sees the same random data.
         np.random.seed(1)
         input_shape = [5, 8, 6, 4]
         kernel_shape = [3, 3, 4, 6]
         strides = [1, stride, stride, 1]
         inputs = constant_op.constant(
             2 * np.random.random_sample(input_shape) - 1,
             dtype=dtypes.float32)
         # Make a convolution op with the current settings,
         # just to easily get the shape of the output.
         probe = nn_ops.conv2d(
             inputs,
             array_ops.zeros(kernel_shape),
             dilations=[1, 2, 2, 1],
             strides=strides,
             padding=padding)
         grad_shape = probe.get_shape().as_list()
         grad_out = constant_op.constant(
             2 * np.random.random_sample(grad_shape) - 1,
             dtype=dtypes.float32)
         filter_grad = nn_ops.conv2d_backprop_filter(
             inputs,
             kernel_shape,
             grad_out,
             dilations=[1, 2, 2, 1],
             strides=strides,
             padding=padding)
         err = gradient_checker.compute_gradient_error(
             [inputs, grad_out], [input_shape, grad_shape],
             filter_grad, kernel_shape)
         print("conv2d_backprop_filter gradient err = %g " % err)
         self.assertLess(err, max_err)
Example #10
0
  def testDeviceWrapperDynamicExecutionNodesAreAllProperlyLocated(self):
    """Checks that a DeviceWrapper'd GRU cell runs on the GPU under dynamic_rnn.

    Builds `dynamic_rnn` inside a "/cpu:0" device scope with the cell wrapped
    for "/gpu:0", runs it with FULL_TRACE, and asserts that nodes whose name
    contains "gru_cell" appear in the GPU step stats and not in the CPU ones.
    Skipped when no GPU is available.
    """
    if not test.is_gpu_available():
      # Can't perform this test w/o a GPU; report it as skipped rather than
      # letting it pass silently.
      self.skipTest("GPU required")

    with self.test_session(use_gpu=True) as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        x = array_ops.zeros([1, 1, 3])
        cell = rnn_cell_impl.DeviceWrapper(rnn_cell_impl.GRUCell(3), "/gpu:0")
        with ops.device("/cpu:0"):
          outputs, _ = rnn.dynamic_rnn(
              cell=cell, inputs=x, dtype=dtypes.float32)
        run_metadata = config_pb2.RunMetadata()
        opts = config_pb2.RunOptions(
            trace_level=config_pb2.RunOptions.FULL_TRACE)

        sess.run([variables_lib.global_variables_initializer()])
        _ = sess.run(outputs, options=opts, run_metadata=run_metadata)

      step_stats = run_metadata.step_stats
      # NOTE(review): this assumes dev_stats[0] and dev_stats[1] are the GPU
      # and the CPU in some order -- confirm no extra devices are reported.
      ix = 0 if "gpu" in step_stats.dev_stats[0].device else 1
      gpu_stats = step_stats.dev_stats[ix].node_stats
      cpu_stats = step_stats.dev_stats[1 - ix].node_stats
      self.assertFalse([s for s in cpu_stats if "gru_cell" in s.node_name])
      self.assertTrue([s for s in gpu_stats if "gru_cell" in s.node_name])
  def benchmarkCholeskyOp(self):
    """Benchmarks `linalg_ops.cholesky` for each of `self.shapes`.

    Every shape is benchmarked on "/cpu:0" and, when a GPU is available, on
    "/device:GPU:0" as well. Each run uses a fresh graph and session.
    """
    def run_on(device, shape, name_template):
      with ops.Graph().as_default():
        with session.Session() as sess:
          with ops.device(device):
            matrix = variables.Variable(self._GenerateMatrix(shape))
            chol = linalg_ops.cholesky(matrix)
            variables.global_variables_initializer().run()
            self.run_op_benchmark(
                sess,
                control_flow_ops.group(chol),
                min_iters=25,
                name=name_template.format(shape=shape))

    for shape in self.shapes:
      run_on("/cpu:0", shape, "cholesky_cpu_{shape}")
      if test.is_gpu_available(True):
        run_on("/device:GPU:0", shape, "cholesky_gpu_{shape}")
  def setUp(self):
    """Creates a temp dump directory and sets the expected partition-graph count."""
    self._dump_root = tempfile.mkdtemp()

    # Two partition graphs are expected when a GPU is available, else one.
    self._expected_partition_graph_count = (
        2 if test.is_gpu_available() else 1)
 def _testBatchNormGradGrad(self, config):
   """Runs `_test_grad_grad` for one configuration in both training modes.

   Args:
     config: mapping with the keys 'shape', 'err_tolerance' and 'dtype'.
       `shape` is indexed at positions 1 and 3, so it needs at least four
       entries.
   """
   shape = config['shape']
   err_tolerance = config['err_tolerance']
   dtype = config['dtype']

   def check(scale_dim, on_gpu, layout, training):
     # The scale shape is the single dimension of `shape` at `scale_dim`.
     self._test_grad_grad(
         shape,
         dtype, [shape[scale_dim]],
         np.float32,
         use_gpu=on_gpu,
         data_format=layout,
         is_training=training,
         err_tolerance=err_tolerance)

   for is_training in (True, False):
     if test.is_gpu_available(cuda_only=True):
       check(3, True, 'NHWC', is_training)
       check(1, True, 'NCHW', is_training)
     check(3, False, 'NHWC', is_training)
  def test_convolution_2d(self):
    """Runs `layer_test` on a channels_first Conv2D for padding/stride combos.

    'same' padding is only combined with unit strides. The layer test itself
    runs only when a CUDA GPU is available.
    """
    batch, channels, rows, cols = 2, 3, 7, 6
    num_filters = 2
    kernel = (3, 2)

    for padding in ('valid', 'same'):
      for strides in ((1, 1), (2, 2)):
        if padding == 'same' and strides != (1, 1):
          continue

        with self.test_session(use_gpu=True):
          # Only runs on GPU with CUDA, channels_first is not supported on CPU.
          # TODO(b/62340061): Support channels_first on CPU.
          if not test.is_gpu_available(cuda_only=True):
            continue
          layer_kwargs = {
              'filters': num_filters,
              'kernel_size': kernel,
              'padding': padding,
              'strides': strides,
              'data_format': 'channels_first'
          }
          testing_utils.layer_test(
              keras.layers.Conv2D,
              kwargs=layer_kwargs,
              input_shape=(batch, channels, rows, cols))
  def testAllocationHistory(self):
    """Checks the allocation records reported for MatMul and its random input.

    Runs `_run_model()` on the GPU device and inspects the
    `allocation_records` of the 'MatMul' and
    'random_normal/RandomStandardNormal' nodes. Skipped when no CUDA GPU is
    available.
    """
    if not test.is_gpu_available(cuda_only=True):
      # Report a skip rather than a silent pass when the test cannot run.
      self.skipTest('GPU required')

    gpu_dev = test.gpu_device_name()
    ops.reset_default_graph()
    with ops.device(gpu_dev):
      _, run_meta = _run_model()

    mm = _extract_node(run_meta, 'MatMul')['gpu:0'][0]
    mm_allocs = mm.memory[0].allocation_records
    # There is one allocation record and one deallocation record.
    self.assertEqual(len(mm_allocs), 2)
    # The allocation comes first.
    self.assertGreater(mm_allocs[1].alloc_micros, mm_allocs[0].alloc_micros)
    self.assertGreater(mm_allocs[0].alloc_bytes, 0)
    # The deallocation is recorded as a negative byte count.
    self.assertLess(mm_allocs[1].alloc_bytes, 0)
    # All memory is deallocated.
    self.assertEqual(mm_allocs[0].alloc_bytes + mm_allocs[1].alloc_bytes, 0)

    rand = _extract_node(
        run_meta, 'random_normal/RandomStandardNormal')['gpu:0'][0]
    random_allocs = rand.memory[0].allocation_records
    # The random normal must be allocated first since matmul depends on it.
    self.assertLess(random_allocs[0].alloc_micros, mm.all_start_micros)
    # Its memory is deallocated only after matmul has started.
    self.assertGreater(random_allocs[1].alloc_micros, mm.all_start_micros)
  def testSliceWithNonConstAxis(self):
    """Checks a Slice graph with a fed `size` under the `_get_config()` config.

    The graph is evaluated once in a default session for a reference value and
    once in a session built with `_get_config()`; the test then inspects the
    cost-graph node names and compares the two outputs. Does nothing without
    a CUDA GPU.
    """
    if not test.is_gpu_available(cuda_only=True):
      return

    random_seed.set_random_seed(0)
    images = random_ops.truncated_normal([1, 784], seed=0)
    conv = _two_layer_model(images)
    # The slice size is fed at run time rather than baked in as a constant.
    size = array_ops.placeholder(dtype='int32')
    sliced = array_ops.slice(conv, [0, 0, 0, 0], size)
    output = array_ops.identity(sliced)

    feed = {size: [1, 2, 3, 4]}
    with session.Session() as sess:
      expected = sess.run(output, feed_dict=feed)

    with session.Session(config=_get_config()) as sess:
      metadata = config_pb2.RunMetadata()
      actual = sess.run(output, run_metadata=metadata, feed_dict=feed)

    node_names = [node.name for node in metadata.cost_graph.node]
    transpose_count = sum(1 for name in node_names if _is_transpose(name))

    # Four transposes were initially added in the Expand phase of
    # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    self.assertEqual(2, transpose_count)
    self._assert_trans_nhwc_to_nchw('Conv2D-0', node_names)
    self._assert_trans_nchw_to_nhwc('Slice-0-0', node_names)
    self._assert_vec_nhwc_to_nchw('Slice-2', node_names)
    self.assertAllClose(expected, actual, atol=1e-3)
Example #17
0
  def testSkipEagerIteratorGetNextAsOptional(self, np_value, tf_value_fn,
                                             works_on_gpu):
    """Exercises `get_next_as_optional` over a three-element dataset.

    Args:
      np_value: value the dataset is built from and elements are compared to.
      tf_value_fn: callable whose result defines the expected value structure.
      works_on_gpu: if false, the test is skipped when a GPU is available.
    """
    if test.is_gpu_available() and not works_on_gpu:
      self.skipTest("Test case not yet supported on GPU.")

    num_repeats = 3
    dataset = dataset_ops.Dataset.from_tensors(np_value).repeat(num_repeats)
    iterator = dataset.make_initializable_iterator()
    optional = iterator_ops.get_next_as_optional(iterator)
    self.assertIsInstance(optional, optional_ops.Optional)

    expected_structure = structure.Structure.from_value(tf_value_fn())
    self.assertTrue(
        optional.value_structure.is_compatible_with(expected_structure))

    has_value_t = optional.has_value()
    value_t = optional.get_value()
    with self.cached_session() as sess:
      # Before initializing the iterator, evaluating the optional fails with
      # a FailedPreconditionError.
      for tensor in (has_value_t, value_t):
        with self.assertRaises(errors.FailedPreconditionError):
          sess.run(tensor)

      # For each element of the dataset, assert that the optional evaluates to
      # the expected value.
      sess.run(iterator.initializer)
      for _ in range(num_repeats):
        has_value, value = sess.run([has_value_t, value_t])
        self.assertTrue(has_value)
        self._assertElementValueEqual(np_value, value)

      # After exhausting the iterator, `next_elem.has_value()` will evaluate to
      # false, and attempting to get the value will fail. Checked twice.
      for _ in range(2):
        self.assertFalse(sess.run(has_value_t))
        with self.assertRaises(errors.InvalidArgumentError):
          sess.run(value_t)
Example #18
0
  def benchmarkSamplingMVNDiag(self):
    """Benchmarks sampling from a mixture of `MultivariateNormalDiag` components.

    Sweeps component count, batch size, feature count and sample size, on CPU
    and (when available) on GPU, delegating each run to
    `_runSamplingBenchmark`.
    """
    logging.vlog(
        2, "mvn_diag\tuse_gpu\tcomponents\tbatch\tfeatures\tsample\twall_time")

    def create_distribution(batch_size, num_components, num_features):
      """Builds a `ds.Mixture` with randomly initialised diagonal-MVN parts."""
      mixture_weights = ds.Categorical(
          logits=np.random.randn(batch_size, num_components))
      locs = [
          variables.Variable(np.random.randn(batch_size, num_features))
          for _ in range(num_components)
      ]
      scales = [
          variables.Variable(np.random.rand(batch_size, num_features))
          for _ in range(num_components)
      ]
      components = [
          ds.MultivariateNormalDiag(loc=loc, scale_diag=scale)
          for loc, scale in zip(locs, scales)
      ]
      return ds.Mixture(
          mixture_weights, components, use_static_graph=self.use_static_graph)

    # Same iteration order as four nested loops, outermost first.
    sweep = [
        (num_components, batch_size, num_features, sample_size)
        for num_components in (1, 8, 16)
        for batch_size in (1, 32)
        for num_features in (1, 64, 512)
        for sample_size in (1, 32, 128)
    ]

    for use_gpu in (False, True):
      if use_gpu and not test.is_gpu_available():
        continue
      for num_components, batch_size, num_features, sample_size in sweep:
        self._runSamplingBenchmark(
            "mvn_diag",
            create_distribution=create_distribution,
            use_gpu=use_gpu,
            num_components=num_components,
            batch_size=batch_size,
            num_features=num_features,
            sample_size=sample_size)
  def testGradient(self):
    """Checks that layout optimization rewrites conv ops (and grads) to NCHW.

    Builds two stacked conv2d layers with a gradient-descent train op, runs
    `tf_optimizer.OptimizeGraph` with `optimize_tensor_layout=True`, and
    expects exactly five Conv2D / Conv2DBackprop* nodes, all with
    data_format NCHW. Skipped when no CUDA GPU is available.
    """
    if not test.is_gpu_available(cuda_only=True):
      self.skipTest('GPU required')

    random_seed.set_random_seed(0)
    x = random_ops.truncated_normal([1, 200, 200, 3], seed=0)
    y = conv_layers.conv2d(x, 32, [3, 3])
    z = conv_layers.conv2d(y, 32, [3, 3])
    optimizer = gradient_descent.GradientDescentOptimizer(1e-4)
    loss = math_ops.reduce_mean(z)
    train_op = optimizer.minimize(loss)
    graph = ops.get_default_graph()
    graph.add_to_collection('train_op', train_op)
    meta_graph = saver_lib.export_meta_graph(graph_def=graph.as_graph_def())

    rewrite_options = rewriter_config_pb2.RewriterConfig(
        optimize_tensor_layout=True)
    optimized_graph = tf_optimizer.OptimizeGraph(rewrite_options, meta_graph)

    found = 0
    for node in optimized_graph.node:
      if node.op in ['Conv2D', 'Conv2DBackpropFilter', 'Conv2DBackpropInput']:
        found += 1
        # `.s` is a protobuf bytes field, so compare against a bytes literal;
        # a str literal never compares equal to it on Python 3.
        self.assertEqual(node.attr['data_format'].s, b'NCHW')
    self.assertEqual(found, 5)
  def testSendingLargeGraphDefsWorks(self):
    """Checks that a graph def larger than 4 MiB reaches the debug server.

    Chains 50,000 identity ops after a variable, runs the result through a
    `GrpcDebugWrapperSession`, and inspects what `self.debug_server` received.
    """
    session_config = session_debug_testlib.no_rewrite_session_config()
    with self.test_session(use_gpu=True, config=session_config) as sess:
      tail = variables.Variable(42.0, name="original_u")
      for _ in xrange(50 * 1000):
        tail = array_ops.identity(tail)
      sess.run(variables.global_variables_initializer())

      def watch_fn(fetches, feeds):
        # Both arguments are unused: the same watch options apply to every run.
        del fetches, feeds
        return framework.WatchOptions(
            debug_ops=["DebugIdentity"],
            node_name_regex_whitelist=r"original_u")

      debug_sess = grpc_wrapper.GrpcDebugWrapperSession(
          sess, "localhost:%d" % self.debug_server_port, watch_fn=watch_fn)
      self.assertAllClose(42.0, debug_sess.run(tail))

      self.assertAllClose(
          [42.0],
          self.debug_server.debug_tensor_values["original_u:0:DebugIdentity"])

      # Two partition graphs are expected with a GPU, otherwise one.
      expected_partitions = 2 if test.is_gpu_available() else 1
      partition_graph_defs = self.debug_server.partition_graph_defs
      self.assertEqual(expected_partitions, len(partition_graph_defs))

      largest = max(
          len(graph_def.SerializeToString())
          for graph_def in self.debug_server.partition_graph_defs)
      self.assertGreater(largest, 4 * 1024 * 1024)
  def test_specify_initial_state_keras_tensor(self, layer_class):
    """Checks that `layer_class` accepts Keras tensors as `initial_state`.

    Builds a model whose initial state comes from extra `keras.Input`s,
    verifies the first state tensor is recorded on the layer's inbound node,
    and fits the model on random data. Does nothing without a CUDA GPU.

    Args:
      layer_class: the recurrent layer class under test; two state inputs are
        used for `keras.layers.CuDNNLSTM`, one for anything else.
    """
    if not test.is_gpu_available(cuda_only=True):
      return

    with self.test_session(use_gpu=True):
      input_size, timesteps, units, num_samples = 10, 6, 2, 32
      num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1

      inputs = keras.Input((timesteps, input_size))
      state_inputs = [keras.Input((units,)) for _ in range(num_states)]
      layer = layer_class(units)
      # A single state is passed bare rather than as a one-element list.
      if len(state_inputs) == 1:
        state_arg = state_inputs[0]
      else:
        state_arg = state_inputs
      output = layer(inputs, initial_state=state_arg)
      self.assertIn(state_inputs[0], layer._inbound_nodes[0].input_tensors)

      model = keras.models.Model([inputs] + state_inputs, output)
      model.compile(loss='categorical_crossentropy', optimizer='adam')

      input_data = np.random.random((num_samples, timesteps, input_size))
      state_data = [
          np.random.random((num_samples, units)) for _ in range(num_states)
      ]
      targets = np.random.random((num_samples, units))
      model.fit([input_data] + state_data, targets)
  def benchmarkMatrixBandPartOp(self):
    """Benchmarks `array_ops.matrix_band_part` over shapes and band limits.

    Each (shape, limits) pair is benchmarked on "/cpu:0" and, when a GPU is
    available, on "/gpu:0" as well. Each run uses a fresh graph and session.
    """
    def run_on(device, shape_, limits, name_template):
      with ops.Graph().as_default():
        with session.Session() as sess:
          with ops.device(device):
            matrix = variables.Variable(array_ops.ones(shape_))
            band = array_ops.matrix_band_part(matrix, limits[0], limits[1])
            variables.global_variables_initializer().run()
            self.run_op_benchmark(
                sess,
                control_flow_ops.group(band),
                min_iters=10,
                name=name_template.format(shape=shape_, limits=limits))

    all_limits = ((-1, -1), (-1, 0), (0, -1), (2, 2))
    for shape_ in self.shapes:
      for limits in all_limits:
        run_on("/cpu:0", shape_, limits,
               "matrix_band_part_cpu_{shape}_{limits}")
        if test_lib.is_gpu_available(True):
          run_on("/gpu:0", shape_, limits,
                 "matrix_band_part_gpu_{shape}_{limits}")
 def test_cudnn_rnn_basics(self):
   """Runs `layer_test` on CuDNNGRU and CuDNNLSTM with basic option variants.

   For each layer class, `return_sequences` is tried as True then False,
   followed by `go_backwards` as True then False. Does nothing without a CUDA
   GPU.
   """
   if not test.is_gpu_available(cuda_only=True):
     return

   with self.test_session(use_gpu=True):
     input_size, timesteps, units, num_samples = 10, 6, 2, 32
     custom_objects = {
         'keras.layers.CuDNNGRU': keras.layers.CuDNNGRU,
         'keras.layers.CuDNNLSTM': keras.layers.CuDNNLSTM,
     }
     for layer_class in (keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM):
       variants = [
           {'units': units, 'return_sequences': flag}
           for flag in (True, False)
       ]
       variants += [
           {'units': units, 'go_backwards': flag}
           for flag in (True, False)
       ]
       for layer_kwargs in variants:
         with keras.utils.CustomObjectScope(custom_objects):
           testing_utils.layer_test(
               layer_class,
               kwargs=layer_kwargs,
               input_shape=(num_samples, timesteps, input_size))
  def test_regularizer(self, layer_class):
    """Checks the losses that regularizers add to `layer_class`.

    Kernel, recurrent and bias regularizers are expected to add three losses
    once the layer is built; an activity regularizer is expected to add one
    loss for the input the layer was called on. Does nothing without a CUDA
    GPU.

    Args:
      layer_class: the recurrent layer class under test.
    """
    if not test.is_gpu_available(cuda_only=True):
      return

    with self.test_session(use_gpu=True):
      input_size, timesteps, units, num_samples = 10, 6, 2, 32
      step_shape = (timesteps, input_size)

      weight_regularized = layer_class(
          units,
          return_sequences=False,
          input_shape=step_shape,
          kernel_regularizer=keras.regularizers.l1(0.01),
          recurrent_regularizer=keras.regularizers.l1(0.01),
          bias_regularizer='l2')
      weight_regularized.build((None, None, input_size))
      self.assertEqual(len(weight_regularized.losses), 3)

      activity_regularized = layer_class(
          units,
          return_sequences=False,
          input_shape=step_shape,
          activity_regularizer='l2')
      self.assertTrue(activity_regularized.activity_regularizer)
      batch = keras.backend.variable(
          np.ones((num_samples, timesteps, input_size)))
      activity_regularized(batch)
      self.assertEqual(len(activity_regularized.get_losses_for(batch)), 1)
Example #25
0
  def testTimelineGpu(self):
    """Checks that a GPU run yields step stats that convert to valid traces.

    Runs a small constant-arithmetic graph with FULL_TRACE on a forced-GPU
    session, verifies the expected GPU device entries are present, and
    validates the Chrome trace for every combination of the `show_memory`
    and `show_dataflow` options used below. Skipped when no CUDA GPU is
    available.
    """
    if not test.is_gpu_available(cuda_only=True):
      # Report a skip rather than a silent pass when the test cannot run.
      self.skipTest('GPU required')

    run_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_metadata = config_pb2.RunMetadata()

    with self.session(force_gpu=True) as sess:
      const1 = constant_op.constant(1.0, name='const1')
      const2 = constant_op.constant(2.0, name='const2')
      result = math_ops.add(const1, const2) + const1 * const2
      sess.run(result, options=run_options, run_metadata=run_metadata)
    self.assertTrue(run_metadata.HasField('step_stats'))
    step_stats = run_metadata.step_stats
    devices = [d.device for d in step_stats.dev_stats]
    # assertIn prints the device list on failure, unlike assertTrue(x in y).
    self.assertIn('/job:localhost/replica:0/task:0/device:GPU:0', devices)
    self.assertIn('/device:GPU:0/stream:all', devices)

    trace_variants = (
        {},
        {'show_dataflow': False},
        {'show_memory': False},
        {'show_memory': False, 'show_dataflow': False},
    )
    for trace_kwargs in trace_variants:
      # A fresh Timeline is built for every variant.
      tl = timeline.Timeline(step_stats)
      ctf = tl.generate_chrome_trace_format(**trace_kwargs)
      self._validateTrace(ctf)
  def benchmarkMatrixSolveLsOp(self):
    """Benchmarks `matrix_solve_ls` on CPU and, where eligible, on GPU.

    For every shape in `self.matrix_shapes` and every right-hand-side count in
    (1, 2, last matrix dimension) the CPU benchmark always runs. The GPU
    benchmark runs only when a GPU is available and the shape either has fewer
    than three dimensions or a leading dimension below 513.
    """
    gpu_available = test_lib.is_gpu_available(True)
    l2_regularizer = 1.0

    for matrix_shape in self.matrix_shapes:
      for num_rhs in (1, 2, matrix_shape[-1]):
        # (device string, benchmark name template) pairs to run, CPU first.
        targets = [
            ("/cpu:0",
             "matrix_solve_ls_cpu_shape_{matrix_shape}_num_rhs_{num_rhs}"),
        ]
        if gpu_available and (len(matrix_shape) < 3 or matrix_shape[0] < 513):
          targets.append(
              ("/gpu:0",
               "matrix_solve_ls_gpu_shape_{matrix_shape}_num_rhs_"
               "{num_rhs}"))

        for device, name_template in targets:
          # Each benchmark gets its own graph and session.
          with ops.Graph().as_default(), \
              session.Session(config=benchmark.benchmark_config()) as sess, \
              ops.device(device):
            matrix, rhs = _GenerateTestData(matrix_shape, num_rhs)
            solution = linalg_ops.matrix_solve_ls(matrix, rhs, l2_regularizer)
            variables.global_variables_initializer().run()
            self.run_op_benchmark(
                sess,
                control_flow_ops.group(solution),
                min_iters=25,
                store_memory_usage=False,
                name=name_template.format(
                    matrix_shape=matrix_shape, num_rhs=num_rhs))
  def testSelectOpScalarCondition(self):
    """Checks layout handling of a Select op with a scalar condition.

    Does nothing unless a CUDA GPU is available.
    """
    if not test.is_gpu_available(cuda_only=True):
      return

    random_seed.set_random_seed(0)
    x = random_ops.truncated_normal([1, 784], seed=0)
    conv = _two_layer_model(x)
    doubled = math_ops.add(conv, conv)
    condition = constant_op.constant(True)
    selected = gen_math_ops._select(condition, conv, doubled)
    output = array_ops.identity(selected)

    # Reference values from a session created without an explicit config.
    with session.Session() as sess:
      output_val_ref = sess.run(output)

    # Same fetch under the config from _get_config(), collecting run metadata.
    with session.Session(config=_get_config()) as sess:
      metadata = config_pb2.RunMetadata()
      output_val = sess.run(output, run_metadata=metadata)

    nodes = [node.name for node in metadata.cost_graph.node]
    num_transposes = sum(1 for name in nodes if _is_transpose(name))

    self.assertEqual(2, num_transposes)
    self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    self._assert_trans_nchw_to_nhwc('Select-0-0', nodes)
    self.assertAllClose(output_val_ref, output_val, atol=1e-3)
  def testReverseWithConstDims(self):
    """Checks layout handling of a reverse op whose dims are a constant.

    Does nothing unless a CUDA GPU is available.
    """
    if not test.is_gpu_available(cuda_only=True):
      return

    random_seed.set_random_seed(0)
    x = random_ops.truncated_normal([1, 784], seed=0)
    conv = _two_layer_model(x)
    dims = constant_op.constant([3, 1], name='DimsConst')
    reversed_conv = array_ops.reverse(conv, dims)
    output = array_ops.identity(reversed_conv)

    # Reference values from a session created without an explicit config.
    with session.Session() as sess:
      output_val_ref = sess.run(output)

    # Same fetch under the config from _get_config(), collecting run metadata.
    with session.Session(config=_get_config()) as sess:
      metadata = config_pb2.RunMetadata()
      output_val = sess.run(output, run_metadata=metadata)

    nodes = [node.name for node in metadata.cost_graph.node]
    num_transposes = sum(1 for name in nodes if _is_transpose(name))

    # Four transposes were initially added in the Expand phase of
    # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    self.assertEqual(2, num_transposes)
    self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    self._assert_trans_nchw_to_nhwc('ReverseV2-0-0', nodes)
    self.assertIn('ReverseV2-1-LayoutOptimizer', nodes)
    self.assertAllClose(output_val_ref, output_val, atol=1e-3)
  def testConcatWithControlDependency(self):
    """Checks layout handling of a concat built under a control dependency.

    Does nothing unless a CUDA GPU is available.
    """
    if not test.is_gpu_available(cuda_only=True):
      return

    random_seed.set_random_seed(0)
    x = random_ops.truncated_normal([1, 784], seed=0)
    conv = _two_layer_model(x)
    axis = constant_op.constant(3)
    var = variables.Variable(3)
    assign = state_ops.assign(var, 6)
    # The concat is created with a control dependency on the assignment.
    with ops.control_dependencies([assign]):
      joined = array_ops.concat([conv, conv], axis)
    output = array_ops.identity(joined)

    # Reference values from a session created without an explicit config.
    with session.Session() as sess:
      output_val_ref = sess.run(output)

    # Same fetch under the config from _get_config(), collecting run metadata.
    with session.Session(config=_get_config()) as sess:
      metadata = config_pb2.RunMetadata()
      output_val = sess.run(output, run_metadata=metadata)

    nodes = [node.name for node in metadata.cost_graph.node]
    num_transposes = sum(1 for name in nodes if _is_transpose(name))

    # Four transposes were initially added in the Expand phase of
    # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    self.assertEqual(2, num_transposes)
    self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    self._assert_trans_nchw_to_nhwc('concat-0-0', nodes)
    self.assertIn('concat-2-LayoutOptimizer', nodes)
    self.assertAllClose(output_val_ref, output_val, atol=1e-3)
  def testTwoConvLayers(self):
    """Checks the transposes left in a two-layer convolution model.

    Does nothing unless a CUDA GPU is available.
    """
    if not test.is_gpu_available(cuda_only=True):
      return

    random_seed.set_random_seed(0)
    x = random_ops.truncated_normal([1, 784], seed=0)
    output = two_layer_model(x)

    # Reference values from a session created without an explicit config.
    with session.Session() as sess:
      output_val_ref = sess.run(output)

    # Same fetch under the config from get_config(), collecting run metadata.
    with session.Session(config=get_config()) as sess:
      metadata = config_pb2.RunMetadata()
      output_val = sess.run(output, run_metadata=metadata)

    nodes = [node.name for node in metadata.cost_graph.node]
    num_transposes = sum(
        1 for name in nodes if name.startswith('LayoutOptimizerTranspose'))

    # Four transposes were initially added in the Expand phase of
    # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    self.assertEqual(2, num_transposes)
    for expected_node in (
        'LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Reshape-0',
        'LayoutOptimizerTransposeNCHWToNHWC-Relu_1-MaxPool_1'):
      self.assertIn(expected_node, nodes)

    self.assertAllClose(output_val_ref, output_val, atol=1e-3)
Example #31
0
  def testTraceLoopBytes(self):
    """Checks requested-bytes accounting for the gradient of a GPU while loop.

    Returns immediately when no GPU is available.
    """
    if not test.is_gpu_available():
      return

    ops.reset_default_graph()
    steps = 100

    with ops.device('/gpu:0'):
      x = array_ops.ones((100, 100), dtype=dtypes.float32)
      n = array_ops.constant(steps, dtype=dtypes.int32)
      x1 = array_ops.ones((100, 100))

      x *= x1

      def _square_step(i, acc):
        # One loop iteration: square the carried value and bump the counter.
        acc *= acc
        return i + 1, acc

      _, y = control_flow_ops.while_loop(
          lambda i, _: i < n, _square_step,
          [array_ops.constant(0), x])

    grad = gradients.gradients(y, [x1])

    with session.Session(config=self._no_rewrite_session_config()) as sess:
      trace_options = config_pb2.RunOptions(
          trace_level=config_pb2.RunOptions.FULL_TRACE)
      run_metadata = config_pb2.RunMetadata()
      sess.run(grad, options=trace_options, run_metadata=run_metadata)

      # Start from the time-and-memory preset, then override thresholds,
      # selected columns and output target.
      options = option_builder.ProfileOptionBuilder.time_and_memory()
      overrides = (
          ('min_bytes', 0),
          ('min_micros', 0),
          ('select', ('bytes', 'peak_bytes', 'output_bytes',
                      'residual_bytes')),
          ('output', 'none'),
      )
      for key, value in overrides:
        options[key] = value

      ret_pb = model_analyzer.profile(
          sess.graph, run_meta=run_metadata, cmd='scope', options=options)
      self.assertGreater(ret_pb.total_requested_bytes, 1000000)
 def testDepthwiseConv2DFilterGradExplicit(self):
   """Runs the filter-gradient check over every explicit-padding config.

   Each config from `CheckGradConfigsToTestExplicit()` is checked for every
   selected data type and data format with `test_input=False`.
   """
   for index, config in enumerate(CheckGradConfigsToTestExplicit()):
     (input_size, filter_size, output_size, stride, padding,
      dilations) = config
     tf_logging.info(
         "Testing DepthwiseConv2DFilterGradExplicit, %dth config: %r * %r, "
         "stride: %d, padding: %s", index, input_size, filter_size, stride,
         padding)
     # double datatype is currently not supported for convolution ops
     # on the ROCm platform, so float64 is added only on non-ROCm builds.
     data_types = [dtypes.float16, dtypes.float32]
     if not test.is_built_with_rocm():
       data_types.append(dtypes.float64)
     # NCHW is exercised only when a GPU is available.
     if test.is_gpu_available():
       data_formats = ["NHWC", "NCHW"]
     else:
       data_formats = ["NHWC"]
     for data_type in data_types:
       for data_format in data_formats:
         self._ConstructAndTestGradient(
             input_size,
             filter_size,
             output_size,
             stride,
             padding,
             data_type,
             test_input=False,
             use_gpu=True,
             data_format=data_format,
             dilations=dilations)
  def testMaxPoolGradV2(self):
    """Checks layout handling of MaxPoolGradV2 with fed strides.

    Does nothing unless a CUDA GPU is available.
    """
    if not test.is_gpu_available(cuda_only=True):
      return

    random_seed.set_random_seed(0)
    x = random_ops.truncated_normal([1, 784], seed=0)
    conv = _two_layer_model(x)
    ksize = constant_op.constant([1, 2, 3, 1], shape=[4])
    # Strides are a placeholder, so their value is supplied at run time.
    strides = array_ops.placeholder(dtype='int32', shape=[4])
    pool_grad = gen_nn_ops.max_pool_grad_v2(conv, conv, conv, ksize,
                                            strides, 'VALID')
    output = array_ops.identity(pool_grad)

    feeds = {strides: [1, 3, 2, 1]}

    # Reference values from a session created without an explicit config.
    with session.Session() as sess:
      output_val_ref = sess.run(output, feed_dict=feeds)

    # Same fetch under the config from _get_config(), collecting run metadata.
    with session.Session(config=_get_config()) as sess:
      metadata = config_pb2.RunMetadata()
      output_val = sess.run(output, run_metadata=metadata, feed_dict=feeds)

    nodes = [node.name for node in metadata.cost_graph.node]
    num_transposes = sum(1 for name in nodes if _is_transpose(name))

    self.assertEqual(2, num_transposes)
    self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    self._assert_trans_nchw_to_nhwc('MaxPoolGradV2-0-0', nodes)
    self._assert_vec_nhwc_to_nchw('MaxPoolGradV2-4', nodes)
    self.assertIn('MaxPoolGradV2-3-LayoutOptimizer', nodes)
    self.assertAllClose(output_val_ref, output_val, atol=1e-3)
Example #34
0
        def benchmarkTridiagonalMulOp(self):
            """Benchmarks `tridiagonal_matmul` against `self.baseline`.

            Runs on CPU, plus GPU when a CUDA GPU is available, for every
            entry of `self.sizes`. Each device/size pair gets its own graph
            and session, in which the baseline op is benchmarked first.
            """
            devices = [('/cpu:0', 'cpu')]
            if test.is_gpu_available(cuda_only=True):
                devices.append(('/gpu:0', 'gpu'))

            for (device_id, device_name), (m, batch_size, n) in (
                    itertools.product(devices, self.sizes)):
                with ops.Graph().as_default(), \
                    session.Session(config=benchmark.benchmark_config()) as sess, \
                    ops.device(device_id):
                    upper, diag, lower, vec = self._generateData(
                        batch_size, m, n)
                    baseline_op = self.baseline(upper, diag, lower, vec)
                    matmul_op = linalg_impl.tridiagonal_matmul(
                        (upper, diag, lower), vec, diagonals_format='sequence')

                    variables.global_variables_initializer().run()

                    name_args = (device_name, batch_size, m, n)
                    # (op, benchmark name format) pairs, baseline first.
                    benchmarks = (
                        (baseline_op,
                         'tridiagonal_matmul_baseline_%s'
                         '_batch_size_%d_m_%d_n_%d'),
                        (matmul_op,
                         'tridiagonal_matmul_%s_batch_size_%d_m_%d_n_%d'),
                    )
                    for op, name_format in benchmarks:
                        self.run_op_benchmark(
                            sess,
                            control_flow_ops.group(op),
                            min_iters=10,
                            store_memory_usage=False,
                            name=name_format % name_args)
Example #35
0
    def _test_runtime_with_model(self, model):
        """Trains `model` for `self.epoch` fits and checks its reported runtime.

        Asserts that the first recorded loss of each fit differs from the
        previous one, then that the second output of `model.predict` starts
        with the GPU runtime marker when a GPU is available and the CPU
        marker otherwise.

        Args:
            model: Model to compile, fit and predict with; `predict` is
                expected to return two outputs.
        """
        (x_train, y_train), _ = testing_utils.get_test_data(
            train_samples=self.batch,
            test_samples=0,
            input_shape=(self.timestep, self.input_shape),
            num_classes=self.output_shape)
        y_train = keras.utils.to_categorical(y_train, self.output_shape)

        model.compile(optimizer='sgd', loss=['categorical_crossentropy', None])

        previous_loss = 0
        for _ in range(self.epoch):
            loss_value = model.fit(x_train, y_train).history['loss'][0]
            self.assertNotEqual(previous_loss, loss_value)
            previous_loss = loss_value

        _, runtime_value = model.predict(x_train)
        if test.is_gpu_available():
            expected_runtime = rnn._RUNTIME_GPU
        else:
            expected_runtime = rnn._RUNTIME_CPU
        self.assertEqual(runtime_value[0], expected_runtime)
    def test_loop_with_vars_intertwined(self):
        """Test graph with intertwined while loops.

        Does nothing unless a CUDA GPU is available.
        """
        if not test.is_gpu_available(cuda_only=True):
            return

        random_seed.set_random_seed(0)
        x = _input([8, 8])
        initial = array_ops.ones(array_ops.shape(x))
        _, _, k, l = _loop_vars_intertwined(initial, x, _matmul_act,
                                            _matmul_act)
        optimizer = gradient_descent.GradientDescentOptimizer(
            learning_rate=0.01)
        g = optimizer.compute_gradients(k, [x])
        output = (k, l, g)

        output_val_ref, output_val, cost_graph = self._run(output)
        node_map = _build_node_map(cost_graph.node)

        # Both matmul/activation pairs inside the loop must emit fp16.
        for node_name in ('while/MatMul', 'while/Relu', 'while/MatMul_1',
                          'while/Relu_1'):
            self._assert_output_fp16(node_map, node_name)
        self.assertAllClose(
            output_val_ref, output_val, atol=1e-3, rtol=1e-3)
  def testDepthwiseConv2DFormat(self):
    """Verifies depthwise convolution values in NCHW format on GPU.

    Returns immediately when no GPU is available.
    """
    if not test.is_gpu_available():
      return

    for index, config in enumerate(ConfigsToTest()):
      input_size, filter_size, _, stride, padding, dilations = config
      tf_logging.info(
          "Testing DepthwiseConv2DFormat, %dth config: %r * %r, stride: %d, "
          "padding: %s", index, input_size, filter_size, stride, padding)
      # double datatype is currently not supported for convolution ops
      # on the ROCm platform, so float64 is added only on non-ROCm builds.
      data_types = [dtypes.float32]
      if not test.is_built_with_rocm():
        data_types.append(dtypes.float64)
      for data_type in data_types:
        # float32 gets the looser tolerance; anything else uses 1e-12.
        if data_type == dtypes.float32:
          tolerance = 1e-4
        else:
          tolerance = 1e-12
        self._VerifyValues(
            input_size,
            filter_size,
            stride,
            padding,
            data_type,
            use_gpu=True,
            data_format="NCHW",
            dilations=dilations,
            tolerance=tolerance)
Example #38
0
  def testBinaryOpSecondPort(self):
    """Checks the transposes left in the `_model_with_second_port()` graph.

    Does nothing unless a CUDA GPU is available.
    """
    if not test.is_gpu_available(cuda_only=True):
      return

    output = _model_with_second_port()

    # Reference values from a session created without an explicit config.
    with session.Session() as sess:
      output_val_ref = sess.run(output)

    # Same fetch under the config from _get_config(), collecting run metadata.
    with session.Session(config=_get_config()) as sess:
      metadata = config_pb2.RunMetadata()
      output_val = sess.run(output, run_metadata=metadata)

    nodes = [node.name for node in metadata.cost_graph.node]
    num_transposes = sum(
        1 for name in nodes if name.startswith('LayoutOptimizerTranspose'))

    self.assertEqual(2, num_transposes)
    for expected_node in (
        'LayoutOptimizerTransposeNHWCToNCHW-FusedBatchNorm-0',
        'LayoutOptimizerTransposeNCHWToNHWC-Add-0-0'):
      self.assertIn(expected_node, nodes)
    self.assertAllClose(output_val_ref, output_val, atol=1e-3)
Example #39
0
    def test3DGPU(self):
        """Compares GPU transposes of large 3-D arrays against NumPy.

        Returns immediately when no CUDA GPU is available.
        """
        if not test.is_gpu_available(cuda_only=True):
            return

        datatypes = [
            np.int8, np.float16, np.float32, np.float64, np.complex128
        ]
        base_shapes = [[4, 1000, 3], [4, 1000, 8], [4, 1000, 13],
                       [4, 3, 1000], [4, 8, 1000], [4, 13, 1000]]
        large_shapes = base_shapes * 3
        # One permutation per entry of large_shapes (18 in total).
        perms = ([[0, 2, 1]] * 6 + [[2, 1, 0]] * 6 + [[1, 2, 0]] * 3 +
                 [[2, 0, 1]] * 3)
        for datatype in datatypes:
            for input_shape, perm in zip(large_shapes, perms):
                num_elements = np.prod(input_shape)
                inp = np.arange(
                    1, num_elements + 1, dtype=datatype).reshape(input_shape)
                np_ans = self._np_transpose(inp, perm)
                with self.test_session(use_gpu=True):
                    inx = ops.convert_to_tensor(inp)
                    y = array_ops.transpose(inx, perm)
                    tf_ans = y.eval()
                self.assertAllEqual(np_ans, tf_ans)
                self.assertShapeEqual(np_ans, y)
Example #40
0
  def testStridedSliceWithMask(self):
    """Checks layout handling of a StridedSlice with begin and end masks.

    Does nothing unless a CUDA GPU is available.
    """
    if not test.is_gpu_available(cuda_only=True):
      return

    random_seed.set_random_seed(0)
    x = random_ops.truncated_normal([1, 784], seed=0)
    conv = _two_layer_model(x)
    # This will generate a StridedSlice op with begin mask and end mask.
    sliced = conv[:, :, 1:-1, :]
    output = array_ops.identity(sliced)

    # Reference values from a session created without an explicit config.
    with session.Session() as sess:
      output_val_ref = sess.run(output)

    # Same fetch under the config from _get_config(), collecting run metadata.
    with session.Session(config=_get_config()) as sess:
      metadata = config_pb2.RunMetadata()
      output_val = sess.run(output, run_metadata=metadata)

    nodes = [node.name for node in metadata.cost_graph.node]
    num_transposes = sum(
        1 for name in nodes if name.startswith('LayoutOptimizerTranspose'))

    # Four transposes were initially added in the Expand phase of
    # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    self.assertEqual(2, num_transposes)
    for expected_node in (
        'LayoutOptimizerTransposeNHWCToNCHW-Conv2D-0',
        'LayoutOptimizerTransposeNCHWToNHWC-strided_slice-0-0',
        'LayoutOptimizer-strided_slice-strided_slice/stack',
        'LayoutOptimizer-strided_slice-strided_slice/stack_1',
        'LayoutOptimizer-strided_slice-strided_slice/stack_2'):
      self.assertIn(expected_node, nodes)
    self.assertAllClose(output_val_ref, output_val, atol=1e-3)
Example #41
0
  def testSamplingAtRandnSwitchover(self):
    """Validates moments just below and just above the randn switchover.

    The randn sampler is used as the bounds are moved farther from the mean,
    and the probability of accepting a sample increases the farther the
    bounds are from the mean. This test asserts that at the point of
    switchover, both samplers are working (not raising an error or returning
    nan) and returning the expected moments.
    """
    use_gpu = test.is_gpu_available()
    switchover = _get_stddev_inside_bounds_before_using_randn(use_gpu)

    epsilon = 0.001
    # Upper bounds on either side of the switchover point.
    for maxval in (switchover - epsilon, switchover + epsilon):
      self.validateMoments(
          shape=[10**6],
          mean=0.,
          stddev=1.0,
          minval=-epsilon,
          maxval=maxval)
  def test_conv_bn_dropout(self):
    """Test dropout precision of convolution batch norm graph.

    Does nothing unless a CUDA GPU is available.
    """
    with compat.forward_compatibility_horizon(2019, 6, 7):
      if not test.is_gpu_available(cuda_only=True):
        return

      random_seed.set_random_seed(0)
      x = _input([2, 8, 8, 1])
      first_block = _conv_bn(x)
      dropped = nn.dropout(first_block, rate=0.5)
      second_block = _conv_bn(dropped)
      y = array_ops.identity(second_block)
      optimizer = gradient_descent.GradientDescentOptimizer(
          learning_rate=0.01)
      g = optimizer.compute_gradients(y, [x])
      output = (y, g)

      # First run: only the cost graph is inspected for fp16 outputs.
      output_val_ref, output_val, cost_graph = self._run(output)
      node_map = _build_node_map(cost_graph.node)
      for node_name in ('Conv2D', 'FusedBatchNormV3', 'dropout/mul',
                        'Conv2D_1'):
        self._assert_output_fp16(node_map, node_name)

      # Second run: the values compared below come from this run.
      output_val_ref, output_val, _ = self._run(output)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3, rtol=1e-3)
Example #43
0
    def testSparseDevicePlacement(self, use_resource):
        """Runs one sparse minimize step for int32 and int64 indices.

        Args:
            use_resource: When true, variables are created as
                `ResourceVariable`; otherwise as `Variable`.
        """
        if use_resource:
            make_variable = resource_variable_ops.ResourceVariable
        else:
            make_variable = variables.Variable

        for index_dtype in (dtypes.int32, dtypes.int64):
            # If a GPU is available, tests that all optimizer ops can be
            # placed on it (i.e. they have GPU kernels).
            with self.cached_session(force_gpu=test.is_gpu_available()):
                global_step = make_variable(array_ops.zeros([], dtypes.int64))
                var = make_variable([[1.0], [2.0]])

                indices = constant_op.constant([0, 1], dtype=index_dtype)
                gathered = array_ops.gather(var, indices)
                gathered_sum = math_ops.reduce_sum(gathered)
                optimizer = lazy_adam_gs_optimizer.LazyAdamGSOptimizer(
                    global_step=global_step, learning_rate=3.0)
                minimize_op = optimizer.minimize(
                    gathered_sum, global_step=global_step)
                variables.global_variables_initializer().run()
                minimize_op.run()
Example #44
0
    def testConv2DKernelSmallerThanStrideSame(self, gpu_only=True):
        """Verifies SAME-padded cases whose kernel is smaller than the stride.

        Args:
            gpu_only: When true (the default), the test logs a message and
                returns without verifying anything if no GPU is available.
        """
        if gpu_only and not test.is_gpu_available():
            tf_logging.info("Skipping Conv2DKernelSmallerThanStrideSame test.")
            return

        cases = (
            # expected = [0, 0, 2, 4]
            dict(tensor_in_sizes=[1, 3, 3, 1],
                 filter_in_sizes=[1, 1, 1, 1],
                 bias=[-5.0],
                 strides=[2, 2]),
            # expected = [0, 0, 4, 6]
            dict(tensor_in_sizes=[1, 4, 4, 1],
                 filter_in_sizes=[1, 1, 1, 1],
                 bias=[-5.0],
                 strides=[2, 2]),
            # expected = [4, 0, 1, 0]
            dict(tensor_in_sizes=[1, 4, 4, 1],
                 filter_in_sizes=[2, 2, 1, 1],
                 bias=[-40.0],
                 strides=[3, 3]),
        )
        for case in cases:
            self._VerifyValues(padding="SAME", **case)
    def test_noninlined_funcdef(self):
        """Test graph with non-inlined function subgraph.

        This requires the grappler pass to handle an OpDef that only appears
        in the graph's function registry instead of the global op registry.
        Does nothing unless a CUDA GPU is available.
        """
        if not test.is_gpu_available(cuda_only=True):
            return

        random_seed.set_random_seed(0)
        x = _input([8, 8])
        activated = _matmul_act(x)
        y = _example_noninlined_funcdef(activated)
        optimizer = gradient_descent.GradientDescentOptimizer(
            learning_rate=0.01)
        g = optimizer.compute_gradients(y, [x])
        output = (g, y)

        output_val_ref, output_val, cost_graph = self._run(output)
        node_map = _build_node_map(cost_graph.node)

        self._assert_output_fp16(node_map, 'MatMul')
        self.assertAllClose(
            output_val_ref, output_val, atol=1e-3, rtol=1e-3)
Example #46
0
    def testLoopGPU(self):
        """Checks profiler timing totals for a MatMul inside a GPU while loop.

        Returns immediately when no GPU is available.
        """
        if not test.is_gpu_available():
            return

        device_key = '/job:localhost/replica:0/task:0/gpu:0'
        stream_key = '/gpu:0/stream:all'
        matmul_name = 'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul'

        ops.reset_default_graph()
        with ops.device('/gpu:0'):
            tfprof_node, run_meta = _run_loop_model()
            # The while-loop caused a node to appear 4 times in scheduling.
            ret = _extract_node(run_meta, matmul_name)
            self.assertEqual(len(ret[device_key]), 4)

            total_cpu_execs = sum(
                node.op_end_rel_micros for node in ret[device_key])

            ret = _extract_node(
                run_meta,
                'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul:MatMul')
            self.assertGreaterEqual(len(ret[stream_key]), 4)

            total_accelerator_execs = sum(
                node.op_end_rel_micros for node in ret[stream_key])

            mm_node = lib.SearchTFProfNode(tfprof_node, matmul_name)

            # The profile node must agree with the totals summed above.
            self.assertEqual(mm_node.run_count, 4)
            self.assertEqual(mm_node.accelerator_exec_micros,
                             total_accelerator_execs)
            self.assertEqual(mm_node.cpu_exec_micros, total_cpu_execs)
            self.assertEqual(mm_node.exec_micros,
                             total_cpu_execs + total_accelerator_execs)
    def test_multi_paths_2(self):
        """Test graph with multiple paths.

        Does nothing unless a CUDA GPU is available.
        """
        if not test.is_gpu_available(cuda_only=True):
            return

        random_seed.set_random_seed(0)
        x = _input([8, 8])
        first_path = _matmul_act(x)
        second_path = _matmul_act(x)
        y = first_path + second_path + x
        optimizer = gradient_descent.GradientDescentOptimizer(
            learning_rate=0.01)
        g = optimizer.compute_gradients(y, [x])
        output = (g, y)

        output_val_ref, output_val, cost_graph = self._run(output)
        node_map = _build_node_map(cost_graph.node)

        # Both matmul/activation paths must emit fp16.
        for node_name in ('MatMul', 'Relu', 'MatMul_1', 'Relu_1'):
            self._assert_output_fp16(node_map, node_name)
        self.assertAllClose(
            output_val_ref, output_val, atol=1e-3, rtol=1e-3)
Example #48
0
  def testDeviceWrapperDynamicExecutionNodesAreAllProperlyLocated(self):
    """Checks that a GPU-wrapped GRU cell runs on GPU inside a CPU-scoped RNN.

    Returns immediately when no GPU is available.
    """
    if not test.is_gpu_available():
      # Can't perform this test w/o a GPU
      return

    def _gru_cell_stats(stats):
      # Entries whose node name mentions the wrapped GRU cell.
      return [s for s in stats if "gru_cell" in s.node_name]

    gpu_dev = test.gpu_device_name()
    with self.test_session(use_gpu=True) as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        x = array_ops.zeros([1, 1, 3])
        wrapped_cell = rnn_cell_impl.DeviceWrapper(
            rnn_cell_impl.GRUCell(3), gpu_dev)
        # The RNN is built under a CPU device scope; the wrapper targets the
        # GPU device for the cell.
        with ops.device("/cpu:0"):
          outputs, _ = rnn.dynamic_rnn(
              cell=wrapped_cell, inputs=x, dtype=dtypes.float32)
        run_metadata = config_pb2.RunMetadata()
        trace_opts = config_pb2.RunOptions(
            trace_level=config_pb2.RunOptions.FULL_TRACE)

        sess.run([variables_lib.global_variables_initializer()])
        _ = sess.run(outputs, options=trace_opts, run_metadata=run_metadata)

      cpu_stats, gpu_stats = self._retrieve_cpu_gpu_stats(run_metadata)
      self.assertFalse(_gru_cell_stats(cpu_stats))
      self.assertTrue(_gru_cell_stats(gpu_stats))
  def test_multi_paths_2(self):
    """Test graph with multiple paths.

    Runs only when a CUDA GPU is available. The graph feeds one input through
    two matmul/activation paths, sums them with the input, and checks that the
    matmul and relu nodes emit fp16 and that the results match the reference
    run within tolerance.
    """
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = _input([8, 8])
      y1 = _matmul_act(x)
      y2 = _matmul_act(x)
      y = y1 + y2 + x
      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=0.01)
      g = optimizer.compute_gradients(y, [x])
      output = (g, y)

      output_val_ref, output_val, cost_graph = self._run(output)
      node_map = _build_node_map(cost_graph.node)

      self._assert_output_fp16(node_map, 'MatMul')
      self._assert_output_fp16(node_map, 'Relu')
      self._assert_output_fp16(node_map, 'MatMul_1')
      self._assert_output_fp16(node_map, 'Relu_1')
      # Bump up the tolerance for the ROCm platform
      # The default tolerance (1e-3) results in a tiny fraction (<1%) of
      # miscompares on ROCm platform, and hence the tolerance bump
      # NOTE: is_built_with_rocm must be *called*; the bare function object is
      # always truthy, which silently applied the looser tolerance everywhere.
      tol = 2e-3 if test.is_built_with_rocm() else 1e-3
      self.assertAllClose(output_val_ref, output_val, atol=tol, rtol=tol)
    def testSelectEverythingDetail(self):
        """Profiles a small model with every column selected and checks the dump.

        The profile is written to a file under the test temp directory, its
        header and the `Conv2D` / `DW` rows are checked, and the same profile
        is then regenerated from the saved profile file and compared with the
        first dump.
        """
        ops.reset_default_graph()
        dev = '/device:GPU:0' if test.is_gpu_available() else '/device:CPU:0'
        outfile = os.path.join(test.get_temp_dir(), 'dump')
        # Account for every op type and select all columns checked below.
        opts = (builder(
            builder.trainable_variables_parameter()).with_file_output(
                outfile).with_accounted_types(['.*']).select([
                    'micros', 'bytes', 'params', 'float_ops', 'occurrence',
                    'device', 'op_types', 'input_shapes'
                ]).build())

        # Empty trace/dump step lists; tracing and dumping are requested
        # explicitly via trace_next_step()/dump_next_step() below.
        with profile_context.ProfileContext(test.get_temp_dir(),
                                            trace_steps=[],
                                            dump_steps=[]) as pctx:
            with session.Session() as sess, ops.device(dev):
                x = lib.BuildSmallModel()

                sess.run(variables.global_variables_initializer())
                pctx.trace_next_step()
                pctx.dump_next_step()
                _ = sess.run(x)

                pctx.profiler.profile_name_scope(options=opts)

                with gfile.Open(outfile, 'r') as f:
                    # pylint: disable=line-too-long
                    dump_str = lib.CheckAndRemoveDoc(f.read())
                    outputs = dump_str.split('\n')

                    self.assertEqual(
                        outputs[0],
                        'node name | # parameters | # float_ops | requested bytes | total execution time | accelerator execution time | cpu execution time | assigned devices | op types | op count (run|defined) | input shapes'
                    )
                    for o in outputs[1:]:
                        if o.find('Conv2D ') > 0:
                            # Metrics are the comma-separated fields between
                            # the first '(' and the first ')' of the row.
                            metrics = o[o.find('(') + 1:o.find(')')].split(',')
                            # Make sure time is profiled.
                            # With a GPU, indices 3, 4 and 5 are checked;
                            # without one only 3 and 5 (index 4 is skipped).
                            gap = 1 if test.is_gpu_available() else 2
                            for i in range(3, 6, gap):
                                mat = re.search('(.*)[um]s/(.*)[um]s',
                                                metrics[i])
                                self.assertGreater(float(mat.group(1)), 0.0)
                                self.assertGreater(float(mat.group(2)), 0.0)
                            # Make sure device is profiled.
                            if test.is_gpu_available():
                                self.assertTrue(metrics[6].find('gpu') > 0)
                                self.assertFalse(metrics[6].find('cpu') > 0)
                            else:
                                self.assertFalse(metrics[6].find('gpu') > 0)
                                self.assertTrue(metrics[6].find('cpu') > 0)
                            # Make sure float_ops is profiled.
                            mat = re.search('(.*)k/(.*)k flops',
                                            metrics[1].strip())
                            self.assertGreater(float(mat.group(1)), 0.0)
                            self.assertGreater(float(mat.group(2)), 0.0)
                            # Make sure op_count is profiled.
                            self.assertEqual(metrics[8].strip(), '1/1|1/1')
                            # Make sure input_shapes is profiled.
                            self.assertEqual(metrics[9].strip(),
                                             '0:2x6x6x3|1:3x3x3x6')

                        if o.find('DW (3x3x3x6') > 0:
                            # Make sure the parameter counts are profiled.
                            metrics = o[o.find('(') + 1:o.find(')')].split(',')
                            mat = re.search('(.*)/(.*) params',
                                            metrics[1].strip())
                            self.assertGreater(float(mat.group(1)), 0.0)
                            self.assertGreater(float(mat.group(2)), 0.0)
                    # pylint: enable=line-too-long

        # Test that profiler restored from profile file gives the same result.
        # The first dump is removed so the file read below is freshly written.
        gfile.Remove(outfile)
        # NOTE(review): 'profile_1' is presumably the file produced by
        # dump_next_step() above -- confirm against ProfileContext.
        profile_file = os.path.join(test.get_temp_dir(), 'profile_1')
        with lib.ProfilerFromFile(profile_file) as profiler:
            profiler.profile_name_scope(options=opts)
            with gfile.Open(outfile, 'r') as f:
                self.assertEqual(dump_str, lib.CheckAndRemoveDoc(f.read()))
Example #51
0
 def testNumbersGPU(self):
     """Feed a fixed 2x5 signed matrix to `_testRelu` once per float dtype.

     The test is reported as skipped when no GPU is available.
     """
     if not test.is_gpu_available():
         self.skipTest("No GPU available")
     # Mixed positive and negative entries, converted to each dtype in turn.
     values = [[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]
     for dtype in (np.float16, np.float32, np.float64):
         self._testRelu(np.array(values).astype(dtype))
Example #52
0
 def test_conv3d(self, kwargs, expected_output_shape=None):
   """Run the shared layer test with 2 filters and a 3x3x3 kernel.

   `kwargs` is updated in place with `filters` and `kernel_size`. When it
   names an explicit `data_format` and no CUDA GPU is available, nothing
   is run.
   """
   kwargs.update(filters=2, kernel_size=(3, 3, 3))
   if 'data_format' in kwargs and not test.is_gpu_available(cuda_only=True):
     return
   self._run_test(kwargs, expected_output_shape)
  def test_model_with_crossentropy_losses_channels_first(self):
    """Tests use of all crossentropy losses with `channels_first`.

    Tests `sparse_categorical_crossentropy`, `categorical_crossentropy`,
    and `binary_crossentropy`.
    Verifies that evaluate gives the same result with either `channels_first`
    or `channels_last` image_data_format.

    The body only runs when a CUDA GPU is available. The global Keras image
    data format is restored in a `finally` clause, so a failure while
    building or evaluating a model cannot leak the temporary setting into
    tests that run afterwards.
    """
    def prepare_simple_model(input_tensor, loss_name, target):
      """Builds and compiles a one-`Conv2D` model for the named loss.

      The channel axis follows the image data format that is active when
      this helper is called; the channel count is derived from `target`.
      """
      axis = 1 if K.image_data_format() == 'channels_first' else -1
      loss = None
      num_channels = None
      activation = None
      if loss_name == 'sparse_categorical_crossentropy':
        loss = lambda y_true, y_pred: K.sparse_categorical_crossentropy(  # pylint: disable=g-long-lambda
            y_true, y_pred, axis=axis)
        # Sparse labels hold class indices, so the class count is max + 1.
        num_channels = np.amax(target) + 1
        activation = 'softmax'
      elif loss_name == 'categorical_crossentropy':
        loss = lambda y_true, y_pred: K.categorical_crossentropy(  # pylint: disable=g-long-lambda
            y_true, y_pred, axis=axis)
        num_channels = target.shape[axis]
        activation = 'softmax'
      elif loss_name == 'binary_crossentropy':
        loss = lambda y_true, y_pred: K.binary_crossentropy(y_true, y_pred)  # pylint: disable=unnecessary-lambda
        num_channels = target.shape[axis]
        activation = 'sigmoid'
      predictions = Conv2D(num_channels,
                           1,
                           activation=activation,
                           kernel_initializer='ones',
                           bias_initializer='ones')(input_tensor)
      simple_model = keras.models.Model(inputs=input_tensor,
                                        outputs=predictions)
      simple_model.compile(optimizer='rmsprop', loss=loss)
      return simple_model

    if not test.is_gpu_available(cuda_only=True):
      return

    with test_util.use_gpu():
      losses_to_test = ['sparse_categorical_crossentropy',
                        'categorical_crossentropy', 'binary_crossentropy']

      data_channels_first = np.array([[[[8., 7.1, 0.], [4.5, 2.6, 0.55],
                                        [0.9, 4.2, 11.2]]]], dtype=np.float32)
      # Labels for testing 4-class sparse_categorical_crossentropy, 4-class
      # categorical_crossentropy, and 2-class binary_crossentropy:
      labels_channels_first = [np.array([[[[0, 1, 3], [2, 1, 0], [2, 2, 1]]]], dtype=np.float32),  # pylint: disable=line-too-long
                               np.array([[[[0, 1, 0], [0, 1, 0], [0, 0, 0]],
                                          [[1, 0, 0], [0, 0, 1], [0, 1, 0]],
                                          [[0, 0, 0], [1, 0, 0], [0, 0, 1]],
                                          [[0, 0, 1], [0, 0, 0], [1, 0, 0]]]], dtype=np.float32),  # pylint: disable=line-too-long
                               np.array([[[[0, 1, 0], [0, 1, 0], [0, 0, 1]],
                                          [[1, 0, 1], [1, 0, 1], [1, 1, 0]]]], dtype=np.float32)]  # pylint: disable=line-too-long
      # Compute one loss for each loss function in the list `losses_to_test`:
      loss_channels_last = [0., 0., 0.]
      loss_channels_first = [0., 0., 0.]

      old_data_format = K.image_data_format()
      try:
        # Evaluate a simple network with channels last, with all three loss
        # functions:
        K.set_image_data_format('channels_last')
        data = np.moveaxis(data_channels_first, 1, -1)
        for index, loss_function in enumerate(losses_to_test):
          labels = np.moveaxis(labels_channels_first[index], 1, -1)
          inputs = keras.Input(shape=(3, 3, 1))
          model = prepare_simple_model(inputs, loss_function, labels)
          loss_channels_last[index] = model.evaluate(x=data, y=labels,
                                                     batch_size=1, verbose=0)

        # Evaluate the same network with channels first, with all three loss
        # functions:
        K.set_image_data_format('channels_first')
        data = data_channels_first
        for index, loss_function in enumerate(losses_to_test):
          labels = labels_channels_first[index]
          inputs = keras.Input(shape=(1, 3, 3))
          model = prepare_simple_model(inputs, loss_function, labels)
          loss_channels_first[index] = model.evaluate(x=data, y=labels,
                                                      batch_size=1, verbose=0)
      finally:
        # Always undo the global format change, even if evaluation raised.
        K.set_image_data_format(old_data_format)

      np.testing.assert_allclose(loss_channels_first,
                                 loss_channels_last,
                                 err_msg='{}{}'.format(
                                     'Computed different losses for ',
                                     'channels_first and channels_last'))
Example #54
0
  def testMultiStepProfile(self):
    """Profile a three-output model one traced step at a time.

    After each additional traced run (`r1`, then `r2`, then `r3`) the
    name-scope profile is expected to contain more nodes: first only 'DW',
    then also 'DW2', and finally 'add'. Execution times are expected only
    for ops that have been run, and the advice report is checked for the
    three expected checkers.
    """
    ops.reset_default_graph()
    opts = model_analyzer.PRINT_ALL_TIMING_MEMORY.copy()
    opts['account_type_regexes'] = ['.*']

    with session.Session() as sess:
      r1, r2, r3 = lib.BuildSplitableModel()
      sess.run(variables.global_variables_initializer())

      profiler = model_analyzer.Profiler(sess.graph)
      # Profile taken before any step has been added.
      pb0 = profiler.profile_name_scope(opts)

      def profile_step(step, fetch):
        """Runs `fetch` with full tracing, records it as `step`, re-profiles."""
        run_meta = config_pb2.RunMetadata()
        sess.run(fetch,
                 options=config_pb2.RunOptions(
                     trace_level=config_pb2.RunOptions.FULL_TRACE),
                 run_metadata=run_meta)
        profiler.add_step(step, run_meta)
        return profiler.profile_name_scope(opts)

      pb1 = profile_step(1, r1)
      self.assertIsNotNone(lib.SearchTFProfNode(pb1, 'DW'))
      self.assertIsNone(lib.SearchTFProfNode(pb1, 'DW2'))
      self.assertIsNone(lib.SearchTFProfNode(pb1, 'add'))

      pb2 = profile_step(2, r2)
      self.assertIsNotNone(lib.SearchTFProfNode(pb2, 'DW'))
      self.assertIsNotNone(lib.SearchTFProfNode(pb2, 'DW2'))
      self.assertIsNone(lib.SearchTFProfNode(pb2, 'add'))

      pb3 = profile_step(3, r3)
      self.assertIsNotNone(lib.SearchTFProfNode(pb3, 'DW'))
      self.assertIsNotNone(lib.SearchTFProfNode(pb3, 'DW2'))
      self.assertIsNotNone(lib.SearchTFProfNode(pb3, 'add'))

      # An op appears in a profile only once a step that ran it was added.
      self.assertIsNone(lib.SearchTFProfNode(pb0, 'Conv2D'))
      self.assertGreater(lib.SearchTFProfNode(pb1, 'Conv2D').exec_micros, 0)
      self.assertIsNone(lib.SearchTFProfNode(pb1, 'Conv2D_1'))
      self.assertGreater(lib.SearchTFProfNode(pb2, 'Conv2D_1').exec_micros, 0)
      self.assertIsNone(lib.SearchTFProfNode(pb2, 'add'))
      self.assertGreater(lib.SearchTFProfNode(pb3, 'add').exec_micros, 0)

      advice_pb = profiler.advise(model_analyzer.ALL_ADVICE)
      self.assertIn('AcceleratorUtilizationChecker', advice_pb.checkers)
      self.assertIn('ExpensiveOperationChecker', advice_pb.checkers)
      self.assertIn('OperationChecker', advice_pb.checkers)

      # Accelerator utilization reports are only expected when a GPU exists.
      checker = advice_pb.checkers['AcceleratorUtilizationChecker']
      if test.is_gpu_available():
        self.assertGreater(len(checker.reports), 0)
      else:
        self.assertEqual(len(checker.reports), 0)
      checker = advice_pb.checkers['ExpensiveOperationChecker']
      self.assertGreater(len(checker.reports), 0)
Example #55
0
 def test_depthwise_conv2d(self, kwargs):
   """Run the shared layer test with a 3x3 kernel.

   `kwargs` is updated in place with `kernel_size`. When it names an
   explicit `data_format` and no CUDA GPU is available, nothing is run.
   """
   kwargs.update(kernel_size=(3, 3))
   if 'data_format' in kwargs and not test.is_gpu_available(cuda_only=True):
     return
   self._run_test(kwargs)
Example #56
0
    def test_cudnnrnn_bidirectional(self):
        """Train `Bidirectional(CuDNNGRU)` in several model configurations.

        Covers a Sequential model (followed by a config / JSON round trip),
        stacked bidirectional layers, the functional API, and a stateful
        layer. Nothing is run unless a CUDA GPU is available.
        """
        if not test.is_gpu_available(cuda_only=True):
            return

        with self.test_session(use_gpu=True):
            cell = keras.layers.CuDNNGRU
            num_samples, steps, features, units = 2, 2, 2, 2
            merge = 'concat'

            x = np.random.random((num_samples, steps, features))
            # With 'concat' merging the wrapper output is twice as wide.
            if merge == 'concat':
                out_dim = 2 * units
            else:
                out_dim = units
            y = np.random.random((num_samples, out_dim))

            def train_one_epoch(net):
                """Compile `net` with MSE / RMSProp and fit it for one epoch."""
                net.compile(loss='mse',
                            optimizer=RMSPropOptimizer(learning_rate=0.001))
                net.fit(x, y, epochs=1, batch_size=1)

            # test with Sequential model
            seq = keras.Sequential()
            first_layer = keras.layers.Bidirectional(cell(units),
                                                     merge_mode=merge,
                                                     input_shape=(None,
                                                                  features))
            seq.add(first_layer)
            train_one_epoch(seq)

            # test config
            seq.get_config()
            restored = keras.models.model_from_json(seq.to_json())
            restored.summary()

            # test stacked bidirectional layers
            stacked = keras.Sequential()
            lower = keras.layers.Bidirectional(cell(units,
                                                    return_sequences=True),
                                               merge_mode=merge,
                                               input_shape=(None, features))
            stacked.add(lower)
            upper = keras.layers.Bidirectional(cell(units), merge_mode=merge)
            stacked.add(upper)
            train_one_epoch(stacked)

            # test with functional API
            fn_in = keras.Input((steps, features))
            fn_out = keras.layers.Bidirectional(cell(units),
                                                merge_mode=merge)(fn_in)
            train_one_epoch(keras.Model(fn_in, fn_out))

            # Bidirectional and stateful
            st_in = keras.Input(batch_shape=(1, steps, features))
            st_out = keras.layers.Bidirectional(cell(units, stateful=True),
                                                merge_mode=merge)(st_in)
            train_one_epoch(keras.Model(st_in, st_out))
Example #57
0
    def testOpEdgeCases(self, gpu_only=True):
        """Check that invalid fused conv2d+bias+activation calls raise.

        Covers illegal strides, an unsupported activation mode, and filters
        larger than the input. When `gpu_only` is true and no GPU is
        available the test logs a message and returns without checking.
        """
        if gpu_only and not test.is_gpu_available():
            tf_logging.info("Skipping OpEdgeCases tests.")
            return

        strides_msg = ".*strides.*in the batch and depth dimensions"
        # Each row: (expected exception, message regex, input shape,
        #            filter shape, bias shape, strides, padding, activation).
        edge_cases = [
            # Illegal strides.
            (errors_impl.UnimplementedError, strides_msg,
             [1, 1, 1, 1], [1, 1, 1, 1], [1], [2, 1, 1, 1], "SAME", "Relu"),
            (errors_impl.UnimplementedError, strides_msg,
             [1, 1, 1, 1], [1, 1, 1, 1], [1], [1, 1, 1, 2], "SAME", "Relu"),
            # Illegal activation mode.
            (ValueError, "Op passed string 'Tanh' not in:",
             [1, 1, 1, 1], [1, 1, 1, 1], [1], [1, 1, 1, 1], "SAME", "Tanh"),
            # Filter larger than input.
            (ValueError, "Negative dimension size",
             [32, 20, 20, 3], [20, 21, 3, 2], [2], [1, 1, 1, 1], "VALID",
             "Relu"),
            (ValueError, "Negative dimension size",
             [32, 20, 20, 3], [21, 20, 3, 2], [2], [1, 1, 1, 1], "VALID",
             "Relu"),
        ]

        fused_op = fused_conv2d_bias_activation_op
        with self.cached_session() as sess, self.test_scope():
            for (exc_type, pattern, input_shape, filter_shape, bias_shape,
                 strides, padding, activation) in edge_cases:
                # The op is built inside the assertion context because some
                # of these errors are raised while constructing the op.
                with self.assertRaisesRegexp(exc_type, pattern):
                    sess.run(
                        fused_op.fused_conv2d_bias_activation(
                            _IotaNdF32Constant(input_shape),
                            _IotaNdF32Constant(filter_shape),
                            _IotaNdF32Constant(bias_shape),
                            strides=strides,
                            padding=padding,
                            activation_mode=activation))
 def test_whether_there_is_a_gpu(self):
     """Check that local GPU discovery agrees with `test.is_gpu_available()`.

     Both sides are reduced to booleans before comparing. Comparing the raw
     device count against the availability flag only holds for zero or one
     device, so it would fail spuriously on a machine with several GPUs.
     """
     gpu_devices = replicate_model_fn._get_local_devices('GPU')
     self.assertEqual(len(gpu_devices) > 0, bool(test.is_gpu_available()))
Example #59
0
 def _CompareBackward(self, x, rank, fft_length=None, use_placeholder=False):
   """Delegate to the parent `_CompareBackward` only when CUDA is available.

   Without a CUDA GPU this method does nothing.
   """
   if not test.is_gpu_available(cuda_only=True):
     return
   parent = super(RFFTOpsTest, self)
   parent._CompareBackward(x, rank, fft_length, use_placeholder)
Example #60
0
    def testError(self):
        """Check that malformed FFT/IFFT inputs are rejected with clear errors.

        For every rank in `VALID_FFT_RANKS` this covers: inputs whose rank is
        too small, an `fft_length` that is not rank 1, an `fft_length` with
        the wrong number of entries, and direct kernel calls whose input is
        shorter than `fft_length` requires.
        """
        for rank in VALID_FFT_RANKS:
            # Inputs with fewer than `rank` dimensions must be rejected.
            for dims in xrange(0, rank):
                x = np.zeros((1, ) * dims).astype(np.complex64)
                with self.assertRaisesWithPredicateMatch(
                        ValueError,
                        "Shape .* must have rank at least {}".format(rank)):
                    self._tfFFT(x, rank)
                with self.assertRaisesWithPredicateMatch(
                        ValueError,
                        "Shape .* must have rank at least {}".format(rank)):
                    self._tfIFFT(x, rank)
            # NOTE(review): `x` below is built from `rank`, not `dims`, so
            # both iterations of this loop use the same input shape --
            # confirm whether `dims` was intended.
            for dims in xrange(rank, rank + 2):
                x = np.zeros((1, ) * rank)

                # Test non-rank-1 fft_length produces an error.
                fft_length = np.zeros((1, 1)).astype(np.int32)
                with self.assertRaisesWithPredicateMatch(
                        ValueError, "Shape .* must have rank 1"):
                    self._tfFFT(x, rank, fft_length)
                with self.assertRaisesWithPredicateMatch(
                        ValueError, "Shape .* must have rank 1"):
                    self._tfIFFT(x, rank, fft_length)

                # Test wrong fft_length length.
                fft_length = np.zeros((rank + 1, )).astype(np.int32)
                with self.assertRaisesWithPredicateMatch(
                        ValueError,
                        "Dimension must be .*but is {}.*".format(rank + 1)):
                    self._tfFFT(x, rank, fft_length)
                with self.assertRaisesWithPredicateMatch(
                        ValueError,
                        "Dimension must be .*but is {}.*".format(rank + 1)):
                    self._tfIFFT(x, rank, fft_length)

            # Test that calling the kernel directly without padding to fft_length
            # produces an error.
            rffts_for_rank = {
                1: [gen_spectral_ops.rfft, gen_spectral_ops.irfft],
                2: [gen_spectral_ops.rfft2d, gen_spectral_ops.irfft2d],
                3: [gen_spectral_ops.rfft3d, gen_spectral_ops.irfft3d]
            }
            rfft_fn, irfft_fn = rffts_for_rank[rank]
            # Forward transform: every input dimension is 5 while fft_length
            # asks for 6.
            with self.assertRaisesWithPredicateMatch(
                    errors.InvalidArgumentError,
                    "Input dimension .* must have length of at least 6 but got: 5"
            ):
                x = np.zeros((5, ) * rank).astype(np.float32)
                fft_length = [6] * rank
                with self.test_session():
                    rfft_fn(x, fft_length).eval()
            # TODO(rjryan): Remove when CPU-based IRFFT is supported.
            if test.is_gpu_available(cuda_only=True):
                # Inverse transform: every input dimension is 3 while
                # fft_length asks for 6.
                with self.assertRaisesWithPredicateMatch(
                        errors.InvalidArgumentError,
                        "Input dimension .* must have length of at least .* but got: 3"
                ):
                    x = np.zeros((3, ) * rank).astype(np.complex64)
                    fft_length = [6] * rank
                    with self.test_session():
                        irfft_fn(x, fft_length).eval()