def testRandomSmallAndLargeComplex128(self):
  if test.is_built_with_rocm():
    self.skipTest(
        "rocBLAS GEMM for complex datatype is not yet supported in ROCm")
  np.random.seed(42)
  for batch_dims in [(), (1,), (3,), (2, 2)]:
    for size in 8, 31, 32:
      shape = batch_dims + (size, size)
      matrix = np.random.uniform(
          low=-1.0, high=1.0,
          size=np.prod(shape)).reshape(shape).astype(np.complex128)
      self._verifyLogarithmComplex(matrix)
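# The _verifyLogarithmComplex helper is defined elsewhere in the test file.
# A minimal sketch of the kind of round-trip check it presumably performs,
# assuming tf.linalg.logm (which only supports complex inputs) and SciPy's
# expm; the helper name and tolerance here are hypothetical.
import numpy as np
import tensorflow as tf
from scipy.linalg import expm


def _verify_logarithm_complex_sketch(matrix, atol=1e-5):
  # Principal matrix logarithm, computed by TensorFlow.
  log_m = tf.linalg.logm(tf.constant(matrix)).numpy()
  # Round trip: expm(logm(A)) should recover A for every batch element.
  for log_a, a in zip(log_m.reshape((-1,) + log_m.shape[-2:]),
                      matrix.reshape((-1,) + matrix.shape[-2:])):
    np.testing.assert_allclose(expm(log_a), a, atol=atol)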
def _DtypesToTest(self, use_gpu):
  # double datatype is currently not supported for convolution ops
  # on the ROCm platform
  optional_float64 = [] if test.is_built_with_rocm() else [dtypes.float64]
  if use_gpu:
    if not test_util.GpuSupportsHalfMatMulAndConv():
      return optional_float64 + [dtypes.float32]
    else:
      # It is important that float32 comes before float16 here,
      # as we will be using its gradients as reference for fp16 gradients.
      return optional_float64 + [dtypes.float32, dtypes.float16]
  else:
    return optional_float64 + [dtypes.float32, dtypes.float16]
def testConvertToTensor(self):
  with self.session(force_gpu=True):
    dtypes_to_test = [dtypes.float16, dtypes.float32, dtypes.float64]
    if not test.is_built_with_rocm():
      dtypes_to_test += [dtypes.complex64, dtypes.complex128]
    for data_type in dtypes_to_test:
      for segment_ids_type in [dtypes.int32, dtypes.int64]:
        values, indices, _ = self._input(data_type, segment_ids_type)
        sparse_value = indexed_slices.IndexedSlices(
            values, indices, dense_shape=values.shape)
        with self.assertRaisesRegex(errors_impl.UnimplementedError,
                                    self._UNSORTED_ERROR_MESSAGE):
          # convert_to_tensor with IndexedSlices uses unsorted_segment_sum
          result = ops.convert_to_tensor(sparse_value)
          self.evaluate(result)
def test_masking_with_stacking_LSTM(self):
  if test.is_built_with_rocm():
    self.skipTest('Skipping the test as ROCm MIOpen does not '
                  'support padded input yet.')
  inputs = np.random.random((2, 3, 4))
  targets = np.abs(np.random.random((2, 3, 5)))
  targets /= targets.sum(axis=-1, keepdims=True)
  model = keras.models.Sequential()
  model.add(keras.layers.Masking(input_shape=(3, 4)))
  model.add(rnn.LSTM(10, return_sequences=True, unroll=False))
  model.add(rnn.LSTM(5, return_sequences=True, unroll=False))
  model.compile(
      loss='categorical_crossentropy',
      optimizer=gradient_descent.GradientDescentOptimizer(0.01))
  model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
def testDepthwiseConv2DExplicit(self):
  for index, (input_size, filter_size, _, stride, padding,
              dilations) in enumerate(ConfigsToTestExplicit()):
    tf_logging.info(
        "Testing DepthwiseConv2D, %dth config: %r * %r, stride: %d, "
        "padding: %s", index, input_size, filter_size, stride, padding)
    # double datatype is currently not supported for convolution ops
    # on the ROCm platform
    optional_float64 = [] if test.is_built_with_rocm() else [dtypes.float64]
    data_formats = ["NHWC", "NCHW"] if test.is_gpu_available() else ["NHWC"]
    for data_type in [dtypes.float16, dtypes.float32] + optional_float64:
      for data_format in data_formats:
        self._VerifyValues(
            input_size,
            filter_size,
            stride,
            padding,
            data_type,
            use_gpu=True,
            data_format=data_format,
            dilations=dilations)
def test_float64_GRU(self):
  if test.is_built_with_rocm():
    self.skipTest('Double type is not yet supported in ROCm')
  num_samples = 2
  timesteps = 3
  embedding_dim = 4
  units = 2
  testing_utils.layer_test(
      rnn.GRU,
      kwargs={'units': units, 'return_sequences': True, 'dtype': 'float64'},
      input_shape=(num_samples, timesteps, embedding_dim),
      input_dtype='float64')
def testNestedCall(self):

  def fn(x, a):
    return x + a

  xla_func = def_function.function(fn, experimental_compile=True)

  def fn2(x, a):
    return xla_func(x, a)

  func = def_function.function(fn2, experimental_compile=False)
  inputs = constant_op.constant([1, 2, 2, 3, 3])
  if not test.is_built_with_rocm():
    # XLA support is not yet enabled for TF ROCm
    self.assertAllClose([2, 3, 3, 4, 4], func(inputs, 1))
def testNestedCallUnsupportedOps(self):

  def fn(x):
    return array_ops.unique(x).y

  xla_func = def_function.function(fn, experimental_compile=True)

  def fn2(x):
    return xla_func(x)

  func = def_function.function(fn2, experimental_compile=False)
  inputs = constant_op.constant([1, 2, 2, 3, 3])
  if not test.is_built_with_rocm():
    with self.assertRaisesRegex(errors.InvalidArgumentError,
                                'not compilable'):
      func(inputs)
def testUnsupportedOps(self):
  with ops.Graph().as_default() as g:

    def fn(x):
      return array_ops.unique(x).y  # Unique is not supported by XLA

    xla_func = def_function.function(fn, jit_compile=True)
    inputs = array_ops.placeholder(dtypes.float32, [5])
    x = xla_func(inputs)
    # XLA support is not yet enabled for TF ROCm
    if not test.is_built_with_rocm():
      with self.assertRaisesRegex(errors.InvalidArgumentError,
                                  "not compilable"):
        with session.Session(graph=g) as sess:
          sess.run(x, feed_dict={inputs: [1, 2, 2, 3, 3]})
def testDepthwiseConv2DFilterGradCompare(self):
  for index, (input_size, filter_size, output_size, stride,
              padding) in enumerate(ConfigsToTest()):
    tf_logging.info(
        "Testing DepthwiseConv2DFilterGradCompare, %dth config: %r * %r, "
        "stride: %d, padding: %s", index, input_size, filter_size, stride,
        padding)
    self._CompareBackpropFilterFloat(input_size, filter_size, output_size,
                                     stride, padding)
    # double datatype is currently not supported for convolution ops
    # on the ROCm platform
    if test.is_built_with_rocm():
      continue
    self._CompareBackpropFilterDouble(input_size, filter_size, output_size,
                                      stride, padding)
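# The _CompareBackpropFilterFloat/_CompareBackpropFilterDouble helpers are
# not shown. A minimal sketch of the underlying idea, comparing the filter
# gradient of tf.nn.depthwise_conv2d computed in float32 against the same
# gradient computed in float64; the sizes and tolerances are hypothetical.
import numpy as np
import tensorflow as tf


def _compare_filter_grad_sketch(input_size=(1, 5, 5, 2),
                                filter_size=(3, 3, 2, 1),
                                stride=1, padding="SAME"):
  x = np.random.uniform(size=input_size)
  f = np.random.uniform(size=filter_size)
  grads = {}
  for np_dtype in (np.float32, np.float64):
    filt = tf.Variable(f.astype(np_dtype))
    with tf.GradientTape() as tape:
      y = tf.nn.depthwise_conv2d(
          tf.constant(x.astype(np_dtype)), filt,
          strides=[1, stride, stride, 1], padding=padding)
      loss = tf.reduce_sum(y)
    grads[np_dtype] = tape.gradient(loss, filt).numpy()
  # The float32 gradient should agree with the float64 reference.
  np.testing.assert_allclose(
      grads[np.float32], grads[np.float64].astype(np.float32),
      rtol=1e-4, atol=1e-4)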
def testSymmetricPositiveDefinite(self):
  if test.is_built_with_rocm():
    self.skipTest("ROCm does not support BLAS operations for complex types")
  # 2x2 matrices
  matrix1 = np.array([[2., 1.], [1., 2.]])
  matrix2 = np.array([[3., -1.], [-1., 3.]])
  matrix1 = matrix1.astype(np.complex64)
  matrix1 += 1j * matrix1
  matrix2 = matrix2.astype(np.complex64)
  matrix2 += 1j * matrix2
  self._verifyLogarithmComplex(matrix1)
  self._verifyLogarithmComplex(matrix2)
  # Complex batch
  self._verifyLogarithmComplex(self._makeBatch(matrix1, matrix2))
def test_Bidirectional_ragged_input(self, merge_mode):
  if test.is_built_with_rocm():
    # ragged tensors are not supported in the ROCm RNN implementation
    self.skipTest('Test not supported on the ROCm platform')
  np.random.seed(100)
  rnn = keras.layers.LSTM
  units = 3
  x = ragged_factory_ops.constant(
      [[[1, 1, 1], [1, 1, 1]],
       [[1, 1, 1]],
       [[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1]],
       [[1, 1, 1], [1, 1, 1], [1, 1, 1]]],
      ragged_rank=1)
  x = math_ops.cast(x, 'float32')

  # pylint: disable=g-long-lambda
  with self.cached_session():
    if merge_mode == 'ave':
      merge_func = lambda y, y_rev: (y + y_rev) / 2
    elif merge_mode == 'concat':
      merge_func = lambda y, y_rev: ragged_concat_ops.concat(
          (y, y_rev), axis=-1)
    elif merge_mode == 'mul':
      merge_func = lambda y, y_rev: (y * y_rev)
      # pylint: enable=g-long-lambda

    inputs = keras.Input(
        shape=(None, 3), batch_size=4, dtype='float32', ragged=True)
    layer = keras.layers.Bidirectional(
        rnn(units, return_sequences=True), merge_mode=merge_mode)
    f_merged = keras.backend.function([inputs], layer(inputs))
    f_forward = keras.backend.function([inputs],
                                       layer.forward_layer(inputs))

    # TODO(kaftan): after KerasTensor refactor TF op layers should work
    # with many composite tensors, and this shouldn't need to be a lambda
    # layer.
    reverse_layer = core.Lambda(array_ops.reverse, arguments=dict(axis=[1]))
    f_backward = keras.backend.function(
        [inputs], reverse_layer(layer.backward_layer(inputs)))

    y_merged = f_merged(x)
    y_expected = merge_func(
        ragged_tensor.convert_to_tensor_or_ragged_tensor(f_forward(x)),
        ragged_tensor.convert_to_tensor_or_ragged_tensor(f_backward(x)))
    y_merged = ragged_tensor.convert_to_tensor_or_ragged_tensor(y_merged)
    self.assertAllClose(y_merged.flat_values, y_expected.flat_values)
def testPoolNC(self):
  if test.is_gpu_available(cuda_only=True):
    # "NC*" format is currently only supported on CUDA.
    with self.session(use_gpu=True):
      for padding in ["SAME", "VALID"]:
        self._test(
            input_shape=[2, 2, 9],
            window_shape=[2],
            padding=padding,
            pooling_type="MAX",
            strides=[1],
            dilation_rate=[1],
            data_format="NCW")
        self._test(
            input_shape=[2, 2, 9],
            window_shape=[2],
            padding=padding,
            pooling_type="MAX",
            strides=[2],
            dilation_rate=[1],
            data_format="NCW")
        self._test(
            input_shape=[2, 2, 7, 9],
            window_shape=[2, 2],
            padding=padding,
            pooling_type="MAX",
            strides=[1, 2],
            dilation_rate=[1, 1],
            data_format="NCHW")
        if test.is_built_with_rocm():
          # Pooling with 3D tensors is not supported in ROCm
          continue
        self._test(
            input_shape=[2, 2, 7, 5, 3],
            window_shape=[2, 2, 2],
            padding=padding,
            pooling_type="MAX",
            strides=[1, 2, 1],
            dilation_rate=[1, 1, 1],
            data_format="NCDHW")
      self._test(
          input_shape=[2, 2, 7, 9],
          window_shape=[2, 2],
          padding="VALID",
          pooling_type="MAX",
          strides=[1, 1],
          dilation_rate=[2, 2],
          data_format="NCHW")
def testPoolNC(self):
  if test.is_gpu_available(cuda_only=True):
    # "NC*" format is currently only supported on CUDA.
    with self.test_session(use_gpu=True):
      for padding in ["SAME", "VALID"]:
        self._test(
            input_shape=[2, 2, 9],
            window_shape=[2],
            padding=padding,
            pooling_type="MAX",
            strides=[1],
            dilation_rate=[1],
            data_format="NCW")
        self._test(
            input_shape=[2, 2, 9],
            window_shape=[2],
            padding=padding,
            pooling_type="MAX",
            strides=[2],
            dilation_rate=[1],
            data_format="NCW")
        self._test(
            input_shape=[2, 2, 7, 9],
            window_shape=[2, 2],
            padding=padding,
            pooling_type="MAX",
            strides=[1, 2],
            dilation_rate=[1, 1],
            data_format="NCHW")
        if not test.is_built_with_rocm():
          # MIOPEN currently does not support 5D tensors,
          # so skip this test when running with ROCm
          self._test(
              input_shape=[2, 2, 7, 5, 3],
              window_shape=[2, 2, 2],
              padding=padding,
              pooling_type="MAX",
              strides=[1, 2, 1],
              dilation_rate=[1, 1, 1],
              data_format="NCDHW")
      self._test(
          input_shape=[2, 2, 7, 9],
          window_shape=[2, 2],
          padding="VALID",
          pooling_type="MAX",
          strides=[1, 1],
          dilation_rate=[2, 2],
          data_format="NCHW")
def testGPU(self):
  if not test.is_gpu_available(cuda_only=True):
    return
  gpu_dev = test.gpu_device_name()
  ops.reset_default_graph()
  with ops.device(gpu_dev):
    tfprof_node, run_meta = _run_model()
    self.assertEqual(tfprof_node.children[0].name, 'MatMul')
    self.assertGreater(tfprof_node.children[0].exec_micros, 10)
    ret = _extract_node(run_meta, 'MatMul')
    self.assertEqual(len(ret['gpu:0']), 1)
    if not test.is_built_with_rocm():
      # stream tracing is currently not available in tensorflow with ROCm
      self.assertEqual(len(ret['gpu:0/stream:all']), 1, '%s' % run_meta)
def test_grad_random(self, rank, extra_dims, size, np_rtype):
  # rfft3d/irfft3d do not have gradients yet.
  if rank == 3:
    return
  dims = rank + extra_dims
  tol = 1e-2 if np_rtype == np.float32 else 1e-10
  re = np.random.rand(*((size,) * dims)).astype(np_rtype) * 2 - 1
  im = np.random.rand(*((size,) * dims)).astype(np_rtype) * 2 - 1
  self._check_grad_real(self._tf_fft_for_rank(rank), re, rtol=tol, atol=tol)
  if test.is_built_with_rocm():
    # Fails on ROCm because of an irfft peculiarity
    return
  self._check_grad_complex(
      self._tf_ifft_for_rank(rank),
      re,
      im,
      result_is_complex=False,
      rtol=tol,
      atol=tol)
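# _check_grad_real and _check_grad_complex are helpers defined elsewhere. A
# minimal sketch of a numeric-vs-analytic gradient check in the same spirit,
# using tf.signal.rfft and tf.test.compute_gradient; the function body and
# tolerances here are hypothetical.
import tensorflow as tf


def _fft_grad_check_sketch(size=8, rtol=1e-2, atol=1e-2):
  x = tf.random.uniform([size], dtype=tf.float32) * 2.0 - 1.0

  def f(inp):
    # Map to a real-valued output so the gradient is well defined.
    return tf.abs(tf.signal.rfft(inp))

  theoretical, numerical = tf.test.compute_gradient(f, [x])
  tf.debugging.assert_near(theoretical[0], numerical[0],
                           rtol=rtol, atol=atol)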
def testDepthwiseConv2DFilterGradCompare(self):
  for index, (input_size, filter_size, output_size, stride,
              padding) in enumerate(ConfigsToTest()):
    tf_logging.info(
        "Testing DepthwiseConv2DFilterGradCompare, %dth config: %r * %r, "
        "stride: %d, padding: %s", index, input_size, filter_size, stride,
        padding)
    self._CompareBackpropFilterFloat(input_size, filter_size, output_size,
                                     stride, padding)
    if test.is_built_with_rocm():
      # CNN for double datatype not yet supported in ROCm
      continue
    self._CompareBackpropFilterDouble(input_size, filter_size, output_size,
                                      stride, padding)
def testMethodCompilationUnsupportedFunc(self):
  if test.is_built_with_rocm():
    return
  with ops.device('device:{}:0'.format(self.device)):

    class C(object):

      @def_function.function(experimental_compile=True)
      def f1(self, x):
        return array_ops.unique(x).y

    inputs = constant_op.constant([1, 2, 2, 3, 3])
    c = C()
    with self.assertRaisesRegex(errors.InvalidArgumentError,
                                'not compilable'):
      c.f1(inputs)
def test_with_masking_layer_LSTM(self, unroll):
  if test.is_built_with_rocm():
    self.skipTest(
        'Skipping the test as ROCm MIOpen does not support padded input.')
  layer_class = keras.layers.LSTM
  inputs = np.random.random((2, 3, 4))
  targets = np.abs(np.random.random((2, 3, 5)))
  targets /= targets.sum(axis=-1, keepdims=True)
  model = keras.models.Sequential()
  model.add(keras.layers.Masking(input_shape=(3, 4)))
  model.add(layer_class(units=5, return_sequences=True, unroll=unroll))
  model.compile(
      loss='categorical_crossentropy',
      optimizer='rmsprop',
      run_eagerly=testing_utils.should_run_eagerly())
  model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
def testBasicInt32(self):
  with ops.Graph().as_default() as g:

    def fn(x, a):
      return x + a

    xla_func = def_function.function(fn, experimental_compile=True)
    inputs = array_ops.placeholder(dtypes.int32, [5])
    # XLA support is not yet enabled for TF ROCm
    if not test.is_built_with_rocm():
      x = xla_func(inputs, 1)
      with session.Session(graph=g) as sess:
        y = sess.run(x, feed_dict={inputs: [1, 2, 2, 3, 3]})
        self.assertTrue(x.graph.as_graph_def().library.function[0]
                        .attr["_XlaMustCompile"].b)
        self.assertAllClose([2, 3, 3, 4, 4], y)
def testSolveBatchComplex(self):
  if test.is_built_with_rocm():
    # skipTest raises, so no return is needed after it.
    self.skipTest(
        "TRSM operation for complex datatype not yet supported in ROCm")
  matrix = np.array([[1., 2.], [3., 4.]]).astype(np.complex64)
  matrix += 1j * matrix
  rhs = np.array([[1., 0., 1.], [0., 1., 1.]]).astype(np.complex64)
  rhs += 1j * rhs
  # Batch of 2x3x2x2 matrices, 2x3x2x3 right-hand sides.
  self._verifySolveAllWaysComplex(matrix, rhs, batch_dims=[2, 3])
  # Batch of 3x2x2x2 matrices, 3x2x2x3 right-hand sides.
  self._verifySolveAllWaysComplex(matrix, rhs, batch_dims=[3, 2])
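# _verifySolveAllWaysComplex is defined elsewhere. Since the skip message
# refers to TRSM (the BLAS triangular solve), here is a minimal sketch of one
# such residual check, assuming tf.linalg.triangular_solve; the helper name
# and tolerance are hypothetical.
import numpy as np
import tensorflow as tf


def _verify_triangular_solve_sketch(matrix, rhs, atol=1e-5):
  lower = np.tril(matrix)
  x = tf.linalg.triangular_solve(
      tf.constant(lower), tf.constant(rhs), lower=True)
  # The residual of the solve should vanish: L @ x == rhs.
  np.testing.assert_allclose(lower @ x.numpy(), rhs, atol=atol)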
def test_with_masking_layer_GRU(self):
  if test.is_built_with_rocm():
    self.skipTest('MIOpen only supports packed input/output')
  layer_class = keras.layers.GRU
  inputs = np.random.random((2, 3, 4))
  targets = np.abs(np.random.random((2, 3, 5)))
  targets /= targets.sum(axis=-1, keepdims=True)
  model = keras.models.Sequential()
  model.add(keras.layers.Masking(input_shape=(3, 4)))
  model.add(layer_class(units=5, return_sequences=True, unroll=False))
  model.compile(
      loss='categorical_crossentropy',
      optimizer='rmsprop',
      run_eagerly=testing_utils.should_run_eagerly(),
      experimental_run_tf_function=testing_utils.should_run_tf_function())
  model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
def testMustBeConstantPropagation(self):
  if test.is_built_with_rocm():
    return

  @def_function.function(experimental_compile=True)
  def f():
    return constant_op.constant([0, 2, 1], dtype=dtypes.int32)

  @def_function.function(experimental_compile=True)
  def g(a, b):
    return array_ops.transpose(a, b)

  @def_function.function
  def z():
    return g(array_ops.ones([3, 4, 3], dtype=dtypes.float32), f())

  z()
def testGPU(self):
  if not test.is_gpu_available(cuda_only=True):
    return
  gpu_dev = test.gpu_device_name()
  ops.reset_default_graph()
  with ops.device(gpu_dev):
    tfprof_node, run_meta = _run_model()
    self.assertEqual(tfprof_node.children[0].name, 'MatMul')
    self.assertGreater(tfprof_node.children[0].exec_micros, 10)
    ret = _extract_node(run_meta, 'MatMul')
    self.assertEqual(len(ret['gpu:0']), 1)
    if not test.is_built_with_rocm():
      # skip this check for the ROCm platform:
      # stream level tracing is not yet supported on the ROCm platform
      self.assertEqual(len(ret['gpu:0/stream:all']), 1, '%s' % run_meta)
def test_float64_LSTM(self):
  if test.is_built_with_rocm():
    self.skipTest('Skipping the test as ROCm MIOpen does not '
                  'support float64 yet.')
  num_samples = 2
  timesteps = 3
  embedding_dim = 4
  units = 2
  testing_utils.layer_test(
      rnn.LSTM,
      kwargs={'units': units, 'return_sequences': True, 'dtype': 'float64'},
      input_shape=(num_samples, timesteps, embedding_dim),
      input_dtype='float64')
def testDepthwiseConv2DInputGradExplicitCompare(self):
  for index, (input_size, filter_size, output_size, stride, padding,
              dilations) in enumerate(ConfigsToTestExplicit()):
    if dilations:
      continue
    tf_logging.info(
        "Testing DepthwiseConv2DInputGradCompare, %dth config: %r * %r, "
        "stride: %d, padding: %s", index, input_size, filter_size, stride,
        padding)
    self._CompareBackpropInput(input_size, filter_size, output_size, stride,
                               padding, "float32")
    # double datatype is currently not supported for convolution ops
    # on the ROCm platform
    if test.is_built_with_rocm():
      continue
    self._CompareBackpropInput(input_size, filter_size, output_size, stride,
                               padding, "float64")
def testBasic(self):
  data = np.array([[4., -1., 2.], [-1., 6., 0], [10., 0., 5.]])
  for dtype in (np.float32, np.float64):
    for output_idx_type in (dtypes.int32, dtypes.int64):
      self._verifyLu(data.astype(dtype), output_idx_type=output_idx_type)
  if not test.is_built_with_rocm():
    # ROCm does not support BLAS operations for complex types
    for dtype in (np.complex64, np.complex128):
      for output_idx_type in (dtypes.int32, dtypes.int64):
        complex_data = np.tril(1j * data, -1).astype(dtype)
        complex_data += np.triu(-1j * data, 1).astype(dtype)
        complex_data += data
        self._verifyLu(complex_data, output_idx_type=output_idx_type)
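# _verifyLu is defined elsewhere; a minimal sketch of the reconstruction
# check it presumably performs, using tf.linalg.lu and
# tf.linalg.lu_reconstruct; the helper name and tolerance are hypothetical.
import numpy as np
import tensorflow as tf


def _verify_lu_sketch(data, output_idx_type=tf.int32, atol=1e-5):
  lu, perm = tf.linalg.lu(tf.constant(data), output_idx_type=output_idx_type)
  # Rebuilding P @ L @ U from the packed factors should recover the input.
  reconstructed = tf.linalg.lu_reconstruct(lu, perm)
  np.testing.assert_allclose(reconstructed.numpy(), data, atol=atol)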
def test_lstm_v2_feature_parity_with_canonical_lstm(self):
  if test.is_built_with_rocm():
    self.skipTest('Skipping the test as ROCm MIOpen does not '
                  'support padded input yet.')
  input_shape = 10
  rnn_state_size = 8
  timestep = 4
  batch = 20

  (x_train, y_train), _ = testing_utils.get_test_data(
      train_samples=batch,
      test_samples=0,
      input_shape=(timestep, input_shape),
      num_classes=rnn_state_size,
      random_seed=87654321)
  y_train = np_utils.to_categorical(y_train, rnn_state_size)
  # For the last batch item of the test data, we filter out the last
  # timestep to simulate the variable length sequence and masking test.
  x_train[-2:, -1, :] = 0.0
  y_train[-2:] = 0

  inputs = keras.layers.Input(
      shape=[timestep, input_shape], dtype=dtypes.float32)
  masked_input = keras.layers.Masking()(inputs)
  lstm_layer = rnn_v1.LSTM(rnn_state_size, recurrent_activation='sigmoid')
  output = lstm_layer(masked_input)
  lstm_model = keras.models.Model(inputs, output)
  weights = lstm_model.get_weights()
  y_1 = lstm_model.predict(x_train)
  lstm_model.compile('rmsprop', 'mse')
  lstm_model.fit(x_train, y_train)
  y_2 = lstm_model.predict(x_train)

  with testing_utils.device(should_use_gpu=True):
    cudnn_layer = rnn.LSTM(rnn_state_size)
    cudnn_model = keras.models.Model(inputs, cudnn_layer(masked_input))
    cudnn_model.set_weights(weights)
    y_3 = cudnn_model.predict(x_train)
    cudnn_model.compile('rmsprop', 'mse')
    cudnn_model.fit(x_train, y_train)
    y_4 = cudnn_model.predict(x_train)

  self.assertAllClose(y_1, y_3, rtol=1e-5, atol=2e-5)
  self.assertAllClose(y_2, y_4, rtol=1e-5, atol=2e-5)
def testVariablesHVP(self, decorator):
  if test.is_built_with_rocm():
    # TODO(rocm)
    # This test was recently added and has never passed on the
    # ROCm platform. Remove this skip once the test is passing again.
    self.skipTest("NoFunction decorator test fails on the ROCm platform")

  class _Model(module.Module):

    def __init__(self):
      self._first_dense = core.Dense(18)
      self._conv = convolutional.Conv2D(2, 2)
      self._norm = normalization_v2.BatchNormalization()
      self._second_dense = core.Dense(1)

    def __call__(self, x):
      x = self._first_dense(x)
      x = nn_ops.relu(x)
      x = self._norm(x)
      x = nn_ops.relu(self._conv(array_ops.reshape(x, [-1, 2, 3, 3])))
      return self._second_dense(x)

  model = _Model()

  def _loss():
    input_value = constant_op.constant([[-0.5, 1.], [0.5, -1.]])
    target = constant_op.constant([[-1.], [2.]])
    return math_ops.reduce_sum((model(input_value) - target) ** 2.)

  @decorator
  def _compute_hvps():
    with backprop.GradientTape() as tape:
      loss = _loss()
    vector = tape.gradient(loss, model.trainable_variables)
    variable_input_fn = lambda unused_variables: _loss()
    forward_over_back_hvp, = _hvp(
        variable_input_fn, [model.trainable_variables], [vector])
    with backprop.GradientTape(persistent=True) as tape:
      tape.watch(model.trainable_variables)
      loss = _loss()
      first_grads = tape.gradient(loss, model.trainable_variables)
    back_over_back_hvp = tape.gradient(
        first_grads, model.trainable_variables, output_gradients=vector)
    return forward_over_back_hvp, back_over_back_hvp

  self.assertAllClose(*_compute_hvps(), rtol=1e-5, atol=1e-5)
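# _hvp is defined elsewhere in the test file. A minimal forward-over-reverse
# Hessian-vector-product sketch in the same spirit, using
# tf.autodiff.ForwardAccumulator; the function name and signature below are
# hypothetical, not the test's actual helper.
import tensorflow as tf


def hvp_sketch(f, primals, tangents):
  # Forward-mode accumulation over a reverse-mode gradient: the JVP of the
  # gradient of f is the Hessian-vector product H @ v.
  with tf.autodiff.ForwardAccumulator(primals, tangents) as acc:
    with tf.GradientTape() as tape:
      tape.watch(primals)
      loss = f(primals)
    grads = tape.gradient(loss, primals)
  return acc.jvp(grads)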
def test_return_states_GRU(self):
  if test.is_built_with_rocm():
    self.skipTest('Skipping the test as ROCm MIOpen does not '
                  'support padded input yet.')
  layer_class = rnn.GRU
  x = np.random.random((2, 3, 4))
  y = np.abs(np.random.random((2, 5)))
  s = np.abs(np.random.random((2, 5)))
  inputs = keras.layers.Input(shape=[3, 4], dtype=dtypes.float32)
  masked = keras.layers.Masking()(inputs)
  outputs, states = layer_class(units=5, return_state=True)(masked)
  model = keras.models.Model(inputs, [outputs, states])
  model.compile(
      loss='categorical_crossentropy',
      optimizer=gradient_descent.GradientDescentOptimizer(0.001))
  model.fit(x, [y, s], epochs=1, batch_size=2, verbose=1)