def initParams(self):
    self.op_type = "softmax_with_cross_entropy"
    self.python_api = python_api
    self.python_out_sig = ["Loss", "Softmax"]
    self.numeric_stable_mode = True
    self.soft_label = False
    self.shape = [3, 5, 7, 11]
    self.axis = -1
    self.ignore_index = -1
    self.dtype = np.float32 if core.is_compiled_with_rocm() else np.float64
    self.logits = np.full(self.shape, 1000.0).astype(self.dtype)
    self.logits[:, :, 0, :] = -1000.0
    self.use_softmax = True
def test_check_grad(self):
    if core.is_compiled_with_rocm():
        # HIP will have accuracy fail when using float32 in CPU place
        if self.python_api is not None:
            self.check_grad(
                ["Logits"], "Loss", max_relative_error=0.1, check_eager=True)
        self.check_grad(["Logits"], "Loss", max_relative_error=0.1)
    else:
        if self.python_api is not None:
            self.check_grad(["Logits"], "Loss", check_eager=True)
        self.check_grad(["Logits"], "Loss")
def test_check_grad(self):
    self.outputs['WarpCTCGrad'] = self.gradient
    if core.is_compiled_with_rocm():
        self.check_grad(
            ["Logits"], "Loss", max_relative_error=0.009, check_dygraph=False)
    else:
        self.check_grad(
            ["Logits"], "Loss", max_relative_error=0.007, check_dygraph=False)
def func(self, place):
    shape = [2, 2, 3, 3]
    eps = 0.005
    dtype = np.float64
    if core.is_compiled_with_rocm():
        dtype = np.float32
    x = layers.data('x', shape, False, dtype)
    y = layers.conv2d_transpose(
        input=x,
        num_filters=2,
        filter_size=1,
        padding=[1, 0, 0, 1],
        bias_attr=False,
        use_cudnn=True)
    x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
    w = fluid.default_main_program().global_block().all_parameters()
    w_arr = []
    for p in w:
        w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype))
    if core.is_compiled_with_rocm():
        # HIP will sometimes fail if no atol
        gradient_checker.double_grad_check(
            [x] + w,
            y,
            x_init=[x_arr] + w_arr,
            place=place,
            eps=eps,
            atol=1e-4)
    else:
        gradient_checker.double_grad_check(
            [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps)
    gradient_checker.double_grad_check_for_dygraph(
        self.conv_transpose_wrapper, [x] + w,
        y,
        x_init=[x_arr] + w_arr,
        place=place)
def _build_program(self,
                   place,
                   layout,
                   seed,
                   sync_bn=False,
                   only_forward=False):
    """Build program."""
    main = fluid.Program()
    startup = fluid.Program()
    main.random_seed = seed
    startup.random_seed = seed
    use_cudnn = self.dtype == np.float16
    with fluid.unique_name.guard():
        with fluid.program_guard(main, startup):
            data = fluid.layers.data(
                name='input',
                shape=self.dshape,
                dtype=self.dtype,
                append_batch_size=False)
            conv = fluid.layers.conv2d(
                input=data,
                num_filters=32,
                filter_size=1,
                param_attr=fluid.ParamAttr(name='conv2d_weight'),
                bias_attr=False,
                use_cudnn=use_cudnn)
            bn = fluid.layers.batch_norm(
                conv,
                param_attr=fluid.ParamAttr(name='bn_scale'),
                bias_attr=fluid.ParamAttr(name='bn_bias'),
                moving_mean_name='bn_moving_mean',
                moving_variance_name='bn_moving_variance',
                data_layout=layout,
                is_test=only_forward)
            if core.is_compiled_with_rocm():
                bn = fluid.layers.cast(bn, 'float32')
            else:
                bn = fluid.layers.cast(bn, 'float64')
            sigmoid = fluid.layers.sigmoid(bn)
            out = fluid.layers.reduce_sum(sigmoid)
            if not sync_bn:
                out = out / core.get_cuda_device_count()
            if not only_forward:
                sgd_opt = fluid.optimizer.SGD(learning_rate=0.0)
                sgd_opt.backward(out)
    return main, startup, [out, conv, bn]
def test_3d(self):
    for p in self.places:
        with fluid.dygraph.guard(p):
            x = paddle.randn([2, 6, 6, 6, 4])
            net1 = paddle.nn.BatchNorm3D(4, data_format="NDHWC")
            net2 = paddle.nn.BatchNorm3D(4)
            net2.weight = net1.weight
            net2.bias = net1.bias
            y1 = net1(x)
            channel_first_x = paddle.transpose(x, [0, 4, 1, 2, 3])
            y2 = net2(channel_first_x)
            y2 = paddle.transpose(y2, [0, 2, 3, 4, 1])
            if core.is_compiled_with_rocm():
                # HIP will fail if no atol
                self.assertEqual(
                    np.allclose(
                        y1.numpy(), y2.numpy(), atol=1e-07), True)
            else:
                self.assertEqual(np.allclose(y1.numpy(), y2.numpy()), True)
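# A quick pure-numpy sanity check, separate from the test above, that the two
# permutations it uses are mutually inverse: [0, 4, 1, 2, 3] maps NDHWC to
# NCDHW, and [0, 2, 3, 4, 1] maps it back.
import numpy as np

a = np.random.randn(2, 6, 6, 6, 4)   # NDHWC, same shape as in the test
b = a.transpose([0, 4, 1, 2, 3])     # channels-last -> channels-first
c = b.transpose([0, 2, 3, 4, 1])     # and back again
assert np.array_equal(a, c)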
def setUp(self):
    self.op_type = "softmax"
    self.use_cudnn = False
    self.use_mkldnn = False
    # explicitly use float32 for ROCm, as MIOpen does not yet support float64
    self.dtype = np.float32 if core.is_compiled_with_rocm() else np.float64
    self.init_kernel_type()
    self.shape = self.get_x_shape()
    self.axis = self.get_axis()

    np.random.seed(0)
    x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype)
    out = np.apply_along_axis(stable_softmax, self.axis, x)

    self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
    self.outputs = {'Out': out}
    self.attrs = {
        'axis': self.axis,
        'use_cudnn': self.use_cudnn,
        'use_mkldnn': self.use_mkldnn
    }
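# `stable_softmax` is used above but not shown here. A minimal sketch of the
# usual numerically stable formulation (shift by the max before
# exponentiating); the helper in the actual test utilities may differ.
def stable_softmax(x):
    # exp(x - max(x)) never overflows, and the shift cancels in the ratio.
    shiftx = x - np.max(x)
    exps = np.exp(shiftx)
    return exps / np.sum(exps)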
def setUp(self):
    self.op_type = "pool3d"
    self.init_kernel_type()
    self.dtype = np.float32 if core.is_compiled_with_rocm() else np.float64
    self.init_test_case()
    self.padding_algorithm = "EXPLICIT"
    self.init_paddings()
    self.init_global_pool()
    self.init_kernel_type()
    self.init_pool_type()
    self.init_ceil_mode()
    self.init_exclusive()
    self.init_adaptive()
    self.init_data_format()
    self.init_shape()
    paddle.enable_static()

    input = np.random.random(self.shape).astype(self.dtype)
    output = pool3D_forward_naive(
        input, self.ksize, self.strides, self.paddings, self.global_pool,
        self.ceil_mode, self.exclusive, self.adaptive, self.data_format,
        self.pool_type, self.padding_algorithm).astype(self.dtype)

    self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(input)}
    self.attrs = {
        'strides': self.strides,
        'paddings': self.paddings,
        'ksize': self.ksize,
        'pooling_type': self.pool_type,
        'global_pooling': self.global_pool,
        'use_cudnn': self.use_cudnn,
        'ceil_mode': self.ceil_mode,
        'data_format': self.data_format,
        'exclusive': self.exclusive,
        'adaptive': self.adaptive,
        "padding_algorithm": self.padding_algorithm,
    }
    self.outputs = {'Out': output}
def setUp(self):
    # init as conv transpose
    self.dtype = np.float32 if core.is_compiled_with_rocm() else np.float64
    self.need_check_grad = True
    self.is_test = False
    self.use_cudnn = False
    self.use_mkldnn = False
    self.output_size = None
    self.output_padding = []
    self.data_format = "NCHW"
    self.pad = [0, 0]
    self.padding_algorithm = "EXPLICIT"
    self.init_op_type()
    self.init_test_case()

    input_ = np.random.random(self.input_size).astype(self.dtype)
    filter_ = np.random.random(self.filter_size).astype(self.dtype)

    self.inputs = {'Input': input_, 'Filter': filter_}
    self.attrs = {
        'strides': self.stride,
        'paddings': self.pad,
        'padding_algorithm': self.padding_algorithm,
        'groups': self.groups,
        'dilations': self.dilations,
        'use_cudnn': self.use_cudnn,
        'is_test': self.is_test,
        'use_mkldnn': self.use_mkldnn,
        'data_format': self.data_format
    }
    if self.output_size is not None:
        self.attrs['output_size'] = self.output_size
    if len(self.output_padding) > 0:
        self.attrs['output_padding'] = self.output_padding

    output = conv2dtranspose_forward_naive(
        input_, filter_, self.attrs).astype(self.dtype)
    self.outputs = {'Output': output}
def main():
    sys.path.append(os.getcwd())
    if core.is_compiled_with_cuda() or core.is_compiled_with_rocm():
        if os.getenv('FLAGS_enable_gpu_memory_usage_log') is None:
            os.environ['FLAGS_enable_gpu_memory_usage_log'] = 'true'
            os.environ['FLAGS_enable_gpu_memory_usage_log_mb'] = 'false'
    some_test_failed = False
    for module_name in sys.argv[1:]:
        flag_need_static_mode = False
        if module_name in static_mode_white_list.STATIC_MODE_TESTING_LIST:
            flag_need_static_mode = True
            paddle.enable_static()
        buffer = cStringIO()
        main = fluid.Program()
        startup = fluid.Program()
        scope = fluid.core.Scope()
        with fluid.program_guard(main, startup):
            with fluid.scope_guard(scope):
                with fluid.unique_name.guard():
                    test_loader = unittest.TestLoader()
                    module = importlib.import_module(module_name)
                    tests = test_loader.loadTestsFromModule(module)
                    res = unittest.TextTestRunner(stream=buffer).run(tests)
                    if not res.wasSuccessful():
                        some_test_failed = True
                        print(
                            module_name,
                            'failed\n',
                            buffer.getvalue(),
                            file=sys.stderr)
        if flag_need_static_mode:
            paddle.disable_static()
    if some_test_failed:
        exit(1)
def setUp(self):
    self.op_type = "sequence_softmax"
    self.use_cudnn = False
    self.init_op_type()
    self.dtype = "float32" if core.is_compiled_with_rocm() else "float64"

    x = np.random.uniform(0.1, 1, (110, 1)).astype(self.dtype)
    self.init_lod()
    out = np.zeros((110, 1)).astype(self.dtype)
    offset = 0
    for i in range(len(self.lod[0])):
        if self.lod[0][i] == 0:
            continue
        sub_x = x[offset:offset + self.lod[0][i], :]
        sub_x = sub_x.reshape(1, self.lod[0][i])
        sub_out = stable_softmax(sub_x)
        out[offset:offset + self.lod[0][i], :] = sub_out.reshape(
            self.lod[0][i], 1)
        offset += self.lod[0][i]

    self.inputs = {"X": (x, self.lod)}
    self.outputs = {"Out": out}
    self.attrs = {'use_cudnn': self.use_cudnn, }
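# `init_lod` is defined elsewhere in the test file. A minimal sketch, assuming
# a single-level LoD whose segment lengths must sum to the 110 input rows; the
# concrete lengths here are illustrative, not taken from the snippet above.
def init_lod(self):
    self.lod = [[40, 10, 30, 30]]  # 40 + 10 + 30 + 30 == 110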
def setUp(self):
    self.op_type = "rnn"
    self.dtype = "float32" if core.is_compiled_with_rocm() else "float64"
    self.sequence_length = None if core.is_compiled_with_rocm() else np.array(
        [12, 11, 10, 9, 8, 7, 6, 5], dtype=np.int32)
    self.num_layers = 1
    self.is_bidirec = False
    self.is_test = False
    self.mode = "GRU"
    self.dropout = 0.
    seq_length = 12
    batch_size = 8
    input_size = 4
    self.hidden_size = 2
    self.set_attrs()

    self.direction_num = 2 if self.is_bidirec else 1
    direction = "bidirectional" if self.is_bidirec else "forward"

    input = np.random.uniform(
        low=-0.1, high=0.1,
        size=(seq_length, batch_size, input_size)).astype(self.dtype)
    if self.sequence_length is not None:
        input[3][1:][:] = 0
        input[4][2:][:] = 0
        input[2][3:][:] = 0
        input[1][4:][:] = 0

    rnn1 = GRU(input_size,
               self.hidden_size,
               num_layers=self.num_layers,
               time_major=True,
               direction=direction,
               dropout=self.dropout,
               dtype=self.dtype)

    flat_w = get_params_for_net(rnn1)
    output, last_hidden = rnn1(input, sequence_length=self.sequence_length)

    if core.is_compiled_with_rocm():

        def rocm_rnn_get_place():
            places = [core.CUDAPlace(0)]
            return places

        self._get_places = rocm_rnn_get_place

    init_h = np.zeros((self.num_layers * self.direction_num, batch_size,
                       self.hidden_size)).astype(self.dtype)
    state_out = np.ndarray((300)).astype("uint8")

    self.inputs = {
        'Input': input,
        'WeightList': flat_w,
        'PreState': [('init_h', init_h)],
        'SequenceLength': self.sequence_length
    }
    if self.sequence_length is None:
        self.inputs = {
            'Input': input,
            'WeightList': flat_w,
            'PreState': [('init_h', init_h)],
        }
    self.attrs = {
        'dropout_prob': self.dropout,
        'is_bidirec': self.is_bidirec,
        'input_size': input_size,
        'hidden_size': self.hidden_size,
        'num_layers': self.num_layers,
        'is_test': self.is_test,
        'mode': self.mode
    }
    self.outputs = {
        'Out': output,
        'State': [('last_hidden', last_hidden)],
        'Reserve': np.ndarray((400)).astype("uint8"),
        'DropoutState': state_out
    }
def test_check_grad(self):
    places = [fluid.CPUPlace()]
    if core.is_compiled_with_cuda() and not core.is_compiled_with_rocm():
        places.append(fluid.CUDAPlace(0))
    for p in places:
        self.func(p)
def setUp(self):
    self.places = [fluid.CPUPlace()]
    if core.is_compiled_with_cuda() and not core.is_compiled_with_rocm():
        self.places.append(fluid.CUDAPlace(0))
def test_check_grad(self):
    if core.is_compiled_with_rocm():
        # HIP will have accuracy fail when using float32 in CPU place
        self.check_grad(["Logits"], "Loss", max_relative_error=0.1)
    else:
        self.check_grad(["Logits"], "Loss")
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from paddle.fluid.core import is_compiled_with_cuda, is_compiled_with_rocm, CUDAPlace

if is_compiled_with_cuda() and not is_compiled_with_rocm():
    from paddle.fluid.core import CUDAGraph as CoreCUDAGraph
else:
    CoreCUDAGraph = None


class CUDAGraph:
    def __init__(self, place=None, mode="thread_local"):
        assert CoreCUDAGraph is not None, "CUDA Graph is only supported on PaddlePaddle compiled with NVIDIA GPU."

        ALL_MODES = ["global", "thread_local", "relaxed"]
        self._graph = None
        if place is None:
            device_id = int(os.environ.get('FLAGS_selected_gpus', 0))
            place = CUDAPlace(device_id)
        self._place = place
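# A minimal usage sketch for the wrapper above. It assumes the
# capture_begin()/capture_end()/replay()/reset() methods that the core
# CUDAGraph binding exposes; those calls are an assumption here, since the
# snippet above only shows __init__.
def cuda_graph_demo():
    import paddle
    x = paddle.zeros([10], dtype='float32')
    g = CUDAGraph()
    g.capture_begin()      # start recording kernels launched on this stream
    y = paddle.add(x, x)   # captured into the graph rather than run eagerly
    g.capture_end()        # stop recording
    g.replay()             # execute the captured kernel sequence
    g.reset()              # free the underlying graph
    return y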
            fluid.layers.conv2d(
                input=input,
                num_filters=0,
                filter_size=0,
                stride=0,
                padding=0,
                dilation=0,
                groups=0,
                use_cudnn=False,
                data_format="NCHW")

        self.assertRaises(ValueError, run_1)


# --------- test environment variable ------
@unittest.skipIf(
    not (core.is_compiled_with_cuda() or core.is_compiled_with_rocm()),
    "core is not compiled with CUDA or ROCM")
class TestConv2DEnviron(unittest.TestCase):
    def run1(self, place):
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            inputs = fluid.layers.data(
                shape=[2, 3, 5, 5],
                append_batch_size=False,
                name="inputs",
                dtype="float32")
            result = fluid.layers.conv2d(
                input=inputs,
                num_filters=4,
                filter_size=[3, 3],
                stride=[1, 1],
                padding=0,
                dilation=[1, 1],
                groups=1,
def init_kernel_type(self):
    self.dtype = "float32" if core.is_compiled_with_rocm() else "float64"
create_test_padding_SAME_class(TestDepthwiseConvWithDilation_AsyPadding)
create_test_padding_SAME_class(TestDepthwiseConvandFuse_AsyPadding)
create_test_padding_SAME_class(TestDepthwiseConvWithDilationandFuse_AsyPadding)

create_test_padding_VALID_class(TestDepthwiseConv_AsyPadding)
create_test_padding_VALID_class(TestDepthwiseConvWithDilation_AsyPadding)
create_test_padding_VALID_class(TestDepthwiseConvandFuse_AsyPadding)
create_test_padding_VALID_class(
    TestDepthwiseConvWithDilationandFuse_AsyPadding)

# channel last
create_test_channel_last_class(TestDepthwiseConv_AsyPadding)
create_test_channel_last_class(TestDepthwiseConvWithDilation2_AsyPadding)
create_test_channel_last_class(TestDepthwiseConvandFuse_AsyPadding)
create_test_channel_last_class(TestDepthwiseConvWithDilationandFuse_AsyPadding)

# ------------ depthwise conv2d in MIOPEN ---------
if core.is_compiled_with_rocm():
    create_test_cudnn_padding_SAME_class(TestDepthwiseConv_AsyPadding)
    create_test_cudnn_padding_SAME_class(
        TestDepthwiseConvWithDilation_AsyPadding)
    create_test_padding_VALID_class(TestDepthwiseConv_AsyPadding)
    create_test_padding_VALID_class(TestDepthwiseConvWithDilation_AsyPadding)
    create_test_cudnn_channel_last_class(TestDepthwiseConv_AsyPadding)
    create_test_cudnn_channel_last_class(
        TestDepthwiseConvWithDilation2_AsyPadding)

if __name__ == '__main__':
    unittest.main()
def init_kernel_type(self):
    self.use_cudnn = True
    self.exhaustive_search = True
    self.dtype = np.float32 if core.is_compiled_with_rocm() else np.float64
def _check_mlp(self, place=None):
    seed = 90
    batch_size = 128

    if place is None:
        place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
        ) else fluid.CUDAPlace(0)

    with fluid.dygraph.guard(place):
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        mlp = MLP()
        optimizer = self.get_optimizer_dygraph(
            parameter_list=mlp.parameters())

        batch_py_reader = fluid.io.PyReader(capacity=1)
        batch_py_reader.decorate_sample_list_generator(
            paddle.batch(
                self.reader_decorator(paddle.dataset.mnist.train()),
                batch_size=batch_size,
                drop_last=True),
            places=fluid.CPUPlace())

        dy_param_init_value = {}
        for batch_id, data in enumerate(batch_py_reader()):
            if batch_id >= self.batch_num:
                break

            img = data[0]
            label = data[1]
            label.stop_gradient = True

            img = fluid.layers.reshape(img, shape=[batch_size, -1])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            dy_out = avg_loss.numpy()

            if batch_id == 0:
                for param in mlp.parameters():
                    dy_param_init_value[param.name] = param.numpy()

            avg_loss.backward()
            optimizer.minimize(avg_loss)
            mlp.clear_gradients()

        dy_param_value = {}
        for param in mlp.parameters():
            dy_param_value[param.name] = param.numpy()

    with new_program_scope():
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        if place is None:
            place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
            ) else fluid.CUDAPlace(0)

        exe = fluid.Executor(place)

        mlp = MLP()
        optimizer = self.get_optimizer()
        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=128, drop_last=True)

        img = fluid.layers.data(
            name='pixel', shape=[1, 28, 28], dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        img = fluid.layers.reshape(img, shape=[batch_size, 784])
        cost = mlp(img)
        avg_loss = fluid.layers.reduce_mean(cost)
        optimizer.minimize(avg_loss)

        # initialize params and fetch them
        static_param_init_value = {}
        static_param_name_list = []
        for param in mlp.parameters():
            static_param_name_list.append(param.name)

        out = exe.run(fluid.default_startup_program(),
                      fetch_list=static_param_name_list)

        for i in range(len(static_param_name_list)):
            static_param_init_value[static_param_name_list[i]] = out[i]

        for batch_id, data in enumerate(train_reader()):
            if batch_id >= self.batch_num:
                break

            static_x_data = np.array(
                [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
            y_data = np.array(
                [x[1] for x in data]).astype('int64').reshape([128, 1])

            fetch_list = [avg_loss.name]
            fetch_list.extend(static_param_name_list)
            out = exe.run(fluid.default_main_program(),
                          feed={"pixel": static_x_data,
                                "label": y_data},
                          fetch_list=fetch_list)

            static_param_value = {}
            static_out = out[0]
            for i in range(1, len(out)):
                static_param_value[static_param_name_list[i - 1]] = out[i]

    for key, value in six.iteritems(static_param_init_value):
        self.assertTrue(np.allclose(value, dy_param_init_value[key]))

    if core.is_compiled_with_rocm():
        self.assertTrue(np.allclose(static_out, dy_out, atol=1e-3))
    else:
        self.assertTrue(np.allclose(static_out, dy_out))

    for key, value in six.iteritems(static_param_value):
        if core.is_compiled_with_rocm():
            self.assertTrue(
                np.allclose(
                    value, dy_param_value[key], atol=1e-3))
        else:
            self.assertTrue(np.allclose(value, dy_param_value[key]))
def setUp(self):
    self.op_type = "rnn"
    self.dtype = "float32" if core.is_compiled_with_rocm() else "float64"
    self.sequence_length = None if core.is_compiled_with_rocm() else np.array(
        [12, 11, 10, 9, 8], dtype=np.int32)
    self.num_layers = 1
    self.is_bidirec = False
    self.is_test = False
    self.mode = "RNN_TANH"
    self.dropout = 0.
    self.set_attrs()

    self.direction_num = 2 if self.is_bidirec else 1
    direction = "bidirectional" if self.is_bidirec else "forward"
    seq_length = 12
    batch_size = 5
    input_size = 3
    hidden_size = 2

    input = np.random.uniform(
        low=-0.1, high=0.1,
        size=(seq_length, batch_size, input_size)).astype(self.dtype)
    if self.sequence_length is not None:
        input[11][1:][:] = 0
        input[10][2:][:] = 0
        input[9][3:][:] = 0
        input[8][4:][:] = 0

    rnn1 = SimpleRNN(
        input_size,
        hidden_size,
        num_layers=self.num_layers,
        time_major=True,
        direction=direction,
        dropout=self.dropout,
        nonlinearity=self.mode,
        dtype=self.dtype)

    flat_w = get_params_for_net(rnn1)
    output, last_hidden = rnn1(input, sequence_length=self.sequence_length)

    init_h = np.zeros((self.num_layers * self.direction_num, batch_size,
                       hidden_size)).astype(self.dtype)
    state_out = np.ndarray((300)).astype("uint8")

    self.inputs = {
        'Input': input,
        'WeightList': flat_w,
        'PreState': [('init_h', init_h)],
        'SequenceLength': self.sequence_length
    }
    if self.sequence_length is None:
        self.inputs = {
            'Input': input,
            'WeightList': flat_w,
            'PreState': [('init_h', init_h)]
        }
    self.attrs = {
        'dropout_prob': self.dropout,
        'is_bidirec': self.is_bidirec,
        'input_size': input_size,
        'hidden_size': hidden_size,
        'num_layers': self.num_layers,
        'is_test': self.is_test,
        'mode': self.mode
    }
    self.outputs = {
        'Out': output,
        'State': [('last_hidden', last_hidden)],
        'Reserve': np.ndarray((400)).astype("uint8"),
        'DropoutState': state_out
    }
                fluid.layers.less_than, x=x, y=y, force_cpu=1)
            op = eval("fluid.layers.%s" % self.op_type)
            self.assertRaises(TypeError, op, x=x, y=y, cond=1)
            self.assertRaises(TypeError, op, x=x, y=a)
            self.assertRaises(TypeError, op, x=a, y=y)

    cls_name = "{0}_{1}".format(op_type, typename)
    Cls.__name__ = cls_name
    globals()[cls_name] = Cls


for _type_name in {'float32', 'float64', 'int32', 'int64'}:
    if _type_name == 'float64' and core.is_compiled_with_rocm():
        _type_name = 'float32'

    create_test_class('less_than', _type_name, lambda _a, _b: _a < _b)
    create_test_class('less_equal', _type_name, lambda _a, _b: _a <= _b)
    create_test_class('greater_than', _type_name, lambda _a, _b: _a > _b)
    create_test_class('greater_equal', _type_name, lambda _a, _b: _a >= _b)
    create_test_class('equal', _type_name, lambda _a, _b: _a == _b)
    create_test_class('not_equal', _type_name, lambda _a, _b: _a != _b)


def create_paddle_case(op_type, callback):
    class PaddleCls(unittest.TestCase):
        def setUp(self):
            self.op_type = op_type
            self.input_x = np.array([1, 2, 3, 4]).astype(np.int64)
def init_data_type(self):
    self.data_type = 'float32' if core.is_compiled_with_rocm() else 'float64'
def set_attr(self):
    self.dtype = "float32" if core.is_compiled_with_rocm() else "float64"
    self.use_tag = True
    self.bz, self.len, self.ntags = 4, 8, 10
def init_data_type(self):
    self.data_type = np.float32 if core.is_compiled_with_rocm() else np.float64
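# The float32-on-ROCm dtype selection above recurs throughout these tests
# because MIOpen largely lacks float64 kernel support. A minimal sketch of
# factoring the pattern into a shared helper; `rocm_safe_dtype` is a
# hypothetical name, not part of the actual test suite.
def rocm_safe_dtype(as_numpy=True):
    # Fall back to float32 on ROCm builds, float64 everywhere else.
    if as_numpy:
        return np.float32 if core.is_compiled_with_rocm() else np.float64
    return "float32" if core.is_compiled_with_rocm() else "float64"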