def test_check_grad(self):
    self.calculate_grads()
    self.check_grad_with_place(
        core.CPUPlace(), ["X"],
        "Out",
        user_defined_grads=[self.dx],
        user_defined_grad_outputs=[self.dout])
def test_check_output(self):
    if platform.system() == "Linux":
        self.check_output_with_place(place=core.CPUPlace(), atol=1e-7)
    else:
        self.check_output_with_place(place=core.CPUPlace(), atol=1e-5)
def place(self):
    return core.CPUPlace()
def test_check_output_cpu(self):
    try:
        self.check_output_with_place(place=core.CPUPlace())
    except:
        print("do not support cpu test, skip")
def test_tensor_ptr(self):
    t = core.Tensor()
    np_arr = numpy.zeros([2, 3])
    t.set(np_arr, core.CPUPlace())
    self.assertGreater(t._ptr(), 0)
def test_fibonacci(self): """ Mimics Fibonacci Go example: https://tour.golang.org/concurrency/5 """ with framework.program_guard(framework.Program()): quit_ch_input_var = self._create_persistable_tensor( 'quit_ch_input', core.VarDesc.VarType.LOD_TENSOR, core.VarDesc.VarType.INT32) quit_ch_input = fill_constant(shape=[1], dtype=core.VarDesc.VarType.INT32, value=0, out=quit_ch_input_var) result = self._create_persistable_tensor( 'result', core.VarDesc.VarType.LOD_TENSOR, core.VarDesc.VarType.INT32) fill_constant(shape=[1], dtype=core.VarDesc.VarType.INT32, value=0, out=result) x = fill_constant(shape=[1], dtype=core.VarDesc.VarType.INT32, value=0) y = fill_constant(shape=[1], dtype=core.VarDesc.VarType.INT32, value=1) while_cond = fill_constant(shape=[1], dtype=core.VarDesc.VarType.BOOL, value=True) while_false = fill_constant(shape=[1], dtype=core.VarDesc.VarType.BOOL, value=False) x_tmp = fill_constant(shape=[1], dtype=core.VarDesc.VarType.INT32, value=0) def fibonacci(channel, quit_channel): while_op = While(cond=while_cond) with while_op.block(): result2 = fill_constant(shape=[1], dtype=core.VarDesc.VarType.INT32, value=0) with fluid.Select() as select: with select.case(fluid.channel_send, channel, x, is_copy=True): assign(input=x, output=x_tmp) assign(input=y, output=x) assign(elementwise_add(x=x_tmp, y=y), output=y) with select.case(fluid.channel_recv, quit_channel, result2): # Quit helper = layer_helper.LayerHelper('assign') helper.append_op(type='assign', inputs={'X': [while_false]}, outputs={'Out': [while_cond]}) ch1 = fluid.make_channel(dtype=core.VarDesc.VarType.LOD_TENSOR) quit_ch = fluid.make_channel(dtype=core.VarDesc.VarType.LOD_TENSOR) with fluid.Go(): for i in range(10): fluid.channel_recv(ch1, result) Print(result) fluid.channel_send(quit_ch, quit_ch_input) fibonacci(ch1, quit_ch) fluid.channel_close(ch1) fluid.channel_close(quit_ch) cpu = core.CPUPlace() exe = Executor(cpu) exe_result = exe.run(fetch_list=[result]) self.assertEqual(exe_result[0][0], 34)
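# The CSP-style test above asserts that the tenth Fibonacci value received is
# 34.  The following small, self-contained sketch (added for illustration; the
# helper name is hypothetical and not part of the original tests) reproduces
# that arithmetic in plain Python.
def _fibonacci_expected_value_sketch():
    # The Go-tour style generator sends x and then advances (x, y) -> (y, x + y).
    # Receiving ten values therefore yields 0, 1, 1, 2, 3, 5, 8, 13, 21, 34,
    # so the last value left in `result` is 34 -- the value the assert checks.
    x, y = 0, 1
    received = []
    for _ in range(10):
        received.append(x)
        x, y = y, x + y
    assert received[-1] == 34
    return received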
def test_w_is_selected_rows(self):
    places = [core.CPUPlace()]  # currently only support CPU
    for place in places:
        self.check_with_place(place)
def train(): args = parse_args() if args.enable_ce: framework.default_startup_program().random_seed = 111 # Training process if args.no_attention: avg_cost, feed_order = no_attention_model.seq_to_seq_net( args.embedding_dim, args.encoder_size, args.decoder_size, args.dict_size, args.dict_size, False, beam_size=args.beam_size, max_length=args.max_length) else: avg_cost, feed_order = attention_model.seq_to_seq_net( args.embedding_dim, args.encoder_size, args.decoder_size, args.dict_size, args.dict_size, False, beam_size=args.beam_size, max_length=args.max_length) # clone from default main program and use it as the validation program main_program = fluid.default_main_program() inference_program = fluid.default_main_program().clone() optimizer = fluid.optimizer.Adam( learning_rate=args.learning_rate, regularization=fluid.regularizer.L2DecayRegularizer( regularization_coeff=1e-5)) optimizer.minimize(avg_cost) # Disable shuffle for Continuous Evaluation only if not args.enable_ce: train_batch_generator = paddle.batch(paddle.reader.shuffle( paddle.dataset.wmt14.train(args.dict_size), buf_size=1000), batch_size=args.batch_size, drop_last=False) test_batch_generator = paddle.batch(paddle.reader.shuffle( paddle.dataset.wmt14.test(args.dict_size), buf_size=1000), batch_size=args.batch_size, drop_last=False) else: train_batch_generator = paddle.batch(paddle.dataset.wmt14.train( args.dict_size), batch_size=args.batch_size, drop_last=False) test_batch_generator = paddle.batch(paddle.dataset.wmt14.test( args.dict_size), batch_size=args.batch_size, drop_last=False) place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace() exe = Executor(place) exe.run(framework.default_startup_program()) feed_list = [ main_program.global_block().var(var_name) for var_name in feed_order ] feeder = fluid.DataFeeder(feed_list, place) def validation(): # Use test set as validation each pass total_loss = 0.0 count = 0 val_feed_list = [ inference_program.global_block().var(var_name) for var_name in feed_order ] val_feeder = fluid.DataFeeder(val_feed_list, place) for batch_id, data in enumerate(test_batch_generator()): val_fetch_outs = exe.run(inference_program, feed=val_feeder.feed(data), fetch_list=[avg_cost], return_numpy=False) total_loss += np.array(val_fetch_outs[0])[0] count += 1 return total_loss / count for pass_id in range(1, args.pass_num + 1): pass_start_time = time.time() words_seen = 0 for batch_id, data in enumerate(train_batch_generator()): words_seen += len(data) * 2 fetch_outs = exe.run(framework.default_main_program(), feed=feeder.feed(data), fetch_list=[avg_cost]) avg_cost_train = np.array(fetch_outs[0]) print('pass_id=%d, batch_id=%d, train_loss: %f' % (pass_id, batch_id, avg_cost_train)) # This is for continuous evaluation only if args.enable_ce and batch_id >= 100: break pass_end_time = time.time() test_loss = validation() time_consumed = pass_end_time - pass_start_time words_per_sec = words_seen / time_consumed print("pass_id=%d, test_loss: %f, words/s: %f, sec/pass: %f" % (pass_id, test_loss, words_per_sec, time_consumed)) # This log is for continuous evaluation only if args.enable_ce: print("kpis\ttrain_cost\t%f" % avg_cost_train) print("kpis\ttest_cost\t%f" % test_loss) print("kpis\ttrain_duration\t%f" % time_consumed) if pass_id % args.save_interval == 0: model_path = os.path.join(args.save_dir, str(pass_id)) if not os.path.isdir(model_path): os.makedirs(model_path) fluid.io.save_persistables( executor=exe, dirname=model_path, main_program=framework.default_main_program())
def create_tensor(scope, name, np_data):
    tensor = scope.var(name).get_tensor()
    tensor.set(np_data, core.CPUPlace())
    return tensor
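# Hedged usage sketch for the create_tensor helper above (added for
# illustration; the scope and variable name are placeholders, not taken from
# the original tests).  It creates a variable in a fresh scope, copies a numpy
# array into its tensor on the CPU, and checks the resulting shape.
def _create_tensor_usage_sketch():
    import numpy as np
    from paddle.fluid import core
    scope = core.Scope()
    tensor = create_tensor(scope, "x", np.ones([2, 3]).astype("float32"))
    # shape() returns the dims of the underlying tensor as a list
    assert list(tensor.shape()) == [2, 3]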
def check_with_place(self, place, data_layout, dtype, shape): epsilon = 0.00001 if len(shape) == 2: x_shape = shape c = x_shape[1] else: n, h, w, c = shape[0], shape[1], shape[2], shape[3] if data_layout == "NHWC": x_shape = [n, h, w, c] elif data_layout == "NCHW": x_shape = [n, c, h, w] else: raise ValueError("Unknown data layout.") scale_shape = [c] x_val = np.random.random_sample(x_shape).astype(dtype) # generate some negative values to test case with relu fused x_val = x_val - 0.5 scale_val = np.random.random_sample(scale_shape).astype(np.float32) bias_val = np.random.random_sample(scale_shape).astype(np.float32) mean = np.zeros(scale_shape).astype(np.float32) variance = np.ones(scale_shape).astype(np.float32) y_out = _reference_testing(x_val, scale_val, bias_val, mean, variance, epsilon, data_layout).astype(dtype) if self.fuse_with_relu: y_out = np.maximum(y_out, 0) scope = core.Scope() # create input x_tensor = create_or_get_tensor(scope, "x_val", OpTest.np_dtype_to_fluid_dtype(x_val), place) scale_tensor = create_or_get_tensor( scope, "scale_val", OpTest.np_dtype_to_fluid_dtype(scale_val), place) bias_tensor = create_or_get_tensor( scope, "bias_val", OpTest.np_dtype_to_fluid_dtype(bias_val), place) mean_tensor = create_or_get_tensor( scope, "mean", OpTest.np_dtype_to_fluid_dtype(mean), place) variance_tensor = create_or_get_tensor( scope, "variance", OpTest.np_dtype_to_fluid_dtype(variance), place) # create output y_tensor = create_or_get_tensor(scope, "y_out", None, place) saved_mean_tensor = create_or_get_tensor(scope, "saved_mean", None, place) saved_variance_tensor = create_or_get_tensor(scope, "saved_variance", None, place) mean_out_tensor = mean_tensor variance_out_tensor = variance_tensor batch_norm_op = Operator( "batch_norm", # inputs X="x_val", Scale="scale_val", Bias="bias_val", Mean="mean", Variance="variance", # outputs Y="y_out", MeanOut="mean", VarianceOut="variance", SavedMean="saved_mean", SavedVariance="saved_variance", # attrs is_test=True, data_layout=data_layout, use_mkldnn=self.use_mkldnn, fuse_with_relu=self.fuse_with_relu, epsilon=epsilon) batch_norm_op.run(scope, place) # When op is called without Executor then # MKL-DNN Tensor is returned. For NHWC data layout # dims will be in NCHW order as it is MKL-DNN way # of memory descripting. So we need to convert NCHW # dims into NHWC. if data_layout == "NHWC" and self.use_mkldnn == True: # Create executor to have MKL-DNN cache # cleared after NHWC unit test place = core.CPUPlace() exe = fluid.Executor(place) dims = y_tensor.shape() c = dims.pop(1) dims.append(c) y_tensor._set_dims(dims) # check inference result self.__assert_close(y_tensor, y_out, "inference output are different at " + str(place) + ", " + data_layout + ", " + str(np.dtype(dtype)) + str(np.array(y_tensor)) + str(y_out), atol=1e-3)
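# A minimal numpy sketch (added for illustration, under the assumption that the
# _reference_testing helper used above computes the conventional inference-mode
# batch norm) of the formula being checked:
#     y = scale * (x - mean) / sqrt(variance + epsilon) + bias
# with the per-channel statistics broadcast over the channel axis.
def _batch_norm_inference_sketch(x, scale, bias, mean, variance, epsilon,
                                 data_layout="NHWC"):
    import numpy as np
    if data_layout == "NCHW":
        # reshape the per-channel vectors so they broadcast over axis 1
        param_shape = [1, -1, 1, 1]
        mean = mean.reshape(param_shape)
        variance = variance.reshape(param_shape)
        scale = scale.reshape(param_shape)
        bias = bias.reshape(param_shape)
    return scale * (x - mean) / np.sqrt(variance + epsilon) + bias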
def test_forward_backward(self):
    def test_with_place(place, data_layout, shape):
        # attr
        epsilon = self.epsilon
        momentum = self.momentum
        if data_layout == "NCHW":
            n, c, h, w = shape[0], shape[1], shape[2], shape[3]
        else:
            n, h, w, c = shape[0], shape[1], shape[2], shape[3]
        scale_shape = [c]

        np.random.seed(123)
        x = np.random.random_sample(shape).astype(np.float32)
        scale = np.random.random_sample(scale_shape).astype(np.float32)
        bias = np.random.random_sample(scale_shape).astype(np.float32)
        mean, variance = self.set_mean_variance(scale_shape, x, data_layout)
        y_grad = np.random.random_sample(shape).astype(np.float32)
        momentum_var = np.array([momentum]).astype(np.float32)

        y, mean_out, variance_out, saved_mean, saved_variance, x_grad, scale_grad, bias_grad = self.ref_forward_backward(
            x, y_grad, scale, bias, mean, variance, epsilon, momentum, shape,
            data_layout)

        var_dict = locals()
        var_dict['y@GRAD'] = y_grad
        var_dict['x@GRAD'] = x_grad
        var_dict['scale@GRAD'] = scale_grad
        var_dict['bias@GRAD'] = bias_grad

        var_names = [
            'x', 'scale', 'bias', 'mean', 'variance', 'y', 'saved_mean',
            'saved_variance', 'momentum_var'
        ]
        ground_truth = {name: var_dict[name] for name in var_names}

        program = fluid.Program()
        with fluid.program_guard(program):
            block = program.global_block()
            for name in ground_truth:
                block.create_var(name=name,
                                 dtype='float32',
                                 shape=ground_truth[name].shape)
            inputs = {
                "X": block.var('x'),
                "Scale": block.var('scale'),
                "Bias": block.var('bias'),
                "Mean": block.var('mean'),
                "Variance": block.var('variance')
            }
            attrs = {
                "epsilon": epsilon,
                "is_test": False,
                "data_layout": data_layout,
                "use_mkldnn": self.use_mkldnn,
                "fuse_with_relu": self.fuse_with_relu,
                "use_global_stats": self.use_global_stats
            }
            if self.use_momentum_variable:
                inputs['MomentumTensor'] = block.var('momentum_var')
            else:
                attrs['momentum'] = momentum

            outputs = {
                "Y": block.var('y'),
                "MeanOut": block.var('mean'),  # share memory
                "VarianceOut": block.var('variance'),  # share memory
                "SavedMean": block.var('saved_mean'),
                "SavedVariance": block.var('saved_variance')
            }
            has_reserve_space = False
            if data_format == 'NHWC':
                flag = os.environ.get(
                    'FLAGS_cudnn_batchnorm_spatial_persistent')
                if flag is not None and flag.lower() in ['true', '1']:
                    has_reserve_space = True
            if has_reserve_space:
                block.create_var(name="reserve_space", dtype='float16')
                outputs["ReserveSpace"] = block.var('reserve_space')
                del os.environ['FLAGS_cudnn_batchnorm_spatial_persistent']
            bn_op = block.append_op(type="batch_norm",
                                    inputs=inputs,
                                    outputs=outputs,
                                    attrs=attrs)
            block.create_var(name='y@GRAD', dtype='float32', shape=y.shape)

            # generate backward op_desc
            grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
                bn_op.desc, self.no_grad_set, [])
            grad_op_desc = grad_op_desc_list[0]
            new_op_desc = block.desc.append_op()
            new_op_desc.copy_from(grad_op_desc)
            for var_name in grad_op_desc.output_arg_names():
                block.desc.var(var_name.encode("ascii"))
            grad_op_desc.infer_var_type(block.desc)
            grad_op_desc.infer_shape(block.desc)
            for arg in grad_op_desc.output_arg_names():
                grad_var = block.desc.find_var(arg.encode("ascii"))
                grad_var.set_dtype(core.VarDesc.VarType.FP32)

            program._sync_with_cpp()

            exe = fluid.Executor(place)
            out = exe.run(program,
                          feed={
                              name: var_dict[name]
                              for name in [
                                  'x', 'scale', 'bias', 'mean', 'variance',
                                  'y@GRAD', 'momentum_var'
                              ]
                          },
                          fetch_list=self.fetch_list)

            for id, name in enumerate(self.fetch_list):
                if name == 'variance':
                    self.__assert_close(var_dict[name],
                                        out[id],
                                        name,
                                        atol=1e-3)
                    continue
                self.__assert_close(var_dict[name], out[id], name)
            print("op test forward passed: ", str(place), data_layout)

    places = [core.CPUPlace()]
    if core.is_compiled_with_cuda() and core.op_support_gpu("batch_norm"):
        places.append(core.CUDAPlace(0))

    for place in places:
        for data_format in self.data_formats:
            test_with_place(place, data_format, [2, 3, 4, 5])
def test_scale_selected_rows_inplace(self):
    places = [core.CPUPlace()]
    if core.is_compiled_with_cuda():
        places.append(core.CUDAPlace(0))
    for place in places:
        self.check_with_place(place, 'in', 'in')
def test_check_grad(self):
    self.check_grad_with_place(core.CPUPlace(), ["X"], "Out")
def test_check_output(self):
    self.check_output_with_place(core.CPUPlace(), no_check_set=['XShape'])
def test_check_grad_ingore_y(self):
    place = core.CPUPlace()
    self.check_grad_with_place(
        place, ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y'))
def infer(): args = parse_args() # Inference if args.no_attention: translation_ids, translation_scores, feed_order = \ no_attention_model.seq_to_seq_net( args.embedding_dim, args.encoder_size, args.decoder_size, args.dict_size, args.dict_size, True, beam_size=args.beam_size, max_length=args.max_length) else: translation_ids, translation_scores, feed_order = \ attention_model.seq_to_seq_net( args.embedding_dim, args.encoder_size, args.decoder_size, args.dict_size, args.dict_size, True, beam_size=args.beam_size, max_length=args.max_length) test_batch_generator = paddle.batch(paddle.reader.shuffle( paddle.dataset.wmt14.test(args.dict_size), buf_size=1000), batch_size=args.batch_size, drop_last=False) place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace() exe = Executor(place) exe.run(framework.default_startup_program()) model_path = os.path.join(args.save_dir, str(args.pass_num)) fluid.io.load_persistables(executor=exe, dirname=model_path, main_program=framework.default_main_program()) src_dict, trg_dict = paddle.dataset.wmt14.get_dict(args.dict_size) feed_list = [ framework.default_main_program().global_block().var(var_name) for var_name in feed_order[0:1] ] feeder = fluid.DataFeeder(feed_list, place) for batch_id, data in enumerate(test_batch_generator()): # The value of batch_size may vary in the last batch batch_size = len(data) # Setup initial ids and scores lod tensor init_ids_data = np.array([0 for _ in range(batch_size)], dtype='int64') init_scores_data = np.array([1. for _ in range(batch_size)], dtype='float32') init_ids_data = init_ids_data.reshape((batch_size, 1)) init_scores_data = init_scores_data.reshape((batch_size, 1)) init_recursive_seq_lens = [1] * batch_size init_recursive_seq_lens = [ init_recursive_seq_lens, init_recursive_seq_lens ] init_ids = fluid.create_lod_tensor(init_ids_data, init_recursive_seq_lens, place) init_scores = fluid.create_lod_tensor(init_scores_data, init_recursive_seq_lens, place) # Feed dict for inference feed_dict = feeder.feed([[x[0]] for x in data]) feed_dict['init_ids'] = init_ids feed_dict['init_scores'] = init_scores fetch_outs = exe.run(framework.default_main_program(), feed=feed_dict, fetch_list=[translation_ids, translation_scores], return_numpy=False) # Split the output words by lod levels lod_level_1 = fetch_outs[0].lod()[1] token_array = np.array(fetch_outs[0]) result = [] for i in six.moves.xrange(len(lod_level_1) - 1): sentence_list = [ trg_dict[token] for token in token_array[lod_level_1[i]:lod_level_1[i + 1]] ] sentence = " ".join(sentence_list[1:-1]) result.append(sentence) lod_level_0 = fetch_outs[0].lod()[0] paragraphs = [ result[lod_level_0[i]:lod_level_0[i + 1]] for i in six.moves.xrange(len(lod_level_0) - 1) ] for paragraph in paragraphs: print(paragraph)
def test_check_output(self):
    place = core.CPUPlace()
    self.check_output_with_place(place, atol=1e-3)
def test_check_grad(self):
    self.check_grad_with_place(core.CPUPlace(), ['X', 'Y'], 'Out')
def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
                   batch_acc, args, train_prog, startup_prog, nccl_id_var,
                   num_trainers, trainer_id):
    feed_var_list = [
        var for var in train_prog.global_block().vars.values() if var.is_data
    ]
    # generate fake:
    if args.use_fake_data:
        for var in feed_var_list:
            v = startup_prog.global_block().clone_variable(var)
            var.persistable = True
            v.persistable = True

            real_shape = list(var.shape)
            # integer division so the faked batch dimension stays an int
            real_shape[0] = args.batch_size // args.gpus
            startup_prog.global_block().append_op(
                outputs={"Out": v},
                type="fill_constant",
                attrs={"shape": real_shape,
                       "value": 1.0,
                       "dtype": var.dtype})

    place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
    if nccl_id_var and trainer_id == 0:
        # FIXME(wuyi): wait other trainer to start listening
        time.sleep(30)

    startup_exe = fluid.Executor(place)
    startup_exe.run(startup_prog)
    strategy = fluid.ExecutionStrategy()
    strategy.num_threads = 1
    strategy.allow_op_delay = False
    exe = fluid.ParallelExecutor(True,
                                 avg_loss.name,
                                 exec_strategy=strategy,
                                 num_trainers=num_trainers,
                                 trainer_id=trainer_id)
    feeder = fluid.DataFeeder(feed_var_list, place)

    for pass_id in range(args.pass_num):
        num_samples = 0
        iters = 0
        start_time = time.time()
        for batch_id, data in enumerate(train_reader()):
            if args.profile and pass_id == 0 and batch_id == 5:
                profiler.start_profiler("All")
            elif args.profile and pass_id == 0 and batch_id == 10:
                profiler.stop_profiler("total", "/tmp/profile_%d" % trainer_id)

            if iters == args.skip_batch_num:
                start_time = time.time()
                num_samples = 0
            if iters == args.iterations:
                break
            if args.use_fake_data:
                loss, = exe.run([avg_loss.name])
            else:
                loss, = exe.run([avg_loss.name], feed=feeder.feed(data))
            if args.update_method == "pserver":
                exe.bcast_params()
            num_samples += len(data)
            iters += 1
            if batch_id % 1 == 0:
                print("Pass %d, batch %d, loss %s" %
                      (pass_id, batch_id, np.array(loss)))

        train_elapsed = time.time() - start_time
        examples_per_sec = num_samples / train_elapsed
        print('\nTotal examples: %d, total time: %.5f, %.5f examples/sec\n' %
              (num_samples, train_elapsed, examples_per_sec))
        if not args.no_test and batch_acc is not None:
            test_acc = test(startup_exe, infer_prog, test_reader, feeder,
                            batch_acc)
            print("Pass: %d, Test Accuracy: %f\n" % (pass_id, test_acc))

    exit(0)
def test_check_grad_ingore_y(self):
    self.check_grad_with_place(core.CPUPlace(), ['X'], 'Out', set('Y'))
def test_out_scale_acc(self): seed = 1000 lr = 0.1 imperative_out_scale = ImperativeQuantAware() np.random.seed(seed) reader = paddle.batch( paddle.dataset.mnist.test(), batch_size=32, drop_last=True) lenet = ImperativeLenet() fixed_state = {} for name, param in lenet.named_parameters(): p_shape = param.numpy().shape p_value = param.numpy() if name.endswith("bias"): value = np.zeros_like(p_value).astype('float32') else: value = np.random.normal( loc=0.0, scale=0.01, size=np.product(p_shape)).reshape(p_shape).astype('float32') fixed_state[name] = value lenet.set_dict(fixed_state) imperative_out_scale.quantize(lenet) adam = AdamOptimizer( learning_rate=lr, parameter_list=lenet.parameters()) dynamic_loss_rec = [] lenet.train() for batch_id, data in enumerate(reader()): x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') y_data = np.array( [x[1] for x in data]).astype('int64').reshape(-1, 1) img = fluid.dygraph.to_variable(x_data) label = fluid.dygraph.to_variable(y_data) out = lenet(img) loss = fluid.layers.cross_entropy(out, label) avg_loss = fluid.layers.mean(loss) avg_loss.backward() adam.minimize(avg_loss) lenet.clear_gradients() dynamic_loss_rec.append(avg_loss.numpy()[0]) if batch_id % 100 == 0: _logger.info('{}: {}'.format('loss', avg_loss.numpy())) lenet.eval() path = "./save_dynamic_quant_infer_model/lenet" save_dir = "./save_dynamic_quant_infer_model" imperative_out_scale.save_quantized_model( layer=lenet, path=path, input_spec=[ paddle.static.InputSpec( shape=[None, 1, 28, 28], dtype='float32') ]) paddle.enable_static() if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) else: place = core.CPUPlace() exe = fluid.Executor(place) [inference_program, feed_target_names, fetch_targets] = ( fluid.io.load_inference_model( dirname=save_dir, executor=exe, model_filename="lenet" + INFER_MODEL_SUFFIX, params_filename="lenet" + INFER_PARAMS_SUFFIX)) model_ops = inference_program.global_block().ops conv2d_count, mul_count = 0, 0 for i, op in enumerate(model_ops): if op.type == 'conv2d': if conv2d_count > 0: self.assertTrue( 'fake_quantize_dequantize' in model_ops[i - 1].type) else: self.assertTrue( 'fake_quantize_dequantize' not in model_ops[i - 1].type) conv2d_count += 1 if op.type == 'mul': if mul_count > 0: self.assertTrue( 'fake_quantize_dequantize' in model_ops[i - 1].type) else: self.assertTrue( 'fake_quantize_dequantize' not in model_ops[i - 1].type) mul_count += 1
def setUp(self):
    if core.is_compiled_with_cuda():
        self.place = core.CUDAPlace(0)
    else:
        self.place = core.CPUPlace()
def save_quantized_model(self, layer, path, input_spec=None, **config): """ Save the quantized model for the inference. Args: layer (Layer): The Layer to be saved. path (str): The path prefix to save model. The format is ``dirname/file_prefix`` or ``file_prefix``. input_spec (list[InputSpec|Tensor], optional): Describes the input of the saved model's forward method, which can be described by InputSpec or example Tensor. If None, all input variables of the original Layer's forward method would be the inputs of the saved model. Default None. **configs (dict, optional): Other save configuration options for compatibility. We do not recommend using these configurations, they may be removed in the future. If not necessary, DO NOT use them. Default None. The following options are currently supported: (1) output_spec (list[Tensor]): Selects the output targets of the saved model. By default, all return variables of original Layer's forward method are kept as the output of the saved model. If the provided ``output_spec`` list is not all output variables, the saved model will be pruned according to the given ``output_spec`` list. Returns: None """ assert isinstance( layer, dygraph.Layer), "model must be the instance of dygraph.Layer" is_dynamic_mode = False with dygraph.guard(): layer.eval() for handle in self._register_hook_handle_list: handle.remove() for key in self._out_scale_dict: self._out_scale_dict[key] = float( self._out_scale_dict[key].numpy()) paddle.jit.save(layer=layer, path=path, input_spec=input_spec, **config) if paddle.in_dynamic_mode(): is_dynamic_mode = True paddle.enable_static() if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) else: place = core.CPUPlace() exe = Executor(place) file_prefix = os.path.basename(path) dirname = os.path.dirname(path) model_filename = file_prefix + INFER_MODEL_SUFFIX params_filename = file_prefix + INFER_PARAMS_SUFFIX [inference_program, feed_target_names, fetch_targets ] = (load_inference_model(dirname=dirname, executor=exe, model_filename=model_filename, params_filename=params_filename)) # Traverse all ops in the program and find out the op matching # the Layer in the dynamic graph. layer_var_dict = {} for block in inference_program.blocks: for op in block.ops: if op.type in _op_real_in_out_name: output_var_names = quantization_pass._get_op_output_var_names( op) for output_var_name in output_var_names: output_var_tensor = block.var(output_var_name) if output_var_tensor.dtype not in [ core.VarDesc.VarType.FP64, core.VarDesc.VarType.FP32 ]: continue # Because the Layer in dygraph may correspond to multiple ops # in static program after being saved. To ensure correctness, # the outscale collected for output of dygraph Layer can only # be set to the last op in the corresponding ops in static program. # # We can judge the execution order of the ops which corresponding # to dygraph Layer by the name of output. And use dict to save # the corresponding relationship between the dygraph Layer and the # static graph op that needs to set the outscale attribute. if '.' not in output_var_name: continue dynamic_layer_name, var_name_suffix = output_var_name.split( ".") if dynamic_layer_name in layer_var_dict: if layer_var_dict[dynamic_layer_name][ 0] < var_name_suffix: layer_var_dict[dynamic_layer_name] = [ var_name_suffix, op ] else: layer_var_dict[dynamic_layer_name] = [ var_name_suffix, op ] # Because the naming styles of static and dynamic graph are different, # in order to avoid mistakes, we unify the name here. 
for (layer_name, var_name_op_list) in layer_var_dict.items(): if 'prelu' in layer_name: layer_name = layer_name.replace('prelu', 'p_re_lu') if 'relu' in layer_name: layer_name = layer_name.replace('relu', 're_lu') if layer_name not in self._out_scale_dict: continue var_name_op_list[1]._set_attr('out_threshold', self._out_scale_dict[layer_name]) # Save the processed program. save_inference_model(dirname=dirname, feeded_var_names=feed_target_names, target_vars=fetch_targets, executor=exe, main_program=inference_program.clone(), model_filename=model_filename, params_filename=params_filename) if is_dynamic_mode: paddle.disable_static()
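# Hedged usage sketch for save_quantized_model (added for illustration and
# modeled on the dygraph out-scale test earlier in this section; the quanter
# object, layer, path prefix and InputSpec shape are placeholders).  Per the
# docstring above, input_spec describes the forward inputs, and an optional
# output_spec entry in the extra config can prune the saved outputs.
def _save_quantized_model_usage_sketch(quanter, trained_layer):
    # `quanter` is assumed to be the quant-aware object owning
    # save_quantized_model (e.g. the ImperativeQuantAware instance used in
    # the test above) and `trained_layer` a dygraph Layer it has quantized.
    import paddle
    quanter.save_quantized_model(
        layer=trained_layer,
        path="./quant_infer_model/lenet",  # placeholder path prefix
        input_spec=[
            paddle.static.InputSpec(shape=[None, 1, 28, 28], dtype='float32')
        ])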
def check_forward_backward(self, shape, begin_norm_axis, has_scale=True, has_bias=True, y_grad_scale=1.0): def test_with_place(place, shape, begin_norm_axis): # attr epsilon = 0.00001 x_shape = shape D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1) scale_shape = [D] np.random.seed(123) x = np.random.random_sample(x_shape).astype(np.float32) scale = np.random.random_sample(scale_shape).astype( np.float32) if has_scale else None bias = np.random.random_sample(scale_shape).astype( np.float32) if has_bias else None y_grad = (np.random.random_sample(x_shape) * y_grad_scale).astype( np.float32) # reference forward & backward y, mean, variance = _reference_layer_norm_naive( x, scale, bias, epsilon, begin_norm_axis) x_grad, scale_grad, bias_grad = _reference_layer_norm_grad( x, y_grad, scale, bias, mean, variance, begin_norm_axis) var_dict = locals() var_dict['y@GRAD'] = y_grad var_names = ['x', 'mean', 'variance', 'y', 'y@GRAD'] if has_scale: var_names += ['scale'] if has_bias: var_names += ['bias'] ground_truth = {name: var_dict[name] for name in var_names} program = fluid.Program() with fluid.program_guard(program): block = program.global_block() for name in ground_truth: block.create_var(name=name, dtype='float32', shape=ground_truth[name].shape) inputs = {"X": block.var('x')} fetch_list = [ 'y', 'mean', 'variance', 'x@GRAD', ] if has_scale: inputs["Scale"] = block.var('scale') fetch_list += ['scale@GRAD'] if has_bias: inputs["Bias"] = block.var('bias') fetch_list += ['bias@GRAD'] layer_norm_op = block.append_op( type="layer_norm", inputs=inputs, outputs={ "Y": block.var('y'), "Mean": block.var('mean'), # share the same memory "Variance": block.var('variance'), # share the same memory }, attrs={ "epsilon": epsilon, "begin_norm_axis": begin_norm_axis }) # generate backward op_desc grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( layer_norm_op.desc, set(), []) grad_op_desc = grad_op_desc_list[0] new_op_desc = block.desc.append_op() new_op_desc.copy_from(grad_op_desc) for var_name in grad_op_desc.output_arg_names(): block.desc.var(var_name.encode("ascii")) grad_op_desc.infer_var_type(block.desc) grad_op_desc.infer_shape(block.desc) for arg in grad_op_desc.output_arg_names(): grad_var = block.desc.find_var(arg.encode("ascii")) grad_var.set_dtype(core.VarDesc.VarType.FP32) program._sync_with_cpp() exe = fluid.Executor(place) out = exe.run(program, feed={ name: var_dict[name] for name in ['x', 'scale', 'bias', 'y@GRAD'] }, fetch_list=fetch_list) self.__assert_close(y, out[0], "y") self.__assert_close(mean, out[1], "mean") self.__assert_close(variance, out[2], "variance", 1e-3) self.__assert_close(x_grad, out[3], "x_grad") if has_scale: self.__assert_close(scale_grad, out[fetch_list.index('scale@GRAD')], "scale_grad", 1e-3) if has_bias: self.__assert_close(bias_grad, out[fetch_list.index('bias@GRAD')], "bias_grad") places = [core.CPUPlace()] if core.is_compiled_with_cuda() and core.op_support_gpu( "layer_norm") and self.use_cudnn: places.append(core.CUDAPlace(0)) for place in places: test_with_place(place, shape, begin_norm_axis)
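# A minimal numpy sketch (added for illustration, assuming the usual layer-norm
# definition behind _reference_layer_norm_naive): flatten the trailing dims
# from begin_norm_axis onward, normalize each row with its own mean and
# variance, then apply the optional scale and bias.
def _layer_norm_forward_sketch(x, scale, bias, epsilon, begin_norm_axis):
    import numpy as np
    orig_shape = x.shape
    left = int(np.prod(orig_shape[:begin_norm_axis]))
    x2 = x.reshape(left, -1)
    mean = x2.mean(axis=1, keepdims=True)
    variance = x2.var(axis=1, keepdims=True)
    y = (x2 - mean) / np.sqrt(variance + epsilon)
    if scale is not None:
        y = y * scale.reshape(1, -1)
    if bias is not None:
        y = y + bias.reshape(1, -1)
    return y.reshape(orig_shape), mean.reshape(left), variance.reshape(left)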
def test_case(self): x = fluid.data(name="x", shape=[1, 3, 64], dtype="float32") dim = fluid.data(name="dim", shape=[1], dtype="int32") shape_tensor = fluid.data(name="shape_tensor", shape=[1], dtype="int32") actual_size = fluid.data(name="actual_size", shape=[1], dtype="int32") scale_tensor = fluid.data(name="scale_tensor", shape=[1], dtype="float32") out1 = fluid.layers.resize_linear(x, out_shape=[ 128, ], align_mode=1, align_corners=False) out2 = fluid.layers.resize_linear(x, out_shape=[128], align_mode=1, align_corners=False) out3 = fluid.layers.resize_linear(x, out_shape=shape_tensor, align_mode=1, align_corners=False) out4 = fluid.layers.resize_linear(x, out_shape=[ 128, ], actual_shape=actual_size, align_mode=1, align_corners=False) out5 = fluid.layers.resize_linear(x, scale=scale_tensor, align_mode=1, align_corners=False) out6 = interpolate(x, scale_factor=scale_tensor, mode='linear', align_mode=1, align_corners=False, data_format='NCW') out7 = interpolate(x, size=[ 128, ], mode='linear', align_mode=1, align_corners=False, data_format='NCW') out8 = interpolate(x, size=shape_tensor, mode='linear', align_mode=1, align_corners=False, data_format='NCW') x_data = np.random.random((1, 3, 64)).astype("float32") dim_data = np.array([128]).astype("int32") shape_data = np.array([ 128, ]).astype("int32") actual_size_data = np.array([ 128, ]).astype("int32") scale_data = np.array([2.0]).astype("float32") if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) else: place = core.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) results = exe.run( fluid.default_main_program(), feed={ "x": x_data, "dim": dim_data, "shape_tensor": shape_data, "actual_size": actual_size_data, "scale_tensor": scale_data }, fetch_list=[out1, out2, out3, out4, out5, out6, out7, out8], return_numpy=True) expect_res = linear_interp_np(x_data, out_w=128, align_mode=1, align_corners=False) for res in results: self.assertTrue(np.allclose(res, expect_res))
def test_check_output(self):
    self.check_output_with_place(core.CPUPlace())
def check_forward_backward(self): def test_with_place(place): out_grad = np.random.random_sample(self.x.shape).astype(np.float32) x_grad = out_grad sum_axis = list(range(0, len(self.x.shape))) del sum_axis[self.axis] y_grad = np.sum(out_grad, axis=tuple(sum_axis)) var_dict = locals() var_dict['y'] = self.y var_dict['x'] = self.x var_dict['out'] = self.out var_dict['y@GRAD'] = y_grad var_dict['x@GRAD'] = x_grad var_dict['out@GRAD'] = out_grad var_names = ['x', 'y', 'out', 'y@GRAD', 'x@GRAD', 'out@GRAD'] ground_truth = {name: var_dict[name] for name in var_names} program = fluid.Program() with fluid.program_guard(program): block = program.global_block() for name in ground_truth: block.create_var(name=name, dtype='float32', shape=ground_truth[name].shape) elementwise_add_op = block.append_op(type="elementwise_add", inputs={ "X": block.var('x'), "Y": block.var('y'), }, outputs={ "Out": block.var('out'), }, attrs={ "axis": self.axis, }) # generate backward op_desc grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( elementwise_add_op.desc, set(), []) grad_op_desc = grad_op_desc_list[0] new_op_desc = block.desc.append_op() new_op_desc.copy_from(grad_op_desc) for var_name in grad_op_desc.output_arg_names(): block.desc.var(var_name.encode("ascii")) grad_op_desc.infer_var_type(block.desc) grad_op_desc.infer_shape(block.desc) for arg in grad_op_desc.output_arg_names(): grad_var = block.desc.find_var(arg.encode("ascii")) grad_var.set_dtype(core.VarDesc.VarType.FP32) exe = fluid.Executor(place) out = exe.run(program, feed={ name: var_dict[name] for name in ['x', 'y', 'out@GRAD'] }, fetch_list=['x@GRAD', 'y@GRAD']) self.__assert_close(x_grad, out[0], "x@GRAD") self.__assert_close(y_grad, out[1], "y@GRAD", atol=1.4) places = [core.CPUPlace()] if core.is_compiled_with_cuda() and core.op_support_gpu( "elementwise_add"): places.append(core.CUDAPlace(0)) for place in places: test_with_place(place)
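# Small illustrative sketch (added; not part of the original test): for a
# broadcast elementwise_add, dX passes the upstream gradient through unchanged
# while dY sums it over every axis Y was broadcast along -- the same reduction
# check_forward_backward above uses to build its expected y@GRAD.
def _broadcast_add_grad_sketch(out_grad, axis):
    import numpy as np
    sum_axes = tuple(d for d in range(out_grad.ndim) if d != axis)
    return out_grad, np.sum(out_grad, axis=sum_axes)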
def test_check_grad_normal(self):
    place = core.CPUPlace()
    self.check_grad_with_place(place, ['X', 'Y'], 'Out')
def test_check_output(self):
    # TODO(wangzhongpu): support mkldnn op in dygraph mode
    self.check_output_with_place(core.CPUPlace(), atol=0, check_dygraph=False)
def test_forward_backward(self):
    def test_with_place(place, data_layout, shape):
        # attr
        epsilon = 0.00001
        momentum = 0.9

        if data_layout == "NCHW":
            n, c, h, w = shape[0], shape[1], shape[2], shape[3]
        else:
            n, h, w, c = shape[0], shape[1], shape[2], shape[3]

        scale_shape = [c]

        np.random.seed(123)
        x = np.random.random_sample(shape).astype(np.float32)
        scale = np.random.random_sample(scale_shape).astype(np.float32)
        bias = np.random.random_sample(scale_shape).astype(np.float32)
        mean = np.zeros(scale_shape).astype(np.float32)
        variance = np.ones(scale_shape).astype(np.float32)
        y_grad = np.random.random_sample(shape).astype(np.float32)

        y, mean_out, variance_out, saved_mean, saved_variance, x_grad, scale_grad, bias_grad = self.ref_forward_backward(
            x, y_grad, scale, bias, mean, variance, epsilon, momentum, shape,
            data_layout)

        var_dict = locals()
        var_dict['y@GRAD'] = y_grad

        var_names = [
            'x', 'scale', 'bias', 'mean', 'variance', 'y', 'saved_mean',
            'saved_variance'
        ]
        ground_truth = {name: var_dict[name] for name in var_names}

        program = fluid.Program()
        with fluid.program_guard(program):
            block = program.global_block()
            for name in ground_truth:
                block.create_var(name=name,
                                 dtype='float32',
                                 shape=ground_truth[name].shape)
            bn_op = block.append_op(
                type="batch_norm",
                inputs={
                    "X": block.var('x'),
                    "Scale": block.var('scale'),
                    "Bias": block.var('bias'),
                    "Mean": block.var('mean'),
                    "Variance": block.var('variance')
                },
                outputs={
                    "Y": block.var('y'),
                    "MeanOut": block.var('mean'),  # share the same memory
                    "VarianceOut": block.var('variance'),  # share the same memory
                    "SavedMean": block.var('saved_mean'),
                    "SavedVariance": block.var('saved_variance')
                },
                attrs={
                    "momentum": momentum,
                    "epsilon": epsilon,
                    "is_test": False,
                    "data_layout": data_layout,
                    "use_mkldnn": self.use_mkldnn
                })
            block.create_var(name='y@GRAD', dtype='float32', shape=y.shape)

            # generate backward op_desc
            grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
                bn_op.desc, set(), [])
            grad_op_desc = grad_op_desc_list[0]
            new_op_desc = block.desc.append_op()
            new_op_desc.copy_from(grad_op_desc)
            for var_name in grad_op_desc.output_arg_names():
                block.desc.var(var_name.encode("ascii"))
            grad_op_desc.infer_var_type(block.desc)
            grad_op_desc.infer_shape(block.desc)
            for arg in grad_op_desc.output_arg_names():
                grad_var = block.desc.find_var(arg.encode("ascii"))
                grad_var.set_dtype(core.VarDesc.VarType.FP32)

            exe = fluid.Executor(place)
            out = exe.run(
                program,
                feed={
                    name: var_dict[name]
                    for name in
                    ['x', 'scale', 'bias', 'mean', 'variance', 'y@GRAD']
                },
                fetch_list=[
                    'y', 'mean', 'variance', 'saved_mean', 'saved_variance',
                    'x@GRAD', 'scale@GRAD', 'bias@GRAD'
                ])

            self.__assert_close(y, out[0], "y")
            self.__assert_close(mean_out, out[1], "mean")
            self.__assert_close(variance_out, out[2], "variance", 1e-3)
            self.__assert_close(saved_mean, out[3], "saved_mean")
            self.__assert_close(saved_variance, out[4], "saved_variance", 1e-3)
            self.__assert_close(x_grad, out[5], "x_grad")
            self.__assert_close(scale_grad, out[6], "scale_grad")
            self.__assert_close(bias_grad, out[7], "bias_grad")
            print("op test forward passed: ", str(place), data_layout)

    places = [core.CPUPlace()]
    if core.is_compiled_with_cuda() and core.op_support_gpu("batch_norm"):
        places.append(core.CUDAPlace(0))
    for place in places:
        for data_format in self.data_formats:
            test_with_place(place, data_format, [2, 3, 4, 5])