def test_assign_static1():
    """
    default,input=tensor,type=int,bool
    """
    paddle.enable_static()
    for t in types1:
        np.random.seed(seed)
        main_program = fluid.Program()
        startup_program = fluid.Program()
        input = np.arange(6).reshape([6]).astype(t)
        feed = {"input": input}
        with fluid.unique_name.guard():
            with fluid.program_guard(main_program=main_program,
                                     startup_program=startup_program):
                input1 = paddle.static.data(name="input", shape=input.shape, dtype=t)
                input1.stop_gradient = False
                output = paddle.fluid.layers.assign(input1)
                loss = paddle.mean(output)
                g = fluid.gradients(loss, input1)
                exe = fluid.Executor()
                exe.run(startup_program)
                out, g = exe.run(main_program, feed=feed, fetch_list=[output, g])
                compare(out, input)
def gradient_penalty(self, f, real, fake, cfg=None, name=None):
    def _interpolate(a, b):
        shape = [a.shape[0]]
        alpha = fluid.layers.uniform_random_batch_size_like(
            input=a, shape=shape, min=0.0, max=1.0)
        a.stop_gradient = True
        b.stop_gradient = True
        inner1 = fluid.layers.elementwise_mul(a, alpha, axis=0)
        inner2 = fluid.layers.elementwise_mul(b, (1.0 - alpha), axis=0)
        inner1.stop_gradient = True
        inner2.stop_gradient = True
        inner = inner1 + inner2
        return inner

    x = _interpolate(real, fake)
    pred, _ = f(x, cfg, name=name)
    if isinstance(pred, tuple):
        pred = pred[0]
    vars = []
    for var in fluid.default_main_program().list_vars():
        if fluid.io.is_parameter(var) and var.name.startswith('d_'):
            vars.append(var.name)
    grad = fluid.gradients(pred, x, no_grad_set=vars)[0]
    grad_shape = grad.shape
    grad = fluid.layers.reshape(
        grad, [-1, grad_shape[1] * grad_shape[2] * grad_shape[3]])
    epsilon = 1e-16
    norm = fluid.layers.sqrt(
        fluid.layers.reduce_sum(
            fluid.layers.square(grad), dim=1) + epsilon)
    gp = fluid.layers.reduce_mean(fluid.layers.square(norm - 1.0))
    return gp
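# A hedged usage sketch for the penalty above (the discriminator handle, the
# cfg fields, and the 'd_' parameter prefix are assumptions for illustration):
# in a WGAN-GP objective the penalty is scaled and added to the critic loss.
#
#   gp = self.gradient_penalty(self.network_D, self.real_image, self.fake_image,
#                              cfg=cfg, name='d_')
#   d_loss = d_loss_real + d_loss_fake + cfg.lambda_gp * gp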
def test_softmax_with_cross_entropy_static1():
    """
    default
    """
    paddle.enable_static()
    for place in places:
        for t in types:
            x = np.arange(8).reshape(8).astype(t)
            label = np.array([[1]], dtype='int64')
            feed = {'x': x, 'label': label}
            main_program = fluid.Program()
            startup_program = fluid.Program()
            with fluid.program_guard(main_program=main_program,
                                     startup_program=startup_program):
                logits1 = paddle.static.data(name="x", shape=x.shape, dtype=t)
                label1 = paddle.static.data(name="label", shape=label.shape, dtype='int64')
                logits1.stop_gradient = False
                output = fluid.layers.softmax_with_cross_entropy(
                    logits=logits1, label=label1)
                loss = paddle.mean(output)
                g = fluid.gradients(loss, logits1)
                exe = fluid.Executor(place)
                exe.run(startup_program)
                out, g = exe.run(main_program, feed=feed, fetch_list=[output, g])
                assert np.allclose(out, [[6.45833969]],
                                   atol=0.005, rtol=0.05, equal_nan=True)
def _paddle_prepare(self, predict_fn=None):
    if predict_fn is None:
        startup_prog = fluid.Program()
        main_program = fluid.Program()
        with fluid.program_guard(main_program, startup_prog):
            with fluid.unique_name.guard():
                data_op = fluid.data(name='data', shape=[None], dtype='int64', lod_level=1)
                label_op = fluid.data(name='label', shape=[None, 1], dtype='int64')
                alpha_op = fluid.layers.data(name='alpha', shape=[None, 1], dtype='double')
                emb, probs = self.paddle_model(data_op, alpha_op, self.noise_amount)
                # manually switch the model to test mode
                for op in main_program.global_block().ops:
                    if op.type == 'batch_norm':
                        op._set_attr('use_global_stats', True)
                    elif op.type == 'dropout':
                        op._set_attr('dropout_prob', 0.0)
                class_num = probs.shape[-1]
                one_hot = fluid.layers.one_hot(label_op, class_num)
                one_hot = fluid.layers.elementwise_mul(probs, one_hot)
                target_category_loss = fluid.layers.reduce_sum(one_hot, dim=1)
                p_g_list = fluid.backward.append_backward(target_category_loss)
                gradients_map = fluid.gradients(one_hot, emb)[0]
        if self.use_cuda:
            gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
            place = fluid.CUDAPlace(gpu_id)
        else:
            place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        fluid.io.load_persistables(exe, self.trained_model_path, main_program)

        def predict_fn(data, label, alpha):
            gradients, out, embedding = exe.run(
                main_program,
                feed={'data': data, 'label': label, 'alpha': alpha},
                fetch_list=[gradients_map, probs, emb],
                return_numpy=False)
            return gradients, out, embedding

    self.predict_fn = predict_fn
    self.paddle_prepared = True
def _paddle_prepare(self, predict_fn=None):
    if predict_fn is None:
        import paddle.fluid as fluid
        startup_prog = fluid.Program()
        main_program = fluid.Program()
        with fluid.program_guard(main_program, startup_prog):
            with fluid.unique_name.guard():
                data_op = fluid.data(name='data',
                                     shape=[None] + self.model_input_shape,
                                     dtype=self.data_type)
                label_op = fluid.data(name='label', shape=[None, 1], dtype='int64')
                x_noise = fluid.data(name='noise',
                                     shape=[None] + self.model_input_shape,
                                     dtype='float32')
                x_plus_noise = data_op + x_noise
                probs = self.paddle_model(x_plus_noise)
                for op in main_program.global_block().ops:
                    if op.type == 'batch_norm':
                        op._set_attr('use_global_stats', True)
                    elif op.type == 'dropout':
                        op._set_attr('dropout_prob', 0.0)
                class_num = probs.shape[-1]
                one_hot = fluid.layers.one_hot(label_op, class_num)
                one_hot = fluid.layers.elementwise_mul(probs, one_hot)
                target_category_loss = fluid.layers.reduce_sum(one_hot, dim=1)
                p_g_list = fluid.backward.append_backward(target_category_loss)
                gradients_map = fluid.gradients(one_hot, x_plus_noise)[0]
        if self.use_cuda:
            gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
            place = fluid.CUDAPlace(gpu_id)
        else:
            place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        fluid.io.load_persistables(exe, self.trained_model_path, main_program)

        def predict_fn(data, labels, noise=0.0):
            if isinstance(noise, (float, int)):
                noise = np.ones_like(data) * noise
            gradients, out = exe.run(main_program,
                                     feed={'data': data, 'label': labels, 'noise': noise},
                                     fetch_list=[gradients_map, probs])
            return gradients, out

    self.predict_fn = predict_fn
    self.paddle_prepared = True
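# A hedged SmoothGrad-style driver for the predict_fn built above (the noise
# scale, sample count, and interpreter handle are assumptions, not taken from
# this class): input gradients are averaged over several Gaussian-noise draws.
import numpy as np

def smooth_gradients(interpreter, data, labels, n_samples=25, sigma=0.1):
    avg_grad = None
    for _ in range(n_samples):
        # fresh noise for every draw; predict_fn adds it to the input
        noise = np.random.normal(0.0, sigma, size=data.shape).astype('float32')
        gradients, _ = interpreter.predict_fn(data, labels, noise)
        g = np.array(gradients)
        avg_grad = g if avg_grad is None else avg_grad + g
    return avg_grad / n_samples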
def test_error(self):
    x = fluid.data(name='x', shape=[None, 2, 8, 8], dtype='float32')
    x.stop_gradient = False
    conv = fluid.layers.conv2d(x, 4, 1, bias_attr=False)
    y = fluid.layers.relu(conv)

    with self.assertRaises(TypeError):
        x_grad = fluid.gradients(y.name, x)

    with self.assertRaises(TypeError):
        x_grad = fluid.gradients(y, x.name)

    with self.assertRaises(TypeError):
        x_grad = fluid.gradients([y], [x], target_gradients=x.name)

    with self.assertRaises(TypeError):
        x_grad = fluid.gradients([y], x, no_grad_set=conv)
def get_static_triple_grad(x, y, x_init=None, dy_init=None, place=None, program=None):
    """
    Get Triple Grad result of static graph.

    Args:
        x (Variable|list[Variable]): input variables to the program.
        y (Variable|list[Variable]): output variables to the program.
        x_init (numpy.array|list[numpy.array]|None): the init value for input x.
        dy_init (numpy.array|list[numpy.array]|None): the init value for output y.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        program (Program|None): a Program with forward pass.
            If None, use fluid.default_main_program().
    Returns:
        A list of numpy array that stores third derivative result calculated
        by static graph.
    """
    if program is None:
        program = fluid.default_main_program()
    scope = fluid.executor.global_scope()
    y_grads = []
    for i in six.moves.xrange(len(y)):
        yi = y[i]
        dyi_name = _append_grad_suffix_(yi.name)
        np_type = dtype_to_np_dtype(yi.dtype)
        dy = program.global_block().create_var(name=dyi_name,
                                               shape=yi.shape,
                                               dtype=np_type,
                                               persistable=True)
        dy.stop_gradient = False
        set_var_in_scope(scope, place, dyi_name, dy_init[i])
        y_grads.append(dy)

    # append first order grads
    dx = fluid.gradients(y, x, y_grads)

    # y_grads are the input of first-order backward,
    # so, they are also the input of second-order backward.
    x += y_grads
    x_init += dy_init
    y = dx

    x_grads_grads_init = []
    for dxi in dx:
        np_type = dtype_to_np_dtype(dxi.dtype)
        value = np.ones(dxi.shape, dtype=np_type)
        x_grads_grads_init.append(value)

    return get_static_double_grad(x, y, x_init,
                                  dy_init=x_grads_grads_init,
                                  place=place,
                                  program=program)
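# A minimal sketch of driving get_static_triple_grad (the sin op, the shapes,
# and the float64 dtype are illustrative assumptions; the helper chains three
# fluid.gradients passes internally via get_static_double_grad).
import numpy as np
import paddle.fluid as fluid

prog = fluid.Program()
with fluid.program_guard(prog):
    x = fluid.data(name='x', shape=[2, 2], dtype='float64')
    x.stop_gradient = False
    x.persistable = True
    y = fluid.layers.sin(x)

x_np = np.random.random((2, 2)).astype('float64')
dy_np = np.ones((2, 2), dtype='float64')
place = fluid.CPUPlace()
# third-order derivative of sin(x) w.r.t. x, evaluated at x_np
dddy = get_static_triple_grad([x], [y], [x_np], [dy_np],
                              place=place, program=prog)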
def test2(self):
    main = fluid.Program()
    startup = fluid.Program()
    with fluid.program_guard(main, startup):
        x = fluid.layers.create_parameter(
            name='x',
            shape=[1],
            dtype='float32',
            default_initializer=fluid.initializer.Constant(1))
        y = x * x
        dx1, = fluid.gradients(y, x)
        z = dx1 * dx1 + y * y
        dx2, = fluid.gradients(z, x)
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup)
        out, = exe.run(main, fetch_list=[dx2])
        self.assertEqual(12, out[0])
def calc_gradients(outputs, inputs, no_grad_set):
    if fluid.in_dygraph_mode():
        return fluid.dygraph.grad(outputs=outputs,
                                  inputs=inputs,
                                  no_grad_vars=no_grad_set,
                                  create_graph=True)
    else:
        return fluid.gradients(targets=outputs,
                               inputs=inputs,
                               no_grad_set=no_grad_set)
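# Hedged usage sketch: the same helper serves both execution modes. The toy
# quadratic below is an assumption for illustration only.
import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    x = fluid.dygraph.to_variable(np.ones([2, 2], dtype='float32'))
    x.stop_gradient = False
    y = x * x
    # dygraph branch: returns dy/dx = 2x; create_graph=True keeps the
    # backward graph so the result itself can be differentiated again
    dx, = calc_gradients([y], [x], None)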
def gradient_penalty(self, f, real, fake=None, cfg=None, name=None):
    def _interpolate(a, b=None):
        a_shape = fluid.layers.shape(a)
        if b is None:
            if cfg.enable_ce:
                beta = fluid.layers.uniform_random(shape=a_shape,
                                                   min=0.0, max=1.0, seed=1)
            else:
                beta = fluid.layers.uniform_random(shape=a_shape,
                                                   min=0.0, max=1.0)
            mean = fluid.layers.reduce_mean(a, dim=list(range(len(a.shape))))
            input_sub_mean = fluid.layers.elementwise_sub(a, mean, axis=0)
            var = fluid.layers.reduce_mean(
                fluid.layers.square(input_sub_mean),
                dim=list(range(len(a.shape))))
            b = beta * fluid.layers.sqrt(var) * 0.5 + a

        shape = [a.shape[0]]
        if cfg.enable_ce:
            alpha = fluid.layers.uniform_random(shape=a_shape[0],
                                                min=0.0, max=1.0, seed=1)
        else:
            alpha = fluid.layers.uniform_random(shape=a_shape[0],
                                                min=0.0, max=1.0)
        inner = fluid.layers.elementwise_mul((b - a), alpha, axis=0) + a
        return inner

    x = _interpolate(real, fake)
    pred, _ = f(x, cfg=cfg, name=name)
    if isinstance(pred, tuple):
        pred = pred[0]
    vars = []
    for var in fluid.default_main_program().list_vars():
        if fluid.io.is_parameter(var) and var.name.startswith("discriminator"):
            vars.append(var.name)
    grad = fluid.gradients(pred, x, no_grad_set=vars)[0]
    grad_shape = grad.shape
    grad = fluid.layers.reshape(
        grad, [-1, grad_shape[1] * grad_shape[2] * grad_shape[3]])
    epsilon = 1e-16
    norm = fluid.layers.sqrt(
        fluid.layers.reduce_sum(fluid.layers.square(grad), dim=1) + epsilon)
    gp = fluid.layers.reduce_mean(fluid.layers.square(norm - 1.0))
    return gp
def test1(self):
    main = fluid.Program()
    startup = fluid.Program()
    with fluid.program_guard(main, startup):
        net = lambda x: x * x
        x = fluid.layers.create_parameter(
            name='x',
            shape=[1],
            dtype='float32',
            default_initializer=fluid.initializer.Constant(3))
        grad1, = fluid.gradients(net(x), x)  # 2x = 6
        z = net(x - grad1)
        grad2, = fluid.gradients(z, x)  # gradients((x - 2x)^2) = 2x = 6
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup)
        out = exe.run(main, fetch_list=[grad1.name, grad2.name])
        self.assertEqual(6, out[0][0])
        self.assertEqual(6, out[1][0])
def test_prune(self):
    x = fluid.data(name='x', shape=[3], dtype='float32')
    x.stop_gradient = False
    x1, x2, x3 = fluid.layers.split(x, dim=0, num_or_sections=3)
    y = x1 * 2
    x1_grad = fluid.gradients(y, x)

    exe = fluid.Executor(fluid.CPUPlace())
    main = fluid.default_main_program()
    exe.run(fluid.default_startup_program())
    out = exe.run(main,
                  feed={'x': np.ones([3]).astype('float32')},
                  fetch_list=[x1_grad])
    self.assertTrue(np.array_equal(out[0], [2., 0., 0.]))
def _construct_graph(self):
    train_program = fluid.Program()
    start_program = fluid.Program()
    with fluid.program_guard(train_program, start_program):
        self.x_ph = TensorList([
            fluid.layers.data('x_{}'.format(idx),
                              v.shape,
                              append_batch_size=False,
                              stop_gradient=False)
            for idx, v in enumerate(self.x)
        ])

        # problem forward
        self.f0 = self.problem(self.x_ph)
        self.loss = self.problem.ip_output(self.f0, self.f0)

        # problem backward
        self.grad = TensorList(fluid.gradients(self.loss, self.x_ph))

    place = fluid.CUDAPlace(0)
    self.exe = fluid.Executor(place)
    self.exe.run(program=fluid.default_startup_program())
    self.compiled_prog = fluid.compiler.CompiledProgram(train_program)
from paddle import fluid
import paddle.fluid.transpiler.details.program_utils as pu

# net = lambda x: x * x
x = fluid.layers.create_parameter(shape=[1],
                                  dtype='float32',
                                  default_initializer=fluid.initializer.Constant(2))
y = fluid.layers.elementwise_mul(x, x)
grad1 = fluid.gradients(y, x)[0]  # 2x = 4
# pu.program_to_code(fluid.default_main_program(), skip_op_callstack=True)

z = fluid.layers.elementwise_sub(x, grad1)
y2 = fluid.layers.elementwise_mul(z, z)
grad2 = fluid.gradients(y2, x)[0]  # gradients((x - 2x)^2) = 2x = 4
pu.program_to_code(fluid.default_main_program(), skip_op_callstack=True)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
out_np = exe.run(fluid.default_main_program(), fetch_list=[grad1, grad2])
print(out_np)  # per the comments above, both gradients evaluate to 4. at x = 2
def _static_forward(self, res, data=None, **kwargs):
    """
    _static_forward
    """
    if self.__layertype == "func":
        paddle.seed(self.seed)
        main_program = fluid.Program()
        startup_program = fluid.Program()
        params = copy.deepcopy(kwargs)
        with fluid.unique_name.guard():
            with fluid.program_guard(main_program=main_program,
                                     startup_program=startup_program):
                # Note: values are passed inline rather than via a separate
                # helper, since self.kwargs has only one entry.
                xyz = []
                for k, v in kwargs.items():
                    if isinstance(v, (np.generic, np.ndarray)):
                        # entries in no_grad_var keep their original dtype
                        if self.no_grad_var is not None and k in self.no_grad_var:
                            kwargs[k] = v
                        else:
                            kwargs[k] = v.astype(self.dtype)
                for k, v in params.items():
                    if isinstance(v, (np.generic, np.ndarray)):
                        # entries in no_grad_var keep their original dtype
                        if self.no_grad_var is not None and k in self.no_grad_var:
                            params[k] = fluid.data(name=k, shape=v.shape, dtype=v.dtype)
                        else:
                            params[k] = fluid.data(name=k, shape=v.shape, dtype=self.dtype)
                            xyz.append(k)
                            # enable compute gradient
                            params[k].stop_gradient = False
                output = self.func(**params)
                if self.enable_backward:
                    loss = paddle.mean(output)
                    grad_var = {}
                    for k in xyz:
                        grad_var[k] = fluid.gradients(loss, params[k])
                    exe = fluid.Executor(self.place)
                    exe.run(startup_program)
                    res = exe.run(main_program,
                                  feed=kwargs,
                                  fetch_list=[output] + list(grad_var.values()),
                                  return_numpy=True)
                    # combine grad
                    grad = dict(zip(xyz, res[1:]))
                    return res[0], grad
                else:
                    exe = fluid.Executor(self.place)
                    exe.run(startup_program)
                    res = exe.run(main_program,
                                  feed=kwargs,
                                  fetch_list=[output],
                                  return_numpy=True)
                    return res[0]
    elif self.__layertype == "class":
        main_program = fluid.Program()
        startup_program = fluid.Program()
        main_program.random_seed = self.seed
        startup_program.random_seed = self.seed
        params = copy.deepcopy(kwargs)
        with fluid.unique_name.guard():
            with fluid.program_guard(main_program=main_program,
                                     startup_program=startup_program):
                # Note: values are passed inline rather than via a separate
                # helper, since self.kwargs has only one entry.
                for k, v in kwargs.items():
                    if isinstance(v, (np.generic, np.ndarray)):
                        # entries in no_grad_var keep their original dtype
                        if self.no_grad_var is not None and k in self.no_grad_var:
                            kwargs[k] = v
                        else:
                            kwargs[k] = v.astype(self.dtype)
                for k, v in params.items():
                    if isinstance(v, (np.generic, np.ndarray)):
                        # entries in no_grad_var keep their original dtype
                        if self.no_grad_var is not None and k in self.no_grad_var:
                            params[k] = fluid.data(name=k, shape=v.shape, dtype=v.dtype)
                        else:
                            params[k] = fluid.data(name=k, shape=v.shape, dtype=self.dtype)
                            # enable compute gradient
                            params[k].stop_gradient = False
                if data is not None:
                    data = data.astype(self.dtype)
                    self.data = fluid.data(name="data", shape=data.shape, dtype=self.dtype)
                    self.data.stop_gradient = False
                    data = dict({"data": data}, **kwargs)
                obj = self.func(**params)
                output = obj(self.data)
                if self.enable_backward:
                    loss = paddle.mean(output)
                    g = fluid.gradients(loss, self.data)
                    exe = fluid.Executor(self.place)
                    exe.run(startup_program)
                    res = exe.run(main_program,
                                  feed=data,
                                  fetch_list=[output, g],
                                  return_numpy=True)
                    grad = {"data": res[1]}
                    return res[0], grad
                else:
                    exe = fluid.Executor(self.place)
                    exe.run(startup_program)
                    res = exe.run(main_program,
                                  feed=data,
                                  fetch_list=[output],
                                  return_numpy=True)
                    return res[0]
def compute_unrolled_step(image_train, label_train, image_val, label_val,
                          data_prog, startup_prog, lr, args):
    fetch = []
    unrolled_model_prog = data_prog.clone()
    with fluid.program_guard(unrolled_model_prog, startup_prog):
        # construct model graph
        train_logits, train_loss = model(image_train,
                                         label_train,
                                         args.init_channels,
                                         args.class_num,
                                         args.layers,
                                         name="model")
        # construct unrolled model graph
        logits, unrolled_train_loss = model(image_train,
                                            label_train,
                                            args.init_channels,
                                            args.class_num,
                                            args.layers,
                                            name="unrolled_model")

        all_params = unrolled_model_prog.global_block().all_parameters()
        model_var = utility.get_parameters(all_params, 'model')[1]
        unrolled_model_var = utility.get_parameters(all_params, 'unrolled_model')[1]

        # copy model_var to unrolled_model_var
        for m_var, um_var in zip(model_var, unrolled_model_var):
            fluid.layers.assign(m_var, um_var)

        unrolled_optimizer = fluid.optimizer.MomentumOptimizer(
            lr,
            args.momentum,
            regularization=fluid.regularizer.L2DecayRegularizer(
                args.weight_decay))
        unrolled_optimizer.minimize(
            unrolled_train_loss,
            parameter_list=[v.name for v in unrolled_model_var])
        fetch.append(unrolled_train_loss)
    logger.info("get unrolled_model")

    arch_optim_prog = data_prog.clone()
    with fluid.program_guard(arch_optim_prog, startup_prog):
        train_logits, train_loss = model(image_train,
                                         label_train,
                                         args.init_channels,
                                         args.class_num,
                                         args.layers,
                                         name="model")
        logits, unrolled_valid_loss = model(image_val,
                                            label_val,
                                            args.init_channels,
                                            args.class_num,
                                            args.layers,
                                            name="unrolled_model")

        all_params = arch_optim_prog.global_block().all_parameters()
        model_var = utility.get_parameters(all_params, 'model')[1]
        unrolled_model_var = utility.get_parameters(all_params, 'unrolled_model')[1]
        arch_var = utility.get_parameters(all_params, 'arch')[1]

        # get grad of unrolled_valid_loss
        valid_grads = fluid.gradients(unrolled_valid_loss, unrolled_model_var)

        eps = 1e-2 * fluid.layers.rsqrt(
            fluid.layers.sums([
                fluid.layers.reduce_sum(fluid.layers.square(valid_grad))
                for valid_grad in valid_grads
            ]))

        model_params_grads = list(zip(model_var, valid_grads))

        # w+ = w + eps * dw
        for param, grad in model_params_grads:
            param = fluid.layers.elementwise_add(
                param, fluid.layers.elementwise_mul(grad, eps))
        logger.info("get w+")

        logits, train_loss = model(image_train,
                                   label_train,
                                   args.init_channels,
                                   args.class_num,
                                   args.layers,
                                   name="model")
        train_grads_pos = fluid.gradients(train_loss, arch_var)
        grads_names = [v.name for v in train_grads_pos]
        for name in grads_names:
            arch_optim_prog.global_block()._rename_var(name, name + '_pos')
        logger.info("get train_grads_pos")

        # w- = w - eps * dw
        for param, grad in model_params_grads:
            param = fluid.layers.elementwise_add(
                param, fluid.layers.elementwise_mul(grad, eps * -2))
        logger.info("get w-")

        logits, train_loss = model(image_train,
                                   label_train,
                                   args.init_channels,
                                   args.class_num,
                                   args.layers,
                                   name="model")
        train_grads_neg = fluid.gradients(train_loss, arch_var)
        for name in grads_names:
            arch_optim_prog.global_block()._rename_var(name, name + '_neg')
        logger.info("get train_grads_neg")

        # recover w
        for param, grad in model_params_grads:
            param = fluid.layers.elementwise_add(
                param, fluid.layers.elementwise_mul(grad, eps))
        logger.info("get w")

        leader_opt = fluid.optimizer.Adam(
            args.arch_learning_rate,
            0.5,
            0.999,
            regularization=fluid.regularizer.L2DecayRegularizer(
                args.arch_weight_decay))
        arch_params_grads = leader_opt.backward(
            unrolled_valid_loss, parameter_list=[v.name for v in arch_var])

        grads_p = [
            arch_optim_prog.global_block().var(name + '_pos')
            for name in grads_names
        ]
        grads_n = [
            arch_optim_prog.global_block().var(name + '_neg')
            for name in grads_names
        ]
        for i, (var, grad) in enumerate(arch_params_grads):
            arch_params_grads[i] = (var, grad -
                                    ((grads_p[i] - grads_n[i]) / (eps * 2)) * lr)
        leader_opt.apply_gradients(arch_params_grads)
        logger.info("update alpha")
        fetch.append(unrolled_valid_loss)

    arch_progs_list = [unrolled_model_prog, arch_optim_prog]
    return arch_progs_list, fetch
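# Our reading of the update assembled above (an annotation, not an original
# comment): it matches the DARTS second-order rule
#
#   d_alpha = grad_alpha L_val(w')
#             - lr * (grad_alpha L_train(w+) - grad_alpha L_train(w-)) / (2 * eps)
#
# where w' are the one-step-unrolled weights, w+/- = w +/- eps * grad_w L_val(w'),
# and eps = 0.01 / ||grad_w L_val(w')||, i.e. a finite-difference approximation
# of the Hessian-vector product.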
def get_static_double_grad(x, y, x_init=None, dy_init=None, place=None, program=None):
    """
    Get Double Grad result of static graph.

    Args:
        x (Variable|list[Variable]): input variables to the program.
        y (Variable|list[Variable]): output variables to the program.
        x_init (numpy.array|list[numpy.array]|None): the init value for input x.
        dy_init (numpy.array|list[numpy.array]|None): the init value for output y.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        program (Program|None): a Program with forward pass.
            If None, use fluid.default_main_program().
    Returns:
        A list of numpy array that stores second derivative result calculated
        by static graph.
    """
    if program is None:
        program = fluid.default_main_program()
    scope = fluid.executor.global_scope()
    y_grads = []
    for i in six.moves.xrange(len(y)):
        yi = y[i]
        dyi_name = _append_grad_suffix_(yi.name)
        np_type = dtype_to_np_dtype(yi.dtype)
        dy = program.global_block().create_var(name=dyi_name,
                                               shape=yi.shape,
                                               dtype=np_type,
                                               persistable=True)
        dy.stop_gradient = False
        set_var_in_scope(scope, place, dyi_name, dy_init[i])
        y_grads.append(dy)

    # append first order grads
    dx = fluid.gradients(y, x, y_grads)

    # y_grads are the input of first-order backward,
    # so, they are also the input of second-order backward.
    x += y_grads
    x_init += dy_init

    # filter None in dx for DX/DY may be None in kernel
    filted_dx = [dxi for dxi in dx if dxi is not None]
    y = filted_dx

    # check input arguments
    x = _as_list(x)
    y = _as_list(y)

    for v in x:
        v.stop_gradient = False
        v.persistable = True
    if place is None:
        place = fluid.CPUPlace()
    if program is None:
        program = fluid.default_main_program()

    # init variable in startup program
    scope = fluid.executor.global_scope()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    x_init = _as_list(x_init)
    # init inputs if x_init is not None
    if x_init:
        if len(x_init) != len(x):
            raise ValueError('len(x_init) (=%d) is not the same'
                             ' as len(x) (= %d)' % (len(x_init), len(x)))
        # init variable in main program
        for var, arr in zip(x, x_init):
            assert var.shape == arr.shape
        feeds = {k.name: v for k, v in zip(x, x_init)}
        exe.run(program, feed=feeds, scope=scope)

    dys = []
    for yi in y:
        np_type = dtype_to_np_dtype(yi.dtype)
        dy_name = _append_grad_suffix_(yi.name)
        # create dy Variable in Program
        dy = program.global_block().create_var(name=dy_name,
                                               shape=yi.shape,
                                               dtype=np_type,
                                               persistable=True)
        # init dy tensor in scope
        value = np.ones(yi.shape, dtype=np_type)
        dy_t = set_var_in_scope(scope, place, dy_name, value)
        dys.append(dy)

    # append second order backward
    ddx = fluid.gradients(y, x, dys)
    exe = fluid.Executor(place)

    # filter None in dx for DX/DY may be None in kernel
    # only fetch not None dx in exe.run
    filted = [(i, dxi) for i, dxi in enumerate(ddx) if dxi is not None]
    filted_idx, filted_ddx = zip(*filted)
    ddx_res = exe.run(program, scope=scope, fetch_list=filted_ddx)

    return ddx_res
def triple_grad_check(x, y, x_init=None, y_grads=None, x_grads_grads=None,
                      place=None, program=None, eps=1e-6, atol=1e-5, rtol=1e-3,
                      raise_exception=True):
    """
    Check triple gradients. This function will append backward to the
    program before third order gradient check.

    Args:
        x (Variable|list[Variable]): input variables to the program.
        y (Variable|list[Variable]): output variables to the program.
        x_init (numpy.array|list[numpy.array]|None): the init value for input x.
        y_grads (numpy.array|list[numpy.array]|None): the gradients with respect to y.
        x_grads_grads (numpy.array|list[numpy.array]|None): the gradients with
            respect to your input.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        program (Program|None): a Program with forward pass.
            If None, use fluid.default_main_program().
        eps (float): perturbation for finite differences.
        atol (float): absolute tolerance.
        rtol (float): relative tolerance.
        raise_exception (bool): whether to raise an exception if the check fails.
            Default is True.
    Returns:
        True if all differences satisfy numpy.allclose condition.
    """
    # check input arguments
    x = _as_list(x)
    for v in x:
        v.stop_gradient = False
        v.persistable = True
    y = _as_list(y)

    if program is None:
        program = fluid.default_main_program()

    if y_grads is None:
        scope = fluid.executor.global_scope()
        y_grads = []
        y_grads_init = []
        for yi in y:
            dyi_name = _append_grad_suffix_(yi.name)
            np_type = dtype_to_np_dtype(yi.dtype)
            dy = program.global_block().create_var(name=dyi_name,
                                                   shape=yi.shape,
                                                   dtype=np_type,
                                                   persistable=True)
            dy.stop_gradient = False
            v = np.random.random(size=yi.shape).astype(np_type)
            set_var_in_scope(scope, place, dyi_name, v)
            y_grads.append(dy)
            y_grads_init.append(v)
    else:
        y_grads = _as_list(y_grads)
        y_grads_init = [
            var_to_np_array_in_scope(scope, place, v.name) for v in y_grads
        ]

    # append first order grads
    target_grads = fluid.gradients(y, x, y_grads)

    if x_grads_grads is None:
        scope = fluid.executor.global_scope()
        x_grads_grads = []
        x_grads_grads_init = []
        for dxi in target_grads:
            ddxi_name = _append_grad_suffix_(dxi.name)
            np_type = dtype_to_np_dtype(dxi.dtype)
            ddx = program.global_block().create_var(name=ddxi_name,
                                                    shape=dxi.shape,
                                                    dtype=np_type,
                                                    persistable=True)
            ddx.stop_gradient = False
            v = np.random.random(size=dxi.shape).astype(np_type)
            set_var_in_scope(scope, place, ddxi_name, v)
            x_grads_grads.append(ddx)
            x_grads_grads_init.append(v)
    else:
        x_grads_grads = _as_list(x_grads_grads)
        x_grads_grads_init = [
            var_to_np_array_in_scope(scope, place, v.name)
            for v in x_grads_grads
        ]

    x += y_grads
    x_init = _as_list(x_init)
    x_init += y_grads_init

    # append second order grads
    target_grads_grads = fluid.gradients(target_grads, x, x_grads_grads)

    # filter None in target_grads_grads for Dy/Dx may be None in kernel
    filted = [(i, dyi) for i, dyi in enumerate(target_grads_grads)
              if dyi is not None]
    filted_idx, filted_target_grads_grads = zip(*filted)

    x += x_grads_grads
    x_init += x_grads_grads_init

    # x <=> [x, dout, ddx]
    grad_check(x=x,
               y=filted_target_grads_grads,
               x_init=x_init,
               place=place,
               program=program,
               eps=eps,
               atol=atol,
               rtol=rtol)
def double_grad_check(x, y, x_init=None, y_grads=None, place=None, program=None,
                      eps=1e-6, atol=1e-5, rtol=1e-3, raise_exception=True):
    """
    Check gradients of gradients. This function will append backward to the
    program before second order gradient check.

    Args:
        x (Variable|list[Variable]): input variables to the program.
        y (Variable|list[Variable]): output variables to the program.
        x_init (numpy.array|list[numpy.array]|None): the init value for input x.
        y_grads (numpy.array|list[numpy.array]|None): the gradients with respect to y.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        program (Program|None): a Program with forward pass.
            If None, use fluid.default_main_program().
        eps (float): perturbation for finite differences.
        atol (float): absolute tolerance.
        rtol (float): relative tolerance.
        raise_exception (bool): whether to raise an exception if the check fails.
            Default is True.
    Returns:
        True if all differences satisfy numpy.allclose condition.
    """
    # check input arguments
    x = _as_list(x)
    for v in x:
        v.stop_gradient = False
        v.persistable = True
    y = _as_list(y)

    if program is None:
        program = fluid.default_main_program()

    if y_grads is None:
        scope = fluid.executor.global_scope()
        y_grads = []
        y_grads_init = []
        for yi in y:
            dyi_name = _append_grad_suffix_(yi.name)
            np_type = dtype_to_np_dtype(yi.dtype)
            dy = program.global_block().create_var(name=dyi_name,
                                                   shape=yi.shape,
                                                   dtype=np_type,
                                                   persistable=True)
            dy.stop_gradient = False
            v = np.random.random(size=yi.shape).astype(np_type)
            set_var_in_scope(scope, place, dyi_name, v)
            y_grads.append(dy)
            y_grads_init.append(v)
    else:
        y_grads = _as_list(y_grads)
        y_grads_init = [
            var_to_np_array_in_scope(scope, place, v.name) for v in y_grads
        ]

    # append first order grads
    target_grads = fluid.gradients(y, x, y_grads)

    # y_grads are the input of first-order backward,
    # so, they are also the input of second-order backward.
    x += y_grads
    x_init = _as_list(x_init)
    x_init += y_grads_init

    grad_check(x, target_grads, x_init, place, program, eps, atol, rtol)
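# A minimal sketch of calling double_grad_check (the tanh op, shape, and
# float64 dtype are illustrative assumptions; float64 keeps the finite
# differences numerically stable).
import numpy as np
import paddle.fluid as fluid

prog = fluid.Program()
with fluid.program_guard(prog):
    x = fluid.data(name='x', shape=[2, 3], dtype='float64')
    x.stop_gradient = False
    x.persistable = True
    y = fluid.layers.tanh(x)

x_np = np.random.uniform(-1, 1, (2, 3)).astype('float64')
place = fluid.CPUPlace()
# compares analytical second-order gradients against finite differences
double_grad_check([x], y, x_init=[x_np], place=place, program=prog)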
print('v_shape', v_data.shape)
t_data = np.array([[0, 1, 2]], dtype=np.int32)
print('t_shape', t_data.shape)

vertices = fluid.layers.data(name="vertices", shape=[-1, 4], dtype="float32")
triangles = fluid.layers.data(name="triangles", shape=[-1, 3], dtype="int32")
barycentric_coordinates, triangle_ids, z_buffer = fluid.layers.rasterize_triangles(
    vertices, triangles, image_height=4, image_width=4)

# gradient
#grd_value = np.random.random((4, 4, 3)).astype('float32')
grd_value = 1000 * np.ones((4, 4, 3)).astype('float32')
grd_var = fluid.layers.data(name='grd_var',
                            shape=(4, 4, 3),
                            append_batch_size=False,
                            dtype='float32')
vertices.stop_gradient = False
grd = fluid.gradients([barycentric_coordinates], vertices, target_gradients=grd_var)

place = fluid.CPUPlace()
exe = fluid.Executor(place=place)
exe.run(fluid.default_startup_program())

fetch_list = [barycentric_coordinates.name, triangle_ids.name,
              z_buffer.name, grd[0].name]
#profiler.start_profiler('All')
# the upstream target gradient must be fed too; without 'grd_var' the
# fetch of grd[0] cannot be computed
bc, ti, zb, grd_out = exe.run(feed={'vertices': v_data,
                                    'triangles': t_data,
                                    'grd_var': grd_value},
                              fetch_list=fetch_list)
#profiler.stop_profiler('total', '/tmp/profile')
#np.save('res_paddle/bary_coor_paddle.npy', np.array(bc))
#np.save('res_paddle/tri_ids_paddle.npy', np.array(ti))
#np.save('res_paddle/z_b_paddle.npy', np.array(zb))
def _construct_graph(self):
    train_program = fluid.Program()
    start_program = fluid.Program()
    with fluid.program_guard(train_program, start_program):
        scope = 'first/'
        self.x_ph = TensorList([
            fluid.layers.data('{}x_{}'.format(scope, idx),
                              v.shape,
                              append_batch_size=False,
                              stop_gradient=False)
            for idx, v in enumerate(self.x)
        ])
        self.p_ph = TensorList([
            fluid.layers.data('{}p_{}'.format(scope, idx),
                              v.shape,
                              append_batch_size=False,
                              stop_gradient=False)
            for idx, v in enumerate(self.x)
        ])

        # problem forward
        self.f0 = self.problem(self.x_ph, scope)
        self.g = self.f0.apply(static_clone)

        # Get df/dx^t @ f0
        self.dfdxt_g = TensorList(fluid.gradients(self.f0, self.x_ph, self.g))

        # For computing A
        tmp = [a * b for a, b in zip(self.dfdxt_g, self.p_ph)]
        self.dfdx_x = TensorList(fluid.gradients(tmp, self.g))
        # self.dfdx_x = TensorList(fluid.gradients(self.dfdxt_g, self.g, self.p_ph))

    train_program2 = fluid.Program()
    start_program2 = fluid.Program()
    with fluid.program_guard(train_program2, start_program2):
        scope = 'second/'
        self.x_ph_2 = TensorList([
            fluid.layers.data('{}x_{}'.format(scope, idx),
                              v.shape,
                              append_batch_size=False,
                              stop_gradient=False)
            for idx, v in enumerate(self.x)
        ])
        self.dfdx_x_ph = TensorList([
            fluid.layers.data('{}dfdx_x_{}'.format(scope, idx),
                              v.shape,
                              append_batch_size=False,
                              stop_gradient=False)
            for idx, v in enumerate(self.g)
        ])
        self.f0_2 = self.problem(self.x_ph_2, scope)
        self.dfdx_dfdx = TensorList(
            fluid.gradients(self.f0_2 * self.dfdx_x_ph, self.x_ph_2))

    place = fluid.CUDAPlace(0)
    self.exe = fluid.Executor(place)
    self.exe.run(program=fluid.default_startup_program())
    self.compiled_prog = fluid.compiler.CompiledProgram(train_program)

    place2 = fluid.CUDAPlace(0)
    self.exe2 = fluid.Executor(place2)
    self.exe2.run(program=fluid.default_startup_program())
    self.compiled_prog2 = fluid.compiler.CompiledProgram(train_program2)
lr = 0.1

startup_prog = fluid.Program()
train_prog = fluid.Program()
with fluid.program_guard(train_prog, startup_prog):
    with fluid.unique_name.guard():
        inputs = P.data(name='input_1', shape=[-1, 3, 28, 28],
                        append_batch_size=False, dtype='float32')
        conv01_out_tensor = fluid.layers.conv2d(input=inputs,
                                                num_filters=7,
                                                filter_size=1,
                                                stride=1,
                                                padding=0,
                                                param_attr=ParamAttr(name="conv01_weights"),
                                                bias_attr=ParamAttr(name="conv01_bias"))
        conv02_out_tensor = fluid.layers.conv2d(input=conv01_out_tensor,
                                                num_filters=8,
                                                filter_size=4,
                                                stride=2,
                                                padding=1,
                                                param_attr=ParamAttr(name="conv02_weights"),
                                                bias_attr=ParamAttr(name="conv02_bias"))
        grad = fluid.gradients(conv02_out_tensor, conv01_out_tensor, no_grad_set=None)[0]

        # build the loss function
        y_true = P.data(name='y_true', shape=[-1, 8, 14, 14],
                        append_batch_size=False, dtype='float32')
        # square the elementwise difference first; either the P.pow() op or
        # Python's ** operator works here.
        mseloss = P.pow(y_true - conv02_out_tensor, 2)
        mseloss = P.reduce_mean(mseloss)  # then take the mean: the MSE loss

        # optimizer: SGD
        optimizer = fluid.optimizer.SGD(learning_rate=lr)
        optimizer.minimize(mseloss)

eval_prog = fluid.Program()
with fluid.program_guard(eval_prog, startup_prog):
def _paddle_prepare(self, predict_fn=None):
    if predict_fn is None:
        import paddle.fluid as fluid
        startup_prog = fluid.Program()
        main_program = fluid.Program()
        with fluid.program_guard(main_program, startup_prog):
            with fluid.unique_name.guard():
                image_op = fluid.data(name='image',
                                      shape=[1] + self.model_input_shape,
                                      dtype='float32')
                label_op = fluid.layers.data(name='label', shape=[1], dtype='int64')
                probs = self.paddle_model(image_op)
                if isinstance(probs, tuple):
                    probs = probs[0]
                # manually switch the model to test mode
                for op in main_program.global_block().ops:
                    if op.type == 'batch_norm':
                        op._set_attr('use_global_stats', True)
                    elif op.type == 'dropout':
                        op._set_attr('dropout_prob', 0.0)
                # fetch the target layer
                trainable_vars = list(main_program.list_vars())
                for v in trainable_vars:
                    if v.name == self.target_layer_name:
                        conv = v
                class_num = probs.shape[-1]
                one_hot = fluid.layers.one_hot(label_op, class_num)
                one_hot = fluid.layers.elementwise_mul(probs, one_hot)
                target_category_loss = fluid.layers.reduce_sum(one_hot)
                # target_category_loss = - fluid.layers.cross_entropy(probs, label_op)[0]

                # add back-propagation
                p_g_list = fluid.backward.append_backward(target_category_loss)
                # calculate the gradients w.r.t. the target layer
                gradients_map = fluid.gradients(target_category_loss, conv)[0]
        if self.use_cuda:
            gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
            place = fluid.CUDAPlace(gpu_id)
        else:
            place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        fluid.io.load_persistables(exe, self.trained_model_path, main_program)

        def predict_fn(data):
            # if label is None, let it be the most likely label
            if self.label is None:
                out = exe.run(main_program,
                              feed={'image': data, 'label': np.array([[0]])},
                              fetch_list=[probs])
                self.label = np.argmax(out[0][0])
            feature_map, gradients = exe.run(
                main_program,
                feed={'image': data, 'label': np.array([[self.label]])},
                fetch_list=[conv, gradients_map])
            return feature_map, gradients

    self.predict_fn = predict_fn
    self.paddle_prepared = True
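# A hedged post-processing sketch (not part of the class above): the standard
# Grad-CAM combination of the fetched feature map and gradients; the
# channel-wise weighting is the usual recipe, assumed rather than taken
# from this file. `self` and `data` are as in predict_fn above.
import numpy as np

feature_map, gradients = self.predict_fn(data)        # both shaped [1, C, H, W]
weights = gradients.mean(axis=(2, 3), keepdims=True)  # global-average-pool the grads
cam = (weights * feature_map).sum(axis=1)[0]          # weighted sum over channels
cam = np.maximum(cam, 0)                              # ReLU: keep positive evidence
cam = cam / (cam.max() + 1e-8)                        # normalize to [0, 1]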
def create_model(args, pyreader_name, ernie_config, is_prediction=False, task_name=""):
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1], [-1, 1], [-1, 1]],
        dtypes=['int64', 'int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
        lod_levels=[0, 0, 0, 0, 0, 0, 0],
        name=task_name + "_" + pyreader_name,
        use_double_buffer=True)

    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels,
     qids) = fluid.layers.read_file(pyreader)

    def _model(is_noise=False):
        ernie = ErnieModel(src_ids=src_ids,
                           position_ids=pos_ids,
                           sentence_ids=sent_ids,
                           task_ids=task_ids,
                           input_mask=input_mask,
                           config=ernie_config,
                           is_noise=is_noise)

        cls_feats = ernie.get_pooled_output()
        if not is_noise:
            cls_feats = fluid.layers.dropout(
                x=cls_feats,
                dropout_prob=0.1,
                dropout_implementation="upscale_in_train")
        logits = fluid.layers.fc(
            input=cls_feats,
            size=args.num_labels,
            param_attr=fluid.ParamAttr(
                name=task_name + "_cls_out_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name=task_name + "_cls_out_b",
                initializer=fluid.initializer.Constant(0.)))

        """
        if is_prediction:
            probs = fluid.layers.softmax(logits)
            feed_targets_name = [
                src_ids.name, sent_ids.name, pos_ids.name, input_mask.name
            ]
            if ernie_version == "2.0":
                feed_targets_name += [task_ids.name]
            return pyreader, probs, feed_targets_name
        """

        num_seqs = fluid.layers.create_tensor(dtype='int64')
        # add focal loss
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=labels, return_softmax=True)
        loss = fluid.layers.mean(x=ce_loss)
        accuracy = fluid.layers.accuracy(input=probs, label=labels, total=num_seqs)

        graph_vars = {
            "loss": loss,
            "probs": probs,
            "accuracy": accuracy,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids
        }
        return graph_vars

    if not is_prediction:
        graph_vars = _model(is_noise=True)
        old_loss = graph_vars["loss"]
        token_emb = fluid.default_main_program().global_block().var("word_embedding")
        token_emb.stop_gradient = False
        token_gradient = fluid.gradients(old_loss, token_emb)[0]
        token_gradient.stop_gradient = False

        # perturb the token embedding along the normalized gradient direction,
        # rebuild the graph on the perturbed embedding, then restore it
        epsilon = 1e-8
        norm = fluid.layers.sqrt(
            fluid.layers.reduce_sum(fluid.layers.square(token_gradient)) + epsilon)
        gp = (0.01 * token_gradient) / norm
        gp.stop_gradient = True
        fluid.layers.assign(token_emb + gp, token_emb)
        graph_vars = _model()
        fluid.layers.assign(token_emb - gp, token_emb)
    else:
        graph_vars = _model()

    return pyreader, graph_vars
import numpy
import paddle.fluid as fluid

# define operation
w = fluid.data(name='w', shape=[None, 1], dtype='float32')
w.stop_gradient = False
loss = w * w
grad = fluid.gradients([loss], w)

# define executor
cpu = fluid.core.CPUPlace()
exe = fluid.Executor(cpu)
exe.run(fluid.default_startup_program())

# prepare data
x = numpy.ones((1, 1))
x = x.astype('float32')

# run computation
outs = exe.run(feed={'w': x}, fetch_list=[loss, grad])
# at w = 1: loss = w^2 = 1, grad = 2w = 2
print('loss: {}, grad: {}'.format(outs[0][0], outs[1][0]))
def _compute_analytical_jacobian(program, x, y, place, scope):
    """Computes the analytical Jacobian for dy/dx.

    Args:
        program (Program): a Program with forward pass.
        x (Variable|list[Variable]): a variable or list of variable
        y (Variable): the target variable.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        scope (Scope): the scope used to run program.

    Returns:
        A list of 2-D numpy array. The list length is len(x).
        Each 2-D numpy array represents the Jacobian for dy/dx_i.
        It has "xi_size" rows and "dy_size" columns
        where "xi_size" is the number of elements in x_i and
        "dy_size" is the number of elements in y.
    """
    if not isinstance(y, fluid.framework.Variable):
        raise TypeError('y is not Variable')

    dy_name = _append_grad_suffix_(y.name)
    np_type = dtype_to_np_dtype(y.dtype)
    # create dy Variable in Program
    dy = program.global_block().create_var(name=dy_name,
                                           shape=y.shape,
                                           dtype=np_type,
                                           persistable=True)
    # append backward
    dx = fluid.gradients(y, x, dy)

    # init dy tensor in scope
    value = np.zeros(y.shape, dtype=np_type)
    dy_t = set_var_in_scope(scope, place, dy_name, value)

    exe = fluid.Executor(place)

    y_size = _product(y.shape)

    x = _as_list(x)
    jacobian = make_jacobian(x, y_size, np_type)

    # filter None in dx for DX/DY may be None in kernel
    # only fetch not None dx in exe.run
    filted = [(i, dxi) for i, dxi in enumerate(dx) if dxi is not None]
    filted_idx, filted_dx = zip(*filted)

    for i in six.moves.xrange(y_size):
        _set_item(dy_t, i, 1, np_type)

        dx_res = exe.run(program, scope=scope, fetch_list=filted_dx)

        for j in six.moves.xrange(len(filted_dx)):
            dx_idx = filted_idx[j]
            if dx_res[j] is not None:
                jacobian[dx_idx][:, i] = dx_res[j].flatten()
            else:
                jacobian[dx_idx][:, i] = np.zeros(dx[dx_idx].shape,
                                                  dtype=np_type).flatten()

        _set_item(dy_t, i, 0, np_type)

    return jacobian
def dlg_attack(args, feature, label, network, exe, origin_grad):
    """
    The implementation of the DLG attack.
    :param args: the parameters for the dlg attack
    :param feature: the variable of the feature
    :param label: the variable of the label
    :param network: the network of the model being trained
    :param exe: the same executor as in the normal training procedure
    :param origin_grad: the original gradients of model params generated by
        the target data, i.e., the data which is being attacked.
    :return:
    """
    main_program = fluid.Program()
    # use a new program
    with fluid.program_guard(main_program):
        # the dummy feature, which aims to imitate the target data
        dummy_x = fluid.data(name="dummy_x",
                             shape=list(feature.shape),
                             dtype=feature.dtype)
        # let dummy_x be updatable
        dummy_x.stop_gradient = False
        # the dummy label
        dummy_y = fluid.data(name="dummy_y",
                             shape=list(label.shape),
                             dtype=label.dtype)
        # let dummy_y be updatable
        dummy_y.stop_gradient = False

        # use the model network of training
        _, dummy_loss = network(dummy_x, dummy_y)

        # get gradients of params that are trainable
        all_params = main_program.global_block().all_parameters()
        grad_params = [param for param in all_params if param.trainable]
        dummy_grads = fluid.gradients(dummy_loss, grad_params)

        # original gradients
        origin_grad_vars = []
        for g_id, origin_g in enumerate(origin_grad):
            grad_name = "origin_g_" + str(g_id)
            grad_shape = origin_g.shape
            grad = fluid.data(name=grad_name, shape=grad_shape, dtype=origin_g.dtype)
            origin_grad_vars.append(grad)

        # the target loss of the optimization, i.e., the difference
        # between gradients of model parameters generated respectively
        # by target data and dummy data
        diff_loss = 0.0
        for orig_g, dum_g in zip(origin_grad_vars, dummy_grads):
            cur_loss = fluid.layers.square_error_cost(orig_g, dum_g)
            cur_loss = fluid.layers.reduce_mean(cur_loss)
            diff_loss += cur_loss
        mean_diff_loss = fluid.layers.mean(diff_loss)

        # the gradient of dummy_x
        grad_of_x = fluid.gradients(mean_diff_loss, dummy_x)

    dummy_feature_shape = [1 if d == -1 else d for d in list(feature.shape)]
    dummy_label_shape = [1 if d == -1 else d for d in list(label.shape)]

    # Generate dummy target data. The two main types, i.e., float32 and int64,
    # are used here for the feature and label variables respectively, and can be
    # changed according to the types needed in different scenarios.
    dummy_feature = numpy.random.normal(
        0, 1, size=dummy_feature_shape).astype("float32")
    dummy_label = numpy.zeros(shape=dummy_label_shape).astype("int64")

    feed_dict = {}
    # add original gradients into feed_dict
    for idx, orig_g in enumerate(origin_grad):
        key = "origin_g_" + str(idx)
        feed_dict[key] = orig_g

    # the time of starting the attack
    start = time.time()
    for iteration in range(args.iterations):
        feed_dict["dummy_x"] = dummy_feature
        feed_dict["dummy_y"] = dummy_label

        result = exe.run(main_program,
                         feed=feed_dict,
                         fetch_list=[mean_diff_loss] + grad_of_x)
        grad_diff_loss, feature_grad = result[0][0], result[1:]

        # update dummy_x with its gradient
        feature_grad = numpy.array(feature_grad).reshape(dummy_feature_shape)
        dummy_feature = numpy.add(dummy_feature,
                                  args.learning_rate * feature_grad)
        dummy_feature = numpy.array(dummy_feature)

        # the shape of the target image
        img_shape = dummy_feature_shape[-2:]

        # save attack results every 100 iterations
        if iteration % 100 == 0:
            print("Attack Iteration {}: grad_diff_loss = {}".format(
                iteration, grad_diff_loss))
            if not os.path.exists(args.result_dir):
                os.makedirs(args.result_dir)
            img = Image.fromarray(
                (dummy_feature * 255).reshape(img_shape).astype(numpy.uint8))
            img.save(args.result_dir + "/result_{}.png".format(iteration))

    end = time.time()
    print("Attack cost time in seconds: {}".format(end - start))
    # exit after the attack finished
    exit("Attack finished.")
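# A hedged end-to-end driver for dlg_attack (the one-layer victim network, the
# Args fields, and the MNIST-like shapes are all assumptions for illustration;
# in practice the attacker reuses the victim's own network function and
# executor). Explicit parameter names let the attacker's dummy program share
# the victim's initialized weights through the global scope.
import numpy
import paddle.fluid as fluid

class Args(object):
    iterations = 3000
    learning_rate = 1.0
    result_dir = "dlg_result"

def network(x, y):
    logits = fluid.layers.fc(input=x, size=10,
                             param_attr=fluid.ParamAttr(name="fc_w"),
                             bias_attr=fluid.ParamAttr(name="fc_b"))
    loss = fluid.layers.mean(
        fluid.layers.softmax_with_cross_entropy(logits=logits, label=y))
    return logits, loss

victim_prog = fluid.Program()
with fluid.program_guard(victim_prog):
    feature = fluid.data(name="x", shape=[None, 1, 28, 28], dtype="float32")
    label = fluid.data(name="y", shape=[None, 1], dtype="int64")
    _, loss = network(feature, label)
    params = victim_prog.global_block().all_parameters()
    param_grads = fluid.gradients(loss, params)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

# gradients produced by the (secret) target datum, as observed by the attacker
x_np = numpy.random.normal(size=(1, 1, 28, 28)).astype("float32")
y_np = numpy.array([[3]], dtype="int64")
origin_grad = exe.run(victim_prog,
                      feed={"x": x_np, "y": y_np},
                      fetch_list=param_grads)

dlg_attack(Args(), feature, label, network, exe, origin_grad)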
out8 = fluid.layers.softmax(out_logit8)

fluid.io.load_persistables(exe, pretrained_model, main_program=main_programs)
print('ok')
init_prog(main_programs)
eval_program = main_programs.clone(for_test=True)

label = fluid.layers.data(name="label", shape=[1], dtype='int64')
y = fluid.layers.data(name="y", shape=[8], dtype='int64')
out_logits = (out_logit1[:, :121] * y[0] + out_logit2 * y[1] +
              out_logit3 * y[2] + out_logit4 * y[3] +
              out_logit5 * y[4] + out_logit6 * y[5] +
              out_logit7 * y[6] + out_logit8 * y[7]) / (
                  y[0] + y[1] + y[2] + y[3] + y[4] + y[5] + y[6] + y[7])
out = fluid.layers.softmax(out_logits)
loss = fluid.layers.cross_entropy(input=out, label=label)
gradients = fluid.gradients(targets=loss, inputs=[input_layer])[0]

def inference(img):
    result1, result2, result3, result4, result5, result6, result7, result8 = exe.run(
        eval_program,
        fetch_list=[out1, out2, out3, out4, out5, out6, out7, out8],
        feed={'image': img})
    result1 = result1[0, :121]
    pred1 = np.argmax(result1)
    result2 = result2[0]
    pred2 = np.argmax(result2)
    result3 = result3[0]
    pred3 = np.argmax(result3)
    result4 = result4[0]
    pred4 = np.argmax(result4)