def test_is_supported_layers(self):
    """Check ``ASPHelper._is_supported_layer`` over a fixed name list:
    with the default support map, after excluding two layers, and again
    after resetting the exclusions.

    Improvements over the original: ``assertEqual`` (instead of
    ``assertTrue(a == b)``) so a failure reports the offending values,
    plus the parameter name as the failure message; the reference list
    and check loop are no longer written out three times.
    """
    program = paddle.static.default_main_program()
    names = [
        'embedding_0.w_0', 'fack_layer_0.w_0', 'conv2d_0.w_0',
        'conv2d_0.b_0', 'conv2d_1.w_0', 'conv2d_1.b_0', 'fc_0.w_0',
        'fc_0.b_0', 'fc_1.w_0', 'fc_1.b_0', 'linear_2.w_0', 'linear_2.b_0'
    ]

    def check(expected):
        # Pairwise comparison of expectation vs. actual support flag.
        for name, flag in zip(names, expected):
            self.assertEqual(
                flag,
                ASPHelper._is_supported_layer(program, name),
                msg=name)

    # Default: only conv2d/fc/linear weight (.w_0) entries are supported.
    default_ref = [
        False, False, True, False, True, False, True, False, True, False,
        True, False
    ]
    check(default_ref)

    # Excluding 'fc_1' and 'conv2d_0' flips their weight entries to False.
    paddle.incubate.asp.set_excluded_layers(['fc_1', 'conv2d_0'], program)
    check([
        False, False, False, False, True, False, True, False, False, False,
        True, False
    ])

    # Resetting the exclusions restores the default support map.
    paddle.incubate.asp.reset_excluded_layers(program)
    check(default_ref)
def test_decorate(self):
    """After ``asp.decorate``, every supported parameter must have a mask
    variable registered in the program's ASP info, and no unsupported
    parameter may have one."""
    param_names = [p.name for p in self.layer.parameters()]
    self.optimizer = paddle.incubate.asp.decorate(self.optimizer)

    program = paddle.static.default_main_program()
    asp_info = ASPHelper._get_program_asp_info(program)
    for name in param_names:
        mask_var = asp_info.mask_vars.get(name, None)
        supported = ASPHelper._is_supported_layer(program, name)
        if supported:
            self.assertTrue(mask_var is not None)
        else:
            self.assertTrue(mask_var is None)
def test_asp_training_with_amp(self):
    """Run one AMP + ASP training step on the static program and verify
    every supported parameter satisfies 2:4 sparsity afterwards.
    Does nothing on builds without CUDA."""
    if not core.is_compiled_with_cuda():
        return
    place = paddle.CUDAPlace(0)

    with fluid.program_guard(self.main_program, self.startup_program):
        self.optimizer = fluid.contrib.mixed_precision.decorator.decorate(
            self.optimizer)
        self.optimizer = paddle.incubate.asp.decorate(self.optimizer)
        self.optimizer.minimize(self.loss, self.startup_program)

    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[self.img, self.label],
                              place=place)

    exe.run(self.startup_program)
    paddle.incubate.asp.prune_model(self.main_program)

    batch = (np.random.randn(32, 3, 24, 24),
             np.random.randint(10, size=(32, 1)))
    exe.run(self.main_program, feed=feeder.feed([batch]))

    for param in self.main_program.global_block().all_parameters():
        if not ASPHelper._is_supported_layer(self.main_program,
                                             param.name):
            continue
        weight = np.array(
            fluid.global_scope().find_var(param.name).get_tensor())
        self.assertTrue(
            paddle.fluid.contrib.sparsity.check_sparsity(weight.T, n=2,
                                                         m=4))
def test_with_asp_sharding(self):
    """Fleet (sharding) training with ASP pruning: run one step and
    assert 2:4 sparsity on every supported parameter."""
    fleet.init(is_collective=True)
    train_prog, startup_prog = fluid.Program(), fluid.Program()
    avg_cost, strategy, input_x, input_y = self.net(train_prog,
                                                    startup_prog)

    with fluid.program_guard(train_prog, startup_prog):
        optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01)
        optimizer = fleet.distributed_optimizer(optimizer,
                                                strategy=strategy)
        optimizer.minimize(avg_cost)

    # Pick the GPU assigned to this worker, or fall back to CPU.
    if paddle.fluid.is_compiled_with_cuda():
        gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
        place = fluid.CUDAPlace(gpu_id)
    else:
        place = fluid.CPUPlace()

    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[input_x, input_y], place=place)
    exe.run(startup_prog)
    sparsity.prune_model(train_prog)

    batch = (np.random.randn(64, 32), np.random.randint(2, size=(64, 1)))
    exe.run(train_prog, feed=feeder.feed([batch]))

    for param in train_prog.global_block().all_parameters():
        if not ASPHelper._is_supported_layer(train_prog, param.name):
            continue
        weight = np.array(
            fluid.global_scope().find_var(param.name).get_tensor())
        self.assertTrue(
            paddle.fluid.contrib.sparsity.check_sparsity(weight.T, n=2,
                                                         m=4))
def test_with_asp(self):
    """Dygraph fleet + ASP: decorate the optimizer, prune the layer, run
    one optimization step, then verify every supported parameter is 2:4
    sparse."""
    fleet.init(is_collective=True)

    self.optimizer = paddle.incubate.asp.decorate(self.optimizer)
    paddle.incubate.asp.prune_model(self.layer)

    self.optimizer = fleet.distributed_optimizer(self.optimizer)
    self.layer = fleet.distributed_model(self.layer)

    imgs = paddle.to_tensor(np.random.randn(64, 32),
                            dtype='float32',
                            place=self.place,
                            stop_gradient=False)
    labels = paddle.to_tensor(np.random.randint(10, size=(64, 1)),
                              dtype='float32',
                              place=self.place,
                              stop_gradient=False)

    criterion = paddle.nn.MSELoss(reduction='mean')
    loss = criterion(self.layer(imgs), labels)
    loss.backward()
    self.optimizer.step()
    self.optimizer.clear_grad()

    program = paddle.static.default_main_program()
    for param in self.layer.parameters():
        if not ASPHelper._is_supported_layer(program, param.name):
            continue
        self.assertTrue(
            paddle.fluid.contrib.sparsity.check_sparsity(param.numpy().T,
                                                         n=2, m=4))
def test_save_and_load(self):
    """Round-trip the static program and its parameters through
    paddle.save/paddle.load, run one step on the reloaded program, and
    re-check 2:4 sparsity of supported parameters."""
    path = "/tmp/paddle_asp_save_st/"
    param_path = path + "asp.pdparams"
    model_path = path + "asp.pdmodel"

    paddle.save(self.main_program.state_dict(), param_path)
    paddle.save(self.main_program, model_path)

    prog = paddle.load(model_path)
    prog.set_state_dict(paddle.load(param_path))

    feeder = fluid.DataFeeder(feed_list=[self.img, self.label],
                              place=self.place)
    batch = (np.random.randn(64, 3, 32, 32),
             np.random.randint(10, size=(64, 1)))
    self.exe.run(prog, feed=feeder.feed([batch]))

    for param in prog.global_block().all_parameters():
        if not ASPHelper._is_supported_layer(prog, param.name):
            continue
        weight = np.array(
            fluid.global_scope().find_var(param.name).get_tensor())
        self.assertTrue(
            paddle.fluid.contrib.sparsity.check_sparsity(weight.T, n=2,
                                                         m=4))
def test_asp_training_with_amp(self):
    """Dygraph AMP training step with ASP decoration and pruning;
    supported parameters must remain 2:4 sparse afterwards."""
    self.optimizer = paddle.incubate.asp.decorate(self.optimizer)
    paddle.incubate.asp.prune_model(self.layer)

    imgs = paddle.to_tensor(np.random.randn(32, 3, 24, 24),
                            dtype='float32',
                            place=self.place,
                            stop_gradient=False)
    labels = paddle.to_tensor(np.random.randint(10, size=(32, 1)),
                              dtype='float32',
                              place=self.place,
                              stop_gradient=False)

    criterion = paddle.nn.MSELoss(reduction='mean')
    scaler = paddle.amp.GradScaler(init_loss_scaling=1024)

    with paddle.amp.auto_cast(enable=True):
        prediction = self.layer(imgs)
        loss = criterion(prediction, labels)
        scaled = scaler.scale(loss)
        scaled.backward()
        scaler.minimize(self.optimizer, scaled)
        self.optimizer.clear_grad()

    program = paddle.static.default_main_program()
    for param in self.layer.parameters():
        if not ASPHelper._is_supported_layer(program, param.name):
            continue
        self.assertTrue(
            paddle.fluid.contrib.sparsity.check_sparsity(param.numpy().T,
                                                         n=2, m=4))
def test_save_and_load(self):
    """Save layer/optimizer state, overwrite all ASP masks with ones,
    reload the checkpoints, train one step, and verify that supported
    parameters still satisfy 2:4 sparsity."""
    path = "/tmp/paddle_asp_save_dy/"
    net_path = path + "asp_net.pdparams"
    opt_path = path + "asp_opt.pdopt"

    paddle.save(self.layer.state_dict(), net_path)
    paddle.save(self.optimizer.state_dict(), opt_path)

    asp_info = ASPHelper._get_program_asp_info(
        paddle.static.default_main_program())
    # Deliberately clobber every mask with all-ones; the reload below is
    # expected to bring back state under which sparsity still holds.
    for param_name in asp_info.mask_vars:
        mask = asp_info.mask_vars[param_name]
        asp_info.update_mask_vars(
            param_name, paddle.ones(shape=mask.shape, dtype=mask.dtype))
        asp_info.update_masks(param_name, np.ones(shape=mask.shape))

    self.layer.set_state_dict(paddle.load(net_path))
    self.optimizer.set_state_dict(paddle.load(opt_path))

    imgs = paddle.to_tensor(np.random.randn(64, 3, 32, 32),
                            dtype='float32',
                            place=self.place,
                            stop_gradient=False)
    labels = paddle.to_tensor(np.random.randint(10, size=(64, 1)),
                              dtype='float32',
                              place=self.place,
                              stop_gradient=False)

    criterion = paddle.nn.MSELoss(reduction='mean')
    loss = criterion(self.layer(imgs), labels)
    loss.backward()
    self.optimizer.step()
    self.optimizer.clear_grad()

    program = paddle.static.default_main_program()
    for param in self.layer.parameters():
        if not ASPHelper._is_supported_layer(program, param.name):
            continue
        self.assertTrue(
            paddle.fluid.contrib.sparsity.check_sparsity(param.numpy().T,
                                                         n=2, m=4))
def test_get_not_ASP_relevant_vars(self):
    """_get_not_ASP_relevant_vars must return exactly the model
    parameters, in order, both before and after ASP's minimize adds its
    own variables to the program."""

    def same_params(expected, actual):
        # Same length and same name at every position.
        if len(actual) != len(expected):
            return False
        return all(a.name == e.name for a, e in zip(actual, expected))

    params = self.main_program.global_block().all_parameters()
    before = ASPHelper._get_not_ASP_relevant_vars(self.main_program)
    self.assertTrue(same_params(params, before))

    with fluid.program_guard(self.main_program, self.startup_program):
        ASPHelper._minimize(self.optimizer, self.loss, self.main_program,
                            self.startup_program)
    after = ASPHelper._get_not_ASP_relevant_vars(self.main_program)
    self.assertTrue(same_params(params, after))
def __pruning_and_checking(self, with_mask):
    """Prune self.layer with the configured mask-generation function and
    assert every supported parameter passes the configured sparsity
    check (n=2, m=4)."""
    paddle.incubate.asp.prune_model(self.layer,
                                    mask_algo=self.mask_gen_func,
                                    with_mask=with_mask)
    program = paddle.static.default_main_program()
    for param in self.layer.parameters():
        if not ASPHelper._is_supported_layer(program, param.name):
            continue
        self.assertTrue(
            paddle.fluid.contrib.sparsity.check_sparsity(
                param.numpy().T,
                func_name=self.mask_check_func,
                n=2,
                m=4))
def __pruning_and_checking(self, exe, place, mask_func_name,
                           check_func_name, with_mask):
    """Run startup, prune self.main_program with the given mask
    algorithm, and assert every supported parameter passes the given
    sparsity check (n=2, m=4)."""
    exe.run(self.startup_program)
    paddle.incubate.asp.prune_model(self.main_program,
                                    mask_algo=mask_func_name,
                                    with_mask=with_mask)
    for param in self.main_program.global_block().all_parameters():
        if not ASPHelper._is_supported_layer(self.main_program,
                                             param.name):
            continue
        weight = np.array(
            fluid.global_scope().find_var(param.name).get_tensor())
        self.assertTrue(
            paddle.fluid.contrib.sparsity.check_sparsity(
                weight.T, func_name=check_func_name, n=2, m=4))
def __check_mask_variables_and_ops(self, param_names,
                                   param_names_after_minimize):
    """Check that (a) every supported parameter's mask name appears
    after minimize, and (b) the program's masking `elementwise_mul` ops
    correspond one-to-one with those mask names."""
    # (a) No supported parameter may be missing its mask after minimize.
    for name in param_names:
        supported = ASPHelper._is_supported_layer(self.main_program, name)
        missing = ASPHelper._get_mask_name(
            name) not in param_names_after_minimize
        self.assertFalse(supported and missing)

    mask_names = [
        ASPHelper._get_mask_name(name) for name in param_names
        if ASPHelper._is_supported_layer(self.main_program, name)
    ]

    # (b) Collect the Y input of every elementwise_mul whose Y is a mask.
    masking_inputs = [
        op.input('Y')[0]
        for op in self.main_program.global_block().ops
        if op.type == 'elementwise_mul' and op.input('Y')[0] in mask_names
    ]

    self.assertTrue(len(masking_inputs) == len(mask_names))
    for name in masking_inputs:
        self.assertTrue(name in mask_names)
    for name in mask_names:
        self.assertTrue(name in masking_inputs)
def minimize_impl(self,
                  loss,
                  startup_program=None,
                  parameter_list=None,
                  no_grad_set=None):
    """Delegate minimization to ``ASPHelper._minimize`` with the wrapped
    inner optimizer; returns its (optimize_ops, params_grads) pair."""
    return ASPHelper._minimize(self.inner_opt,
                               loss,
                               startup_program=startup_program,
                               parameter_list=parameter_list,
                               no_grad_set=no_grad_set)
def test_with_asp_and_pure_fp16(self):
    """Fleet training with pure-FP16 AMP plus ASP pruning: run one step
    and verify every supported parameter ends up 2:4 sparse.

    Fix: the original contained a duplicated assignment
    ``optimizer = optimizer = paddle.optimizer.Momentum(...)``.
    """
    fleet.init(is_collective=True)
    train_prog, startup_prog = fluid.Program(), fluid.Program()

    with paddle.static.amp.fp16_guard():
        avg_cost, strategy, \
            input_x, input_y = self.net(train_prog, startup_prog)
    strategy.amp = True
    strategy.amp_configs = {'use_pure_fp16': True}

    with fluid.program_guard(train_prog, startup_prog):
        with paddle.static.amp.fp16_guard():
            optimizer = paddle.optimizer.Momentum(learning_rate=0.01,
                                                  multi_precision=True)
            optimizer = fleet.distributed_optimizer(optimizer,
                                                    strategy=strategy)
            optimizer.minimize(avg_cost)

    place = fluid.CUDAPlace(0) if paddle.fluid.is_compiled_with_cuda() \
        else fluid.CPUPlace()

    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[input_x, input_y], place=place)
    exe.run(startup_prog)
    # Prepare parameters for pure-FP16 execution before pruning.
    optimizer.amp_init(place)
    sparsity.prune_model(train_prog)

    data = (np.random.randn(64, 32), np.random.randint(2, size=(64, 1)))
    exe.run(train_prog, feed=feeder.feed([data]))

    for param in train_prog.global_block().all_parameters():
        if not ASPHelper._is_supported_layer(train_prog, param.name):
            continue
        mat = np.array(
            fluid.global_scope().find_var(param.name).get_tensor())
        self.assertTrue(
            paddle.fluid.contrib.sparsity.check_sparsity(mat.T, n=2, m=4))