def test_visualdl_callback(self):
    """Run one fit() of LeNet on MNIST with the VisualDL callback attached,
    verifying the callback works end to end."""
    # VisualDL does not support Python 2, so skip there.
    if sys.version_info < (3, ):
        return

    # HWC -> CHW, then map pixel values from [0, 255] to [-1, 1].
    mnist_transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
    train_ds = MnistDataset(mode='train', transform=mnist_transform)
    eval_ds = MnistDataset(mode='test', transform=mnist_transform)

    input_specs = [InputSpec([-1, 1, 28, 28], 'float32', 'image')]
    label_specs = [InputSpec([None, 1], 'int64', 'label')]

    lenet = paddle.vision.models.LeNet()
    model = paddle.Model(lenet, input_specs, label_specs)
    adam = paddle.optimizer.Adam(0.001, parameters=lenet.parameters())
    model.prepare(optimizer=adam,
                  loss=paddle.nn.CrossEntropyLoss(),
                  metrics=paddle.metric.Accuracy())

    vdl_callback = paddle.callbacks.VisualDL(log_dir='visualdl_log_dir')
    model.fit(train_ds, eval_ds, batch_size=64, callbacks=vdl_callback)
def func_test_main(self):
    """Smoke-test the FashionMNIST train split with the default (PIL) and
    cv2 backends, and verify that an unknown backend is rejected."""
    transform = T.Transpose()

    # Default backend: spot-check one randomly chosen sample.
    mnist = FashionMNIST(mode='train', transform=transform)
    self.assertTrue(len(mnist) == 60000)
    # np.random.randint's upper bound is exclusive, so pass len(mnist) so the
    # last sample can also be drawn (the previous `len(mnist) - 1` bound
    # silently excluded it).
    i = np.random.randint(0, len(mnist))
    image, label = mnist[i]
    self.assertTrue(image.shape[0] == 1)
    self.assertTrue(image.shape[1] == 28)
    self.assertTrue(image.shape[2] == 28)
    self.assertTrue(label.shape[0] == 1)
    self.assertTrue(0 <= int(label) <= 9)

    # test cv2 backend
    mnist = FashionMNIST(mode='train', transform=transform, backend='cv2')
    self.assertTrue(len(mnist) == 60000)
    for i in range(len(mnist)):
        image, label = mnist[i]
        self.assertTrue(image.shape[0] == 1)
        self.assertTrue(image.shape[1] == 28)
        self.assertTrue(image.shape[2] == 28)
        self.assertTrue(label.shape[0] == 1)
        self.assertTrue(0 <= int(label) <= 9)
        break  # one sample is enough; the loop only provides the unpacking

    with self.assertRaises(ValueError):
        mnist = FashionMNIST(mode='train', transform=transform, backend=1)
def prepare_input():
    """Load the sample street image and build the detector feed dict.

    Returns:
        tuple: ``(feed_dict, img)`` where ``feed_dict`` holds the normalized
        NCHW image tensor, the target image shape and the h/w scale factors
        relative to the original image, and ``img`` is the original BGR image
        as read by cv2.
    """
    transforms = [
        T.Resize(size=(target_height, target_width)),
        # mean 0 / std 1 leaves pixel values unchanged; this step only
        # performs the BGR -> RGB conversion (to_rgb=True).
        T.Normalize(mean=(0, 0, 0),
                    std=(1, 1, 1),
                    data_format='HWC',
                    to_rgb=True),
        T.Transpose(),
    ]
    img_file = root_path / "street.jpeg"
    img = cv2.imread(str(img_file))
    normalized_img = T.Compose(transforms)(img)
    # Scale pixel values from [0, 255] to [0, 1].
    normalized_img = normalized_img.astype(np.float32, copy=False) / 255.0

    # add a new axis in front (batch dimension)
    img_input = normalized_img[np.newaxis, :]

    # scale_factor is calculated as: im_shape / original_im_shape
    h_scale = target_height / img.shape[0]
    w_scale = target_width / img.shape[1]

    # Renamed from `input` to avoid shadowing the builtin of the same name.
    feed_dict = {
        "image": img_input,
        "im_shape": [target_height, target_width],
        "scale_factor": [h_scale, w_scale],
    }
    return feed_dict, img
def run_model(self, model):
    """Fit *model* on the MNIST train split for one epoch, capped at two
    iterations (a quick training smoke test)."""
    # Pipeline: HWC -> CHW, then map pixel values from [0, 255] to [-1, 1].
    mnist = MNIST(mode='train',
                  transform=T.Compose(
                      [T.Transpose(), T.Normalize([127.5], [127.5])]))
    model.fit(mnist, epochs=1, batch_size=64, num_iters=2, log_freq=1)
def test_ptq(self):
    """Post-training quantization round trip on LeNet/MNIST.

    Trains an FP32 LeNet, quantizes it with PTQ (with layer fusion),
    calibrates on the test reader, exports the quantized model, and asserts
    the quantized top-1 accuracy is within 0.002 of the FP32 baseline.
    """
    # Fix all random seeds for a reproducible accuracy comparison.
    seed = 1
    np.random.seed(seed)
    paddle.static.default_main_program().random_seed = seed
    paddle.static.default_startup_program().random_seed = seed

    _logger.info("create the fp32 model")
    fp32_lenet = ImperativeLenet()

    _logger.info("prepare data")
    batch_size = 64
    # HWC -> CHW, then map pixel values from [0, 255] to [-1, 1].
    transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
    train_dataset = paddle.vision.datasets.MNIST(
        mode='train', backend='cv2', transform=transform)
    val_dataset = paddle.vision.datasets.MNIST(
        mode='test', backend='cv2', transform=transform)

    place = paddle.CUDAPlace(0) \
        if paddle.is_compiled_with_cuda() else paddle.CPUPlace()
    train_reader = paddle.io.DataLoader(
        train_dataset,
        drop_last=True,
        places=place,
        batch_size=batch_size,
        return_list=True)
    test_reader = paddle.io.DataLoader(
        val_dataset, places=place, batch_size=batch_size, return_list=True)

    _logger.info("train the fp32 model")
    self.model_train(fp32_lenet, train_reader)

    _logger.info("test fp32 model")
    fp32_top1, fp32_top5 = self.model_test(fp32_lenet, test_reader)

    _logger.info("quantize the fp32 model")
    quanter = PTQ()
    quant_lenet = quanter.quantize(fp32_lenet, fuse=True)

    _logger.info("calibrate")
    # PTQ needs representative data to compute quantization scales.
    self.calibrate(quant_lenet, test_reader)

    _logger.info("save and test the quantized model")
    save_path = "./tmp/model"
    input_spec = paddle.static.InputSpec(
        shape=[None, 1, 28, 28], dtype='float32')
    quanter.save_quantized_model(
        quant_lenet, save_path, input_spec=[input_spec])
    quant_top1, quant_top5 = self.model_test(quant_lenet, test_reader)

    _logger.info("FP32 acc: top1: {}, top5: {}".format(fp32_top1, fp32_top5))
    _logger.info("Int acc: top1: {}, top5: {}".format(quant_top1, quant_top5))

    # Quantization must not cost more than `diff` of top-1 accuracy.
    diff = 0.002
    self.assertTrue(
        fp32_top1 - quant_top1 < diff,
        msg="The acc of quant model is too lower than fp32 model")
def __init__(self, noise_path, size, keys=None):
    """Index the noise images under *noise_path* and build the
    crop/normalize pipeline.

    NOTE(review): the glob pattern is ``noise_path + '*.png'``, so
    *noise_path* is expected to end with a path separator — confirm with
    callers.
    """
    self.noise_path = noise_path
    self.size = size
    self.keys = keys
    # Sorted for a deterministic, reproducible ordering across runs.
    self.noise_imgs = sorted(glob.glob(noise_path + '*.png'))
    pipeline = [
        T.RandomCrop(size),
        T.Transpose(),
        # (x - 0) / 255: scales pixel values from [0, 255] to [0, 1].
        T.Normalize([0., 0., 0.], [255., 255., 255.]),
    ]
    self.transform = T.Compose(pipeline)
def get_makeup_transform(cfg, pic="image"):
    """Return the preprocessing transform for makeup data.

    Images get a resize followed by HWC -> CHW transpose; any other input
    kind gets a nearest-neighbour resize only (no channel transpose).
    """
    if pic != "image":
        return T.Resize(size=cfg.trans_size,
                        interpolation=cv2.INTER_NEAREST)
    return T.Compose([
        T.Resize(size=cfg.trans_size),
        T.Transpose(),
    ])
def test_main(self):
    """Check MNIST test-split size and the shape/label of every sample."""
    dataset = MNIST(mode='test', transform=T.Transpose())
    self.assertTrue(len(dataset) == 10000)
    # Validate each sample: CHW image of shape (1, 28, 28) and a
    # single-element label with a digit value in [0, 9].
    for idx in range(len(dataset)):
        image, label = dataset[idx]
        self.assertTrue(image.shape[0] == 1)
        self.assertTrue(image.shape[1] == 28)
        self.assertTrue(image.shape[2] == 28)
        self.assertTrue(label.shape[0] == 1)
        self.assertTrue(0 <= int(label) <= 9)
def func_test_main(self):
    """Spot-check one random sample of the FashionMNIST test split."""
    transform = T.Transpose()
    mnist = FashionMNIST(mode='test', transform=transform)
    self.assertTrue(len(mnist) == 10000)
    # np.random.randint's upper bound is exclusive, so pass len(mnist) so the
    # last sample can also be drawn (the previous `len(mnist) - 1` bound
    # silently excluded it).
    i = np.random.randint(0, len(mnist))
    image, label = mnist[i]
    self.assertTrue(image.shape[0] == 1)
    self.assertTrue(image.shape[1] == 28)
    self.assertTrue(image.shape[2] == 28)
    self.assertTrue(label.shape[0] == 1)
    self.assertTrue(0 <= int(label) <= 9)
def __init__(self, data_root, input_size, mean, std):
    """Store dataset configuration and build the normalization pipeline.

    NOTE(review): ``num_classes`` is set to ``len(self.image_data)``, the
    same value as ``self.len`` — presumably ``image_data`` is organized
    per class; confirm against ``data_prepare``.
    """
    super(NormalDataset, self).__init__()
    self.data_root = data_root
    self.input_size = input_size
    self.mean = mean
    self.std = std
    # HWC -> CHW, then channel-wise normalization. A Resize to
    # `input_size` (smaller side) used to precede these steps but is
    # currently disabled.
    steps = [
        transforms.Transpose(order=(2, 0, 1)),
        transforms.Normalize(mean=self.mean, std=self.std),
    ]
    self.trans = transforms.Compose(steps)
    # data_prepare() is defined elsewhere in the class.
    self.image_data, self.image_label = self.data_prepare()
    self.num_classes = len(self.image_data)
    self.len = len(self.image_data)
def __init__(self, methodName='runTest', param_names=None):
    """Build the MNIST train/val datasets used by the pruner tests.

    Args:
        methodName: unittest method name, forwarded to the base class.
        param_names: names of parameters to prune; defaults to an empty
            list.
    """
    super(TestFilterPruner, self).__init__(methodName)
    # Default is None instead of a mutable `[]` default argument, which
    # would be shared across every instance constructed without an
    # explicit `param_names`.
    self._param_names = param_names if param_names is not None else []
    # HWC -> CHW, then map pixel values from [0, 255] to [-1, 1].
    transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
    self.train_dataset = paddle.vision.datasets.MNIST(
        mode="train", backend="cv2", transform=transform)
    self.val_dataset = paddle.vision.datasets.MNIST(
        mode="test", backend="cv2", transform=transform)

    def _reader():
        # Generator view over the validation dataset.
        for data in self.val_dataset:
            yield data

    self.val_reader = _reader
def __init__(self, *args, **kwargs):
    """Build the MNIST train dataset/loader used by the static-mask tests.

    Switches to dygraph (imperative) mode and loads data on CPU.
    """
    super(TestStaticMasks, self).__init__(*args, **kwargs)
    paddle.disable_static()
    # HWC -> CHW, then map pixel values from [0, 255] to [-1, 1].
    transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
    self.train_dataset = paddle.vision.datasets.MNIST(
        mode="train", backend="cv2", transform=transform)
    self.train_loader = paddle.io.DataLoader(
        self.train_dataset,
        places=paddle.set_device('cpu'),
        return_list=True)

    def _reader():
        # NOTE(review): `self.val_dataset` is never assigned in this
        # constructor, so calling val_reader() would raise AttributeError
        # unless a subclass or the test itself sets it first — confirm.
        for data in self.val_dataset:
            yield data

    self.val_reader = _reader
def eval(args):
    """Evaluate a filter-pruned classification model from a checkpoint.

    Builds the dataset selected by ``args.data``, prunes the network with an
    L1-norm filter pruner at ``args.pruned_ratio``, loads ``args.checkpoint``
    and runs ``paddle.Model.evaluate``.
    """
    paddle.set_device('gpu' if args.use_gpu else 'cpu')
    test_reader = None
    if args.data == "cifar10":
        # HWC -> CHW, then map pixel values from [0, 255] to [-1, 1].
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        val_dataset = paddle.vision.datasets.Cifar10(
            mode="test", backend="cv2", transform=transform)
        class_dim = 10
        # NOTE(review): CIFAR-10 images are 32x32, but the input spec below
        # is built from [3, 224, 224] — confirm this is intentional.
        image_shape = [3, 224, 224]
        pretrain = False
    elif args.data == "imagenet":
        val_dataset = ImageNetDataset(
            "data/ILSVRC2012",
            mode='val',
            image_size=224,
            resize_short_size=256)
        class_dim = 1000
        image_shape = [3, 224, 224]
        pretrain = True
    else:
        raise ValueError("{} is not supported.".format(args.data))
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    inputs = [Input([None] + image_shape, 'float32', name='image')]
    labels = [Input([None, 1], 'int64', name='label')]

    # model definition
    net = models.__dict__[args.model](pretrained=pretrain,
                                      num_classes=class_dim)
    # Prune the same set of parameters at the same ratio as during training
    # so the checkpoint shapes match the pruned network.
    pruner = paddleslim.dygraph.L1NormFilterPruner(net, [1] + image_shape)
    params = get_pruned_params(args, net)
    ratios = {}
    for param in params:
        ratios[param] = args.pruned_ratio
    print("ratios: {}".format(ratios))
    pruner.prune_vars(ratios, [0])

    model = paddle.Model(net, inputs, labels)
    model.prepare(None, paddle.nn.CrossEntropyLoss(),
                  paddle.metric.Accuracy(topk=(1, 5)))
    model.load(args.checkpoint)
    model.evaluate(
        eval_data=val_dataset,
        batch_size=args.batch_size,
        verbose=1,
        num_workers=8)
def __init__(self,
             output_path='output_dir',
             weight_path=None,
             use_adjust_brightness=True):
    """Set up the AnimeGANv2 predictor: resolve generator weights, load the
    network, and build the input preprocessing pipeline."""
    self.output_path = output_path
    self.input_size = (256, 256)
    self.use_adjust_brightness = use_adjust_brightness

    # Fall back to the released Hayao-style weights when none are given.
    if weight_path is None:
        vox_cpk_weight_url = 'https://paddlegan.bj.bcebos.com/models/animeganv2_hayao.pdparams'
        weight_path = get_path_from_url(vox_cpk_weight_url)
    self.weight_path = weight_path

    self.generator = self.load_checkpoints()

    # Resize toward (256, 256) snapped to multiples of 32, HWC -> CHW,
    # then map pixel values from [0, 255] to [-1, 1].
    self.transform = T.Compose([
        ResizeToScale((256, 256), 32),
        T.Transpose(),
        T.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5]),
    ])
def func_warn_or_error(self):
    """Exercise ReduceLROnPlateau's invalid-argument paths.

    Checks that factor >= 1.0 raises ValueError, that an unknown mode only
    warns, and that fit() runs with a non-existent monitored metric
    ('miou') as well as with a schedule-wrapped optimizer.
    """
    # factor must be < 1.0 — this should raise.
    with self.assertRaises(ValueError):
        paddle.callbacks.ReduceLROnPlateau(factor=2.0)
    # warning (unknown mode falls back with a warning, no exception)
    paddle.callbacks.ReduceLROnPlateau(mode='1', patience=3, verbose=1)

    # HWC -> CHW, then map pixel values from [0, 255] to [-1, 1].
    transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
    train_dataset = CustomMnist(mode='train', transform=transform)
    val_dataset = CustomMnist(mode='test', transform=transform)
    net = LeNet()
    optim = paddle.optimizer.Adam(
        learning_rate=0.001, parameters=net.parameters())
    inputs = [InputSpec([None, 1, 28, 28], 'float32', 'x')]
    labels = [InputSpec([None, 1], 'int64', 'label')]
    model = Model(net, inputs=inputs, labels=labels)
    model.prepare(optim, loss=CrossEntropyLoss(), metrics=[Accuracy()])

    # 'miou' is not produced by this model — the callback should cope with
    # a missing monitored metric.
    callbacks = paddle.callbacks.ReduceLROnPlateau(
        monitor='miou', patience=3, verbose=1)
    model.fit(train_dataset,
              val_dataset,
              batch_size=8,
              log_freq=1,
              save_freq=10,
              epochs=1,
              callbacks=[callbacks])

    # Second run: LR scheduler-wrapped optimizer plus mode='max' on 'acc'.
    optim = paddle.optimizer.Adam(
        learning_rate=paddle.optimizer.lr.PiecewiseDecay([0.001, 0.0001],
                                                         [5, 10]),
        parameters=net.parameters())
    model.prepare(optim, loss=CrossEntropyLoss(), metrics=[Accuracy()])
    callbacks = paddle.callbacks.ReduceLROnPlateau(
        monitor='acc', mode='max', patience=3, verbose=1, cooldown=1)
    model.fit(train_dataset,
              val_dataset,
              batch_size=8,
              log_freq=1,
              save_freq=10,
              epochs=3,
              callbacks=[callbacks])
def test_save_load(self):
    """AMP state round trip: save a briefly-trained model, train a fresh one,
    and verify that load() restores the GradScaler and optimizer state.
    """
    paddle.disable_static()
    paddle.set_device('gpu')
    amp_level = {"level": "O1", "init_loss_scaling": 128}
    paddle.seed(2021)
    model = self.get_model(amp_level)
    # HWC -> CHW, then map pixel values from [0, 255] to [-1, 1].
    transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
    train_dataset = MNIST(mode='train', transform=transform)
    # Two iterations are enough to advance the scaler's counters.
    model.fit(train_dataset, epochs=1, batch_size=64, num_iters=2, log_freq=1)
    model.save('./lenet_amp')

    with paddle.fluid.unique_name.guard():
        paddle.seed(2021)
        new_model = self.get_model(amp_level)
        train_dataset = MNIST(mode='train', transform=transform)
        # Train the fresh model one iteration fewer so its scaler state
        # differs from the saved one before load().
        new_model.fit(train_dataset,
                      epochs=1,
                      batch_size=64,
                      num_iters=1,
                      log_freq=1)

    # not equal before load
    self.assertNotEqual(new_model._scaler.state_dict()['incr_count'],
                        model._scaler.state_dict()['incr_count'])
    print((new_model._scaler.state_dict()['incr_count'],
           model._scaler.state_dict()['incr_count']))

    # equal after load
    new_model.load('./lenet_amp')
    self.assertEqual(new_model._scaler.state_dict()['incr_count'],
                     model._scaler.state_dict()['incr_count'])
    self.assertEqual(new_model._scaler.state_dict()['decr_count'],
                     model._scaler.state_dict()['decr_count'])
    # Optimizer moments must also be restored bit-for-bit.
    self.assertTrue(
        np.array_equal(
            new_model._optimizer.state_dict()
            ['conv2d_1.w_0_moment1_0'].numpy(),
            model._optimizer.state_dict()
            ['conv2d_1.w_0_moment1_0'].numpy()))
def func_reduce_lr_on_plateau(self):
    """End-to-end run of the ReduceLROnPlateau callback: fit LeNet for ten
    epochs so the LR gets a chance to drop when the metric plateaus."""
    # HWC -> CHW, then map pixel values from [0, 255] to [-1, 1].
    mnist_transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
    train_ds = CustomMnist(mode='train', transform=mnist_transform)
    val_ds = CustomMnist(mode='test', transform=mnist_transform)

    network = LeNet()
    adam = paddle.optimizer.Adam(
        learning_rate=0.001, parameters=network.parameters())
    model = Model(network,
                  inputs=[InputSpec([None, 1, 28, 28], 'float32', 'x')],
                  labels=[InputSpec([None, 1], 'int64', 'label')])
    model.prepare(adam, loss=CrossEntropyLoss(), metrics=[Accuracy()])

    lr_callback = paddle.callbacks.ReduceLROnPlateau(
        patience=1, verbose=1, cooldown=1)
    model.fit(train_ds,
              val_ds,
              batch_size=8,
              log_freq=1,
              save_freq=10,
              epochs=10,
              callbacks=[lr_callback])
def compress(args):
    """Unstructured-pruning training loop (optionally GMP) for MobileNetV1.

    Trains on ImageNet or CIFAR-10 (optionally data-parallel), prunes with
    an unstructured pruner created by ``create_unstructured_pruner``, and
    periodically evaluates and checkpoints the model.
    """
    shuffle = True
    if args.ce_test:
        # set seed — fixed seeds and no workers for reproducible CE runs.
        seed = 111
        paddle.seed(seed)
        np.random.seed(seed)
        random.seed(seed)
        args.num_workers = 0
        shuffle = False

    if args.use_gpu:
        place = paddle.set_device('gpu')
    else:
        place = paddle.set_device('cpu')

    trainer_num = paddle.distributed.get_world_size()
    use_data_parallel = trainer_num != 1
    if use_data_parallel:
        dist.init_parallel_env()

    train_reader = None
    test_reader = None
    if args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
    elif args.data == "cifar10":
        normalize = T.Normalize(
            mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], data_format='CHW')
        transform = T.Compose([T.Transpose(), normalize])
        train_dataset = paddle.vision.datasets.Cifar10(
            mode='train', backend='cv2', transform=transform)
        val_dataset = paddle.vision.datasets.Cifar10(
            mode='test', backend='cv2', transform=transform)
        class_dim = 10
    else:
        raise ValueError("{} is not supported.".format(args.data))

    batch_sampler = paddle.io.DistributedBatchSampler(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=shuffle,
        drop_last=True)
    train_loader = paddle.io.DataLoader(
        train_dataset,
        places=place,
        batch_sampler=batch_sampler,
        return_list=True,
        num_workers=args.num_workers,
        use_shared_memory=True)
    valid_loader = paddle.io.DataLoader(
        val_dataset,
        places=place,
        drop_last=False,
        return_list=True,
        batch_size=args.batch_size_for_validation,
        shuffle=False,
        use_shared_memory=True)
    step_per_epoch = int(
        np.ceil(len(train_dataset) / args.batch_size / ParallelEnv().nranks))

    # model definition
    model = mobilenet_v1(num_classes=class_dim, pretrained=True)
    if ParallelEnv().nranks > 1:
        model = paddle.DataParallel(model)

    opt, learning_rate = create_optimizer(args, step_per_epoch, model)

    # Resume (weights + optimizer state) takes priority over a
    # pretrained-weights-only start.
    if args.checkpoint is not None and args.last_epoch > -1:
        # Strip a trailing ".pdparams"/".pdopt" so both files share a stem.
        if args.checkpoint.endswith('pdparams'):
            args.checkpoint = args.checkpoint[:-9]
        if args.checkpoint.endswith('pdopt'):
            args.checkpoint = args.checkpoint[:-6]
        model.set_state_dict(paddle.load(args.checkpoint + ".pdparams"))
        opt.set_state_dict(paddle.load(args.checkpoint + ".pdopt"))
    elif args.pretrained_model is not None:
        if args.pretrained_model.endswith('pdparams'):
            args.pretrained_model = args.pretrained_model[:-9]
        if args.pretrained_model.endswith('pdopt'):
            args.pretrained_model = args.pretrained_model[:-6]
        model.set_state_dict(paddle.load(args.pretrained_model + ".pdparams"))

    if args.pruning_strategy == 'gmp':
        # GMP pruner step 0: define configs. No need to do this if you are not using 'gmp'
        configs = {
            'stable_iterations': args.stable_epochs * step_per_epoch,
            'pruning_iterations': args.pruning_epochs * step_per_epoch,
            'tunning_iterations': args.tunning_epochs * step_per_epoch,
            'resume_iteration': (args.last_epoch + 1) * step_per_epoch,
            'pruning_steps': args.pruning_steps,
            'initial_ratio': args.initial_ratio,
        }
    else:
        configs = None

    # GMP pruner step 1: initialize a pruner object
    pruner = create_unstructured_pruner(model, args, configs=configs)

    def test(epoch):
        # Evaluate top-1/top-5 accuracy on the validation loader.
        model.eval()
        acc_top1_ns = []
        acc_top5_ns = []
        for batch_id, data in enumerate(valid_loader):
            start_time = time.time()
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            if args.data == 'cifar10':
                # cifar10 labels come back 1-D; add the column dim.
                y_data = paddle.unsqueeze(y_data, 1)
            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id,
                            np.mean(acc_top1.numpy()),
                            np.mean(acc_top5.numpy()),
                            end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1.numpy()))
            acc_top5_ns.append(np.mean(acc_top5.numpy()))
        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch,
                np.mean(np.array(acc_top1_ns, dtype="object")),
                np.mean(np.array(acc_top5_ns, dtype="object"))))

    def train(epoch):
        # One training epoch; pruner.step() runs after every optimizer step.
        model.train()
        train_reader_cost = 0.0
        train_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()
        for batch_id, data in enumerate(train_loader):
            train_reader_cost += time.time() - reader_start
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            if args.data == 'cifar10':
                y_data = paddle.unsqueeze(y_data, 1)
            train_start = time.time()
            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)
            loss.backward()
            opt.step()
            learning_rate.step()
            opt.clear_grad()
            # GMP pruner step 2: step() to update ratios and other internal states of the pruner.
            pruner.step()
            train_run_cost += time.time() - train_start
            total_samples += args.batch_size
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {}; acc_top1: {}; acc_top5: {}; avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec"
                    .format(
                        epoch, batch_id, opt.get_lr(),
                        np.mean(loss.numpy()),
                        np.mean(acc_top1.numpy()),
                        np.mean(acc_top5.numpy()),
                        train_reader_cost / args.log_period,
                        (train_reader_cost + train_run_cost) /
                        args.log_period,
                        total_samples / args.log_period,
                        total_samples /
                        (train_reader_cost + train_run_cost)))
                # Reset the rolling cost counters after each log line.
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0
            reader_start = time.time()

    for i in range(args.last_epoch + 1, args.num_epochs):
        train(i)
        # GMP pruner step 3: update params before summrizing sparsity, saving model or evaluation.
        pruner.update_params()
        if (i + 1) % args.test_period == 0:
            _logger.info(
                "The current sparsity of the pruned model is: {}%".format(
                    round(100 * UnstructuredPruner.total_sparse(model), 2)))
            test(i)
        if (i + 1) % args.model_period == 0:
            pruner.update_params()
            paddle.save(model.state_dict(),
                        os.path.join(args.model_path, "model.pdparams"))
            paddle.save(opt.state_dict(),
                        os.path.join(args.model_path, "model.pdopt"))
def test_qat_acc(self):
    """Quantization-aware training accuracy check on LeNet/MNIST.

    Trains an FP32 LeNet baseline, re-initializes it, quantizes with
    ``self.quanter``, retrains, exports the quantized model, and asserts
    the quantized top-1 accuracy is within 0.002 of the FP32 baseline.
    """
    self.prepare()
    self.set_seed()
    fp32_lenet = ImperativeLenet()
    place = paddle.CUDAPlace(0) \
        if paddle.is_compiled_with_cuda() else paddle.CPUPlace()
    # HWC -> CHW, then map pixel values from [0, 255] to [-1, 1].
    transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
    train_dataset = paddle.vision.datasets.MNIST(
        mode='train', backend='cv2', transform=transform)
    val_dataset = paddle.vision.datasets.MNIST(
        mode='test', backend='cv2', transform=transform)
    train_reader = paddle.io.DataLoader(
        train_dataset,
        drop_last=True,
        places=place,
        batch_size=64,
        return_list=True)
    test_reader = paddle.io.DataLoader(
        val_dataset, places=place, batch_size=64, return_list=True)

    def train(model):
        # One-epoch training loop shared by the FP32 and quantized models.
        adam = paddle.optimizer.Adam(
            learning_rate=0.0001, parameters=model.parameters())
        epoch_num = 1
        for epoch in range(epoch_num):
            model.train()
            for batch_id, data in enumerate(train_reader):
                img = paddle.to_tensor(data[0])
                label = paddle.to_tensor(data[1])
                img = paddle.reshape(img, [-1, 1, 28, 28])
                label = paddle.reshape(label, [-1, 1])
                out = model(img)
                acc = paddle.metric.accuracy(out, label)
                loss = paddle.nn.functional.loss.cross_entropy(out, label)
                avg_loss = paddle.mean(loss)
                avg_loss.backward()
                adam.minimize(avg_loss)
                model.clear_gradients()
                if batch_id % 100 == 0:
                    _logger.info(
                        "Train | At epoch {} step {}: loss = {:}, acc= {:}"
                        .format(epoch, batch_id, avg_loss.numpy(),
                                acc.numpy()))

    def test(model):
        # Returns (mean top-1, mean top-5) accuracy over the test reader.
        model.eval()
        avg_acc = [[], []]
        for batch_id, data in enumerate(test_reader):
            img = paddle.to_tensor(data[0])
            img = paddle.reshape(img, [-1, 1, 28, 28])
            label = paddle.to_tensor(data[1])
            label = paddle.reshape(label, [-1, 1])
            out = model(img)
            acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
            acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
            avg_acc[0].append(acc_top1.numpy())
            avg_acc[1].append(acc_top5.numpy())
            if batch_id % 100 == 0:
                _logger.info(
                    "Test | step {}: acc1 = {:}, acc5 = {:}".format(
                        batch_id, acc_top1.numpy(), acc_top5.numpy()))
        _logger.info("Test | Average: acc_top1 {}, acc_top5 {}".format(
            np.mean(avg_acc[0]), np.mean(avg_acc[1])))
        return np.mean(avg_acc[0]), np.mean(avg_acc[1])

    train(fp32_lenet)
    top1_1, top5_1 = test(fp32_lenet)

    # Re-run the constructor so the quantized model starts from fresh
    # weights rather than the trained FP32 ones.
    fp32_lenet.__init__()
    quant_lenet = self.quanter.quantize(fp32_lenet)
    train(quant_lenet)
    top1_2, top5_2 = test(quant_lenet)
    self.quanter.save_quantized_model(
        quant_lenet,
        './tmp/qat',
        input_spec=[
            paddle.static.InputSpec(
                shape=[None, 1, 28, 28], dtype='float32')
        ])

    # values before quantization and after quantization should be close
    _logger.info("Before quantization: top1: {}, top5: {}".format(
        top1_1, top5_1))
    _logger.info("After quantization: top1: {}, top5: {}".format(
        top1_2, top5_2))
    _logger.info("\n")

    diff = 0.002
    self.assertTrue(
        top1_1 - top1_2 < diff,
        msg="The acc of quant model is too lower than fp32 model")
#!/usr/bin/env python # -*- coding: utf-8 -*- # author:gentelyang time:2021-06-10 import os import paddle import paddle.distributed as dist from paddle.io import DataLoader from paddle.vision import transforms normalize = transforms.Normalize(mean=[123.675, 116.28, 103.53], std=[58.395, 57.120, 57.375]) transform = transforms.Compose([ transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip(), transforms.Transpose(), normalize ]) def train(): # 设置支持多卡训练 dist.init_parallel_env() train_dataset = paddle.vision.datasets.Cifar10(mode='train', transform=transform) batch_sampler = paddle.io.DistributedBatchSampler(train_dataset, batch_size=32, shuffle=True) train_loader = DataLoader(dataset=train_dataset, batch_sampler=batch_sampler) model = paddle.vision.mobilenet_v2(num_classes=10) # 设置支持多卡训练 model = paddle.DataParallel(model) # 设置优化方法
def compress(args):
    """Filter-pruning fine-tune pipeline for an image classifier.

    Builds the dataset selected by ``args.data``, prunes conv filters with an
    FPGM or L1-norm pruner at ``args.pruned_ratio``, logs the FLOPs before
    and after pruning, then fine-tunes the pruned network with
    ``paddle.Model.fit``.
    """
    paddle.set_device('gpu' if args.use_gpu else 'cpu')
    train_reader = None
    test_reader = None
    if args.data == "cifar10":
        # HWC -> CHW, then map pixel values from [0, 255] to [-1, 1].
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.Cifar10(
            mode="train", backend="cv2", transform=transform)
        val_dataset = paddle.vision.datasets.Cifar10(
            mode="test", backend="cv2", transform=transform)
        class_dim = 10
        image_shape = [3, 32, 32]
        pretrain = False
    elif args.data == "imagenet":
        train_dataset = ImageNetDataset(
            "data/ILSVRC2012",
            mode='train',
            image_size=224,
            resize_short_size=256)
        val_dataset = ImageNetDataset(
            "data/ILSVRC2012",
            mode='val',
            image_size=224,
            resize_short_size=256)
        class_dim = 1000
        image_shape = [3, 224, 224]
        pretrain = True
    else:
        raise ValueError("{} is not supported.".format(args.data))
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    inputs = [Input([None] + image_shape, 'float32', name='image')]
    labels = [Input([None, 1], 'int64', name='label')]

    # model definition
    net = models.__dict__[args.model](pretrained=pretrain,
                                      num_classes=class_dim)
    # NOTE(review): flops() / 1000 is reported as "GFLOPs" — confirm the
    # unit returned by paddleslim.flops matches this divisor.
    _logger.info("FLOPs before pruning: {}GFLOPs".format(
        flops(net, [1] + image_shape) / 1000))
    net.eval()
    if args.criterion == 'fpgm':
        pruner = paddleslim.dygraph.FPGMFilterPruner(net, [1] + image_shape)
    elif args.criterion == 'l1_norm':
        pruner = paddleslim.dygraph.L1NormFilterPruner(net, [1] + image_shape)
    # Prune every selected parameter along axis 0 at the same ratio.
    params = get_pruned_params(args, net)
    ratios = {}
    for param in params:
        ratios[param] = args.pruned_ratio
    plan = pruner.prune_vars(ratios, [0])
    _logger.info("FLOPs after pruning: {}GFLOPs; pruned ratio: {}".format(
        flops(net, [1] + image_shape) / 1000, plan.pruned_flops))

    # Dump the post-pruning conv parameter shapes for inspection.
    for param in net.parameters():
        if "conv2d" in param.name:
            print("{}\t{}".format(param.name, param.shape))

    net.train()
    model = paddle.Model(net, inputs, labels)
    steps_per_epoch = int(np.ceil(len(train_dataset) * 1. / args.batch_size))
    opt = create_optimizer(args, net.parameters(), steps_per_epoch)
    model.prepare(opt, paddle.nn.CrossEntropyLoss(),
                  paddle.metric.Accuracy(topk=(1, 5)))
    if args.checkpoint is not None:
        model.load(args.checkpoint)
    model.fit(train_data=train_dataset,
              eval_data=val_dataset,
              epochs=args.num_epochs,
              batch_size=args.batch_size // ParallelEnv().nranks,
              verbose=1,
              save_dir=args.model_path,
              num_workers=8)
import paddle
import paddle.nn.functional as F
import numpy as np
import random
import matplotlib.pyplot as plt
from PIL import Image
from collections import defaultdict

print(paddle.__version__)

import paddle.vision.transforms as T

# HWC -> CHW; pixel values stay in [0, 255] here and are scaled in the
# loop below.
transform = T.Compose([T.Transpose((2, 0, 1))])
cifar10_train = paddle.vision.datasets.Cifar10(mode='train',
                                               transform=transform)

# Materialize the whole CIFAR-10 train split as dense arrays:
# 50000 images of shape (3, 32, 32) plus one integer label each.
x_train = np.zeros((50000, 3, 32, 32))
y_train = np.zeros((50000, 1), dtype='int32')

for i in range(len(cifar10_train)):
    train_image, train_label = cifar10_train[i]

    # normalize the data (scale pixel values from [0, 255] to [0, 1])
    x_train[i, :, :, :] = train_image / 255.
    y_train[i, 0] = train_label

# Flatten labels to shape (50000,).
y_train = np.squeeze(y_train)

print(x_train.shape)
print(y_train.shape)
import numpy as np
import warnings

warnings.filterwarnings("ignore", category=Warning)

paddle.set_device('gpu')
place = paddle.CUDAPlace(0)

# NOTE(review): constructor args (28, 10, 20, 0.3) are presumably
# (depth, num_classes, widen_factor, dropout) — confirm against the
# WideResNet definition.
model = WideResNet(28, 10, 20, 0.3)

# CIFAR-10 channel statistics, rescaled from the [0, 1] range to [0, 255]
# to match the un-scaled uint8 pixel values after Transpose.
mean, std = ([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
mean = list(map(lambda x: x * 255, mean))
std = list(map(lambda x: x * 255, std))

# Test-set loader: HWC -> CHW, then channel-wise normalization.
val_loader = paddle.io.DataLoader(
    paddle.vision.datasets.Cifar10(
        mode='test',
        transform=transforms.Compose([
            transforms.Transpose(order=(2, 0, 1)),
            transforms.Normalize(mean=mean, std=std),
        ])),
    places=place,
    batch_size=256,
    shuffle=False,
    num_workers=4,
    use_shared_memory=True)

# Restore trained weights from the checkpoint.
checkpoint = paddle.load('/home/aistudio/checkpoint.pdparams')
model.set_state_dict(checkpoint)

loss_fn = paddle.nn.CrossEntropyLoss()
acc_fn = paddle.metric.accuracy
accuracies = []
losses = []
model.eval()
def search_mobilenetv2(config, args, image_size, is_server=True):
    """LSTM-controller RL-NAS search loop (static graph).

    Each search step samples an architecture from the controller, trains it
    for ``args.retain_epoch`` epochs, evaluates it, and feeds the mean top-1
    accuracy back to the controller as the reward.
    """
    places = static.cuda_places() if args.use_gpu else static.cpu_places()
    place = places[0]
    if is_server:
        ### start a server and a client
        rl_nas = RLNAS(
            key='lstm',
            configs=config,
            is_sync=False,
            server_addr=(args.server_address, args.port),
            controller_batch_size=1,
            controller_decay_steps=1000,
            controller_decay_rate=0.8,
            lstm_num_layers=1,
            hidden_size=10,
            temperature=1.0)
    else:
        ### start a client
        rl_nas = RLNAS(
            key='lstm',
            configs=config,
            is_sync=False,
            server_addr=(args.server_address, args.port),
            lstm_num_layers=1,
            hidden_size=10,
            temperature=1.0,
            controller_batch_size=1,
            controller_decay_steps=1000,
            controller_decay_rate=0.8,
            is_server=False)

    image_shape = [3, image_size, image_size]
    if args.data == 'cifar10':
        # HWC -> CHW, then map pixel values from [0, 255] to [-1, 1].
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.Cifar10(
            mode='train', transform=transform, backend='cv2')
        val_dataset = paddle.vision.datasets.Cifar10(
            mode='test', transform=transform, backend='cv2')
    elif args.data == 'imagenet':
        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
        val_dataset = imagenet_reader.ImageNetDataset(mode='val')

    for step in range(args.search_steps):
        # Sample one candidate architecture from the controller.
        archs = rl_nas.next_archs(1)[0][0]

        train_program = static.Program()
        test_program = static.Program()
        startup_program = static.Program()
        train_loader, avg_cost, acc_top1, acc_top5 = build_program(
            train_program, startup_program, image_shape, train_dataset,
            archs, args, places)
        test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
            test_program,
            startup_program,
            image_shape,
            val_dataset,
            archs,
            args,
            place,
            is_test=True)
        test_program = test_program.clone(for_test=True)

        exe = static.Executor(place)
        exe.run(startup_program)

        build_strategy = static.BuildStrategy()
        train_compiled_program = static.CompiledProgram(
            train_program).with_data_parallel(
                loss_name=avg_cost.name, build_strategy=build_strategy)

        # Train the sampled architecture for a few epochs before scoring.
        for epoch_id in range(args.retain_epoch):
            for batch_id, data in enumerate(train_loader()):
                fetches = [avg_cost.name]
                s_time = time.time()
                outs = exe.run(train_compiled_program,
                               feed=data,
                               fetch_list=fetches)[0]
                batch_time = time.time() - s_time
                if batch_id % 10 == 0:
                    _logger.info(
                        'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'
                        .format(step, epoch_id, batch_id, outs[0],
                                batch_time))

        # Evaluate: average (cost, top1, top5) over all test batches.
        reward = []
        for batch_id, data in enumerate(test_loader()):
            test_fetches = [
                test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
            ]
            batch_reward = exe.run(test_program,
                                   feed=data,
                                   fetch_list=test_fetches)
            reward_avg = np.mean(np.array(batch_reward), axis=1)
            reward.append(reward_avg)
            _logger.info(
                'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'
                .format(step, batch_id, batch_reward[0], batch_reward[1],
                        batch_reward[2]))
        finally_reward = np.mean(np.array(reward), axis=0)
        _logger.info(
            'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
                finally_reward[0], finally_reward[1], finally_reward[2]))

        # Feed mean top-1 accuracy back to the controller as the reward.
        rl_nas.reward(np.float32(finally_reward[1]))
def test_qat_acc(self):
    """QAT with PACT on LeNet/MNIST: train and export a quantized model,
    then re-initialize and retrain as a baseline for comparison.

    Both accuracies are logged (no assertion here); the model is exported
    twice — once after eval-mode testing and once in train mode.
    """
    lenet = ImperativeLenet()
    quant_config = {
        'activation_preprocess_type': 'PACT',
        'quantizable_layer_type': ['Conv2D', 'Linear'],
    }
    quanter = QAT(config=quant_config)
    # In-place: inserts fake-quant logic into `lenet` itself.
    quanter.quantize(lenet)

    place = paddle.CUDAPlace(
        0) if paddle.is_compiled_with_cuda() else paddle.CPUPlace()
    # HWC -> CHW, then map pixel values from [0, 255] to [-1, 1].
    transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
    train_dataset = paddle.vision.datasets.MNIST(
        mode='train', backend='cv2', transform=transform)
    val_dataset = paddle.vision.datasets.MNIST(
        mode='test', backend='cv2', transform=transform)
    train_reader = paddle.io.DataLoader(
        train_dataset, drop_last=True, places=place, batch_size=64)
    test_reader = paddle.io.DataLoader(
        val_dataset, places=place, batch_size=64)

    def train(model):
        # One-epoch training loop shared by both runs.
        adam = paddle.optimizer.Adam(
            learning_rate=0.001, parameters=model.parameters())
        epoch_num = 1
        for epoch in range(epoch_num):
            model.train()
            for batch_id, data in enumerate(train_reader):
                img = paddle.to_tensor(data[0])
                label = paddle.to_tensor(data[1])
                img = paddle.reshape(img, [-1, 1, 28, 28])
                label = paddle.reshape(label, [-1, 1])
                out = model(img)
                acc = paddle.metric.accuracy(out, label)
                loss = paddle.nn.functional.loss.cross_entropy(out, label)
                avg_loss = paddle.mean(loss)
                avg_loss.backward()
                adam.minimize(avg_loss)
                model.clear_gradients()
                if batch_id % 100 == 0:
                    _logger.info(
                        "Train | At epoch {} step {}: loss = {:}, acc= {:}"
                        .format(epoch, batch_id, avg_loss.numpy(),
                                acc.numpy()))

    def test(model):
        # Returns (mean top-1, mean top-5) accuracy over the test reader.
        model.eval()
        avg_acc = [[], []]
        for batch_id, data in enumerate(test_reader):
            img = paddle.to_tensor(data[0])
            label = paddle.to_tensor(data[1])
            img = paddle.reshape(img, [-1, 1, 28, 28])
            label = paddle.reshape(label, [-1, 1])
            out = model(img)
            acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
            acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
            avg_acc[0].append(acc_top1.numpy())
            avg_acc[1].append(acc_top5.numpy())
            if batch_id % 100 == 0:
                _logger.info(
                    "Test | step {}: acc1 = {:}, acc5 = {:}".format(
                        batch_id, acc_top1.numpy(), acc_top5.numpy()))
        _logger.info("Test |Average: acc_top1 {}, acc_top5 {}".format(
            np.mean(avg_acc[0]), np.mean(avg_acc[1])))
        return np.mean(avg_acc[0]), np.mean(avg_acc[1])

    train(lenet)
    top1_1, top5_1 = test(lenet)

    quanter.save_quantized_model(
        lenet,
        './dygraph_qat',
        input_spec=[
            paddle.static.InputSpec(
                shape=[None, 1, 28, 28], dtype='float32')
        ])

    # Re-run the constructor and retrain from fresh weights.
    lenet.__init__()
    train(lenet)
    top1_2, top5_2 = test(lenet)

    # values before quantization and after quantization should be close
    _logger.info("Before quantization: top1: {}, top5: {}".format(
        top1_2, top5_2))
    _logger.info("After quantization: top1: {}, top5: {}".format(
        top1_1, top5_1))

    # test for saving model in train mode
    lenet.train()
    quanter.save_quantized_model(
        lenet,
        './dygraph_qat',
        input_spec=[
            paddle.static.InputSpec(
                shape=[None, 1, 28, 28], dtype='float32')
        ])
def run(self,
        image,
        need_align=False,
        start_lr=0.1,
        final_lr=0.025,
        latent_level=None,  # which W+ rows to optimize; for ffhq (0~17)
        step=100,
        mse_weight=1,
        pre_latent=None):
    """Fit a StyleGAN W+ latent code to a target image by optimization.

    Minimizes LPIPS(perceptual) + mse_weight * MSE between the generated
    image and the target, optimizing only the latent rows listed in
    ``latent_level`` (the rest stay fixed).  Saves ``src.fitting.png``,
    ``dst.fitting.png`` and ``dst.fitting.npy`` under ``self.output_path``.

    Args:
        image: path to the target image.
        need_align: run face alignment on the input first.
        start_lr/final_lr: learning-rate schedule endpoints.
        latent_level: indices of latent rows to optimize; defaults to
            ``[0..11]``.  (Was a mutable default argument — now a ``None``
            sentinel; behavior for callers is unchanged.)
        step: number of optimization iterations.
        mse_weight: weight of the pixel MSE term.
        pre_latent: optional .npy file with a starting latent.

    Returns:
        (source image as ndarray, fitted image as ndarray, latent ndarray).
    """
    # FIX: avoid the mutable-default-argument anti-pattern; the list was
    # never mutated, so substituting it lazily is behavior-identical.
    if latent_level is None:
        latent_level = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]

    if need_align:
        src_img = run_alignment(image)
    else:
        src_img = Image.open(image).convert("RGB")

    generator = self.generator
    generator.train()

    percept = LPIPS(net='vgg')
    # on PaddlePaddle, lpips's default eval mode means no gradients.
    percept.train()

    n_mean_latent = 4096

    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(256),
        transforms.Transpose(),
        transforms.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5]),
    ])

    imgs = paddle.to_tensor(transform(src_img)).unsqueeze(0)

    if pre_latent is None:
        # Estimate the mean W latent from random style samples and start
        # the optimization there, tiled across all n_latent rows.
        with paddle.no_grad():
            noise_sample = paddle.randn(
                (n_mean_latent, generator.style_dim))
            latent_out = generator.style(noise_sample)

            latent_mean = latent_out.mean(0)

        latent_in = latent_mean.detach().clone().unsqueeze(0).tile(
            (imgs.shape[0], 1))
        latent_in = latent_in.unsqueeze(1).tile(
            (1, generator.n_latent, 1)).detach()

    else:
        latent_in = paddle.to_tensor(np.load(pre_latent)).unsqueeze(0)

    # Split rows into trainable (var_levels) and frozen (const_levels).
    var_levels = list(latent_level)
    const_levels = [
        i for i in range(generator.n_latent) if i not in var_levels
    ]
    assert len(var_levels) > 0
    if len(const_levels) > 0:
        latent_fix = latent_in.index_select(paddle.to_tensor(const_levels),
                                            1).detach().clone()
        latent_in = latent_in.index_select(paddle.to_tensor(var_levels),
                                           1).detach().clone()

    latent_in.stop_gradient = False

    optimizer = optim.Adam(parameters=[latent_in], learning_rate=start_lr)

    pbar = tqdm(range(step))

    for i in pbar:
        t = i / step
        lr = get_lr(t, step, start_lr, final_lr)
        optimizer.set_lr(lr)

        if len(const_levels) > 0:
            # Re-interleave trainable and frozen rows in original order.
            latent_dict = {}
            for idx, idx2 in enumerate(var_levels):
                latent_dict[idx2] = latent_in[:, idx:idx + 1]
            for idx, idx2 in enumerate(const_levels):
                latent_dict[idx2] = (latent_fix[:, idx:idx + 1]).detach()
            latent_list = []
            for idx in range(generator.n_latent):
                latent_list.append(latent_dict[idx])
            latent_n = paddle.concat(latent_list, 1)
        else:
            latent_n = latent_in

        img_gen, _ = generator([latent_n],
                               input_is_latent=True,
                               randomize_noise=False)

        batch, channel, height, width = img_gen.shape
        if height > 256:
            # Average-pool down to 256x256 so LPIPS/MSE compare like sizes.
            factor = height // 256
            img_gen = img_gen.reshape((batch, channel, height // factor,
                                       factor, width // factor, factor))
            img_gen = img_gen.mean([3, 5])

        p_loss = percept(img_gen, imgs).sum()
        mse_loss = F.mse_loss(img_gen, imgs)
        loss = p_loss + mse_weight * mse_loss

        optimizer.clear_grad()
        loss.backward()
        optimizer.step()

        pbar.set_description(
            (f"perceptual: {p_loss.numpy()[0]:.4f}; "
             f"mse: {mse_loss.numpy()[0]:.4f}; lr: {lr:.4f}"))

    # Render the final fit and persist source, result and latent.
    img_gen, _ = generator([latent_n],
                           input_is_latent=True,
                           randomize_noise=False)
    dst_img = make_image(img_gen)[0]
    dst_latent = latent_n.numpy()[0]

    os.makedirs(self.output_path, exist_ok=True)
    save_src_path = os.path.join(self.output_path, 'src.fitting.png')
    cv2.imwrite(save_src_path,
                cv2.cvtColor(np.asarray(src_img), cv2.COLOR_RGB2BGR))
    save_dst_path = os.path.join(self.output_path, 'dst.fitting.png')
    cv2.imwrite(save_dst_path, cv2.cvtColor(dst_img, cv2.COLOR_RGB2BGR))
    save_npy_path = os.path.join(self.output_path, 'dst.fitting.npy')
    np.save(save_npy_path, dst_latent)

    return np.asarray(src_img), dst_img, dst_latent
def search_mobilenetv2(config, args, image_size, is_server=True):
    """Run a DDPG-driven RL-NAS search for MobileNetV2-style architectures.

    Each search step: query RLNAS for an architecture given the current
    observation (step index + previous action), train it for
    ``args.retain_epoch`` epochs, evaluate it, and feed the top-1 accuracy
    back to the agent as reward.

    Args:
        config: RLNAS search-space configuration.
        args: parsed CLI arguments (data, ports, epochs, ...).
        image_size: square input resolution.
        is_server: start this process as the RL server (True) or as a
            client connecting to an existing server (False).
    """
    places = static.cuda_places() if args.use_gpu else static.cpu_places()
    place = places[0]
    if is_server:
        ### start a server and a client
        rl_nas = RLNAS(
            key='ddpg',
            configs=config,
            is_sync=False,
            obs_dim=26,  ### step + length_of_token
            server_addr=(args.server_address, args.port))
    else:
        ### start a client
        rl_nas = RLNAS(key='ddpg',
                       configs=config,
                       is_sync=False,
                       obs_dim=26,
                       server_addr=(args.server_address, args.port),
                       is_server=False)

    image_shape = [3, image_size, image_size]
    if args.data == 'cifar10':
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.Cifar10(mode='train',
                                                       transform=transform,
                                                       backend='cv2')
        val_dataset = paddle.vision.datasets.Cifar10(mode='test',
                                                     transform=transform,
                                                     backend='cv2')
    elif args.data == 'imagenet':
        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
        val_dataset = imagenet_reader.ImageNetDataset(mode='val')

    for step in range(args.search_steps):
        # Observation = [step] + previous action vector (all 1.0 at step 0).
        if step == 0:
            action_prev = [1. for _ in rl_nas.range_tables]
        else:
            action_prev = rl_nas.tokens[0]
        obs = [step]
        obs.extend(action_prev)
        archs = rl_nas.next_archs(obs=obs)[0][0]

        train_program = static.Program()
        test_program = static.Program()
        startup_program = static.Program()
        train_loader, avg_cost, acc_top1, acc_top5 = build_program(
            train_program, startup_program, image_shape, train_dataset,
            archs, args, places)

        test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
            test_program,
            startup_program,
            image_shape,
            val_dataset,
            archs,
            args,
            place,
            is_test=True)
        test_program = test_program.clone(for_test=True)

        exe = static.Executor(place)
        exe.run(startup_program)

        build_strategy = static.BuildStrategy()
        train_compiled_program = static.CompiledProgram(
            train_program).with_data_parallel(loss_name=avg_cost.name,
                                              build_strategy=build_strategy)
        # Short retraining of the sampled architecture.
        for epoch_id in range(args.retain_epoch):
            for batch_id, data in enumerate(train_loader()):
                fetches = [avg_cost.name]
                s_time = time.time()
                outs = exe.run(train_compiled_program,
                               feed=data,
                               fetch_list=fetches)[0]
                batch_time = time.time() - s_time
                if batch_id % 10 == 0:
                    _logger.info(
                        'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'
                        .format(step, epoch_id, batch_id, outs[0],
                                batch_time))

        # Evaluate; per-batch reward is [avg_cost, top1, top5].
        reward = []
        for batch_id, data in enumerate(test_loader()):
            test_fetches = [
                test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
            ]
            batch_reward = exe.run(test_program,
                                   feed=data,
                                   fetch_list=test_fetches)
            reward_avg = np.mean(np.array(batch_reward), axis=1)
            reward.append(reward_avg)

            _logger.info(
                'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'
                .format(step, batch_id, batch_reward[0], batch_reward[1],
                        batch_reward[2]))

        finally_reward = np.mean(np.array(reward), axis=0)
        _logger.info(
            'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
                finally_reward[0], finally_reward[1], finally_reward[2]))

        # Report the transition (obs, action, obs_next, terminal) with the
        # mean top-1 accuracy as reward.
        obs = np.expand_dims(obs, axis=0).astype('float32')
        actions = rl_nas.tokens
        obs_next = [step + 1]
        obs_next.extend(actions[0])
        obs_next = np.expand_dims(obs_next, axis=0).astype('float32')

        # FIX: ``np.bool`` was a deprecated alias (NumPy >= 1.20) and was
        # removed in NumPy 1.24; the builtin ``bool`` is the documented
        # replacement and yields the same dtype.
        if step == args.search_steps - 1:
            terminal = np.expand_dims([True], axis=0).astype(bool)
        else:
            terminal = np.expand_dims([False], axis=0).astype(bool)
        rl_nas.reward(np.expand_dims(np.float32(finally_reward[1]), axis=0),
                      obs=obs,
                      actions=actions.astype('float32'),
                      obs_next=obs_next,
                      terminal=terminal)

        if step == 2:
            sys.exit(0)
def compress(args):
    """Train a MobileNetV1 classifier under dynamic-graph unstructured
    pruning (ratio- or threshold-based), with optional multi-GPU data
    parallelism, periodic evaluation and checkpointing.
    """
    if args.use_gpu:
        place = paddle.set_device('gpu')
    else:
        place = paddle.set_device('cpu')

    trainer_num = paddle.distributed.get_world_size()
    use_data_parallel = trainer_num != 1
    if use_data_parallel:
        dist.init_parallel_env()

    train_reader = None
    test_reader = None
    if args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
    elif args.data == "cifar10":
        # cifar10 images are normalized to mean 0.5 / std 0.5 per channel.
        normalize = T.Normalize(mean=[0.5, 0.5, 0.5],
                                std=[0.5, 0.5, 0.5],
                                data_format='CHW')
        transform = T.Compose([T.Transpose(), normalize])
        train_dataset = paddle.vision.datasets.Cifar10(mode='train',
                                                       backend='cv2',
                                                       transform=transform)
        val_dataset = paddle.vision.datasets.Cifar10(mode='test',
                                                     backend='cv2',
                                                     transform=transform)
        class_dim = 10
    else:
        raise ValueError("{} is not supported.".format(args.data))

    # DistributedBatchSampler shards the train set across ranks.
    batch_sampler = paddle.io.DistributedBatchSampler(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True)
    train_loader = paddle.io.DataLoader(train_dataset,
                                        places=place,
                                        batch_sampler=batch_sampler,
                                        return_list=True,
                                        num_workers=args.num_workers,
                                        use_shared_memory=True)
    valid_loader = paddle.io.DataLoader(
        val_dataset,
        places=place,
        drop_last=False,
        return_list=True,
        batch_size=args.batch_size_for_validation,
        shuffle=False,
        use_shared_memory=True)
    step_per_epoch = int(
        np.ceil(len(train_dataset) / args.batch_size / ParallelEnv().nranks))
    # model definition
    model = mobilenet_v1(num_classes=class_dim, pretrained=True)
    if ParallelEnv().nranks > 1:
        model = paddle.DataParallel(model)

    if args.pretrained_model is not None:
        model.set_state_dict(paddle.load(args.pretrained_model))

    opt, learning_rate = create_optimizer(args, step_per_epoch, model)

    def test(epoch):
        # Full pass over the validation set; logs top-1/top-5 per period
        # and their overall means at the end.
        model.eval()
        acc_top1_ns = []
        acc_top5_ns = []
        for batch_id, data in enumerate(valid_loader):
            start_time = time.time()
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            if args.data == 'cifar10':
                # cifar10 labels come back rank-1; accuracy() needs [N, 1].
                y_data = paddle.unsqueeze(y_data, 1)

            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1.numpy()),
                            np.mean(acc_top5.numpy()),
                            end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1.numpy()))
            acc_top5_ns.append(np.mean(acc_top5.numpy()))

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns, dtype="object")),
                np.mean(np.array(acc_top5_ns, dtype="object"))))

    def train(epoch):
        # One training epoch.  NOTE: references ``pruner``, which is bound
        # later at function scope — legal because train() is only called
        # after the UnstructuredPruner below is created.
        model.train()
        train_reader_cost = 0.0
        train_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()
        for batch_id, data in enumerate(train_loader):
            train_reader_cost += time.time() - reader_start
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            if args.data == 'cifar10':
                y_data = paddle.unsqueeze(y_data, 1)
            train_start = time.time()
            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)

            loss.backward()
            opt.step()
            learning_rate.step()
            opt.clear_grad()
            # Re-apply the sparsity mask after the optimizer update.
            pruner.step()

            train_run_cost += time.time() - train_start
            total_samples += args.batch_size * ParallelEnv().nranks
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {}; acc_top1: {}; acc_top5: {}; avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec"
                    .format(
                        epoch, batch_id, opt.get_lr(), np.mean(loss.numpy()),
                        np.mean(acc_top1.numpy()), np.mean(acc_top5.numpy()),
                        train_reader_cost / args.log_period,
                        (train_reader_cost + train_run_cost) /
                        args.log_period, total_samples / args.log_period,
                        total_samples /
                        (train_reader_cost + train_run_cost)))
                # Reset the rolling cost/sample counters each log period.
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0
            reader_start = time.time()

    pruner = UnstructuredPruner(model,
                                mode=args.pruning_mode,
                                ratio=args.ratio,
                                threshold=args.threshold)

    for i in range(args.resume_epoch + 1, args.num_epochs):
        train(i)
        if (i + 1) % args.test_period == 0:
            # Materialize the pruned weights before measuring density/eval.
            pruner.update_params()
            _logger.info(
                "The current density of the pruned model is: {}%".format(
                    round(100 * UnstructuredPruner.total_sparse(model), 2)))
            test(i)
        if (i + 1) % args.model_period == 0:
            pruner.update_params()
            paddle.save(
                model.state_dict(),
                os.path.join(args.model_path, "model-pruned.pdparams"))
            paddle.save(opt.state_dict(),
                        os.path.join(args.model_path, "opt-pruned.pdopt"))
def compress(args):
    """Channel-prune a static-graph classifier by FLOPs ratio and retrain.

    Builds the model in the default static program, optionally loads a
    pretrained checkpoint, prunes ``args.pruned_ratio`` of the selected
    conv parameters with PaddleSlim's ``Pruner``, then fine-tunes the
    pruned program, evaluating and saving checkpoints periodically.
    """
    train_reader = None
    test_reader = None
    if args.data == "mnist":
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.MNIST(mode='train',
                                                     backend="cv2",
                                                     transform=transform)
        val_dataset = paddle.vision.datasets.MNIST(mode='test',
                                                   backend="cv2",
                                                   transform=transform)
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    places = paddle.static.cuda_places(
    ) if args.use_gpu else paddle.static.cpu_places()
    place = places[0]
    exe = paddle.static.Executor(place)
    image = paddle.static.data(name='image',
                               shape=[None] + image_shape,
                               dtype='float32')
    label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
    # The global batch is split evenly across devices.
    batch_size_per_card = int(args.batch_size / len(places))
    train_loader = paddle.io.DataLoader(train_dataset,
                                        places=places,
                                        feed_list=[image, label],
                                        drop_last=True,
                                        batch_size=batch_size_per_card,
                                        shuffle=True,
                                        return_list=False,
                                        use_shared_memory=True,
                                        num_workers=16)
    valid_loader = paddle.io.DataLoader(val_dataset,
                                        places=place,
                                        feed_list=[image, label],
                                        drop_last=False,
                                        return_list=False,
                                        use_shared_memory=True,
                                        batch_size=batch_size_per_card,
                                        shuffle=False)
    step_per_epoch = int(np.ceil(len(train_dataset) * 1. / args.batch_size))

    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
    avg_cost = paddle.mean(x=cost)
    acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
    acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)

    # Clone the eval program before the optimizer mutates the main program.
    val_program = paddle.static.default_main_program().clone(for_test=True)
    opt, learning_rate = create_optimizer(args, step_per_epoch)
    opt.minimize(avg_cost)
    exe.run(paddle.static.default_startup_program())

    if args.pretrained_model:
        # NOTE: dropped an unused local ``if_exist`` helper that was
        # defined here but never referenced.
        _logger.info("Load pretrained model from {}".format(
            args.pretrained_model))
        paddle.static.load(paddle.static.default_main_program(),
                           args.pretrained_model, exe)

    def test(epoch, program):
        # Evaluate ``program`` over the validation set, logging top-1/top-5.
        acc_top1_ns = []
        acc_top5_ns = []
        for batch_id, data in enumerate(valid_loader):
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=data,
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))

    def train(epoch, program):
        # One training epoch over ``program``.
        for batch_id, data in enumerate(train_loader):
            start_time = time.time()
            # BUG FIX: previously this ran the closed-over ``train_program``
            # and ignored the ``program`` parameter.  The only caller passes
            # that same compiled program, so behavior is unchanged, but the
            # parameter is now honored instead of being a silent trap.
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=data,
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, learning_rate.get_lr(), loss_n,
                            acc_top1_n, acc_top5_n, end_time - start_time))
            learning_rate.step()
            # (Removed a dead ``batch_id += 1``; enumerate() rebinds it.)

    # Baseline accuracy of the unpruned pretrained model.
    test(0, val_program)
    params = get_pruned_params(args, paddle.static.default_main_program())
    _logger.info("FLOPs before pruning: {}".format(
        flops(paddle.static.default_main_program())))
    pruner = Pruner(args.criterion)
    # Graph-only prune of the eval clone (no weight copy needed)...
    pruned_val_program, _, _ = pruner.prune(val_program,
                                            paddle.static.global_scope(),
                                            params=params,
                                            ratios=[args.pruned_ratio] *
                                            len(params),
                                            place=place,
                                            only_graph=True)
    # ...and a real prune of the training program with weight rewriting.
    pruned_program, _, _ = pruner.prune(paddle.static.default_main_program(),
                                        paddle.static.global_scope(),
                                        params=params,
                                        ratios=[args.pruned_ratio] *
                                        len(params),
                                        place=place)
    _logger.info("FLOPs after pruning: {}".format(flops(pruned_program)))

    build_strategy = paddle.static.BuildStrategy()
    exec_strategy = paddle.static.ExecutionStrategy()
    train_program = paddle.static.CompiledProgram(
        pruned_program).with_data_parallel(loss_name=avg_cost.name,
                                           build_strategy=build_strategy,
                                           exec_strategy=exec_strategy)

    for i in range(args.num_epochs):
        train(i, train_program)
        if (i + 1) % args.test_period == 0:
            test(i, pruned_val_program)
        save_model(exe, pruned_val_program,
                   os.path.join(args.model_path, str(i)))
        if args.save_inference:
            infer_model_path = os.path.join(args.model_path, "infer_models",
                                            str(i))
            paddle.static.save_inference_model(infer_model_path, [image],
                                               [out],
                                               exe,
                                               program=pruned_val_program)
            _logger.info(
                "Saved inference model into [{}]".format(infer_model_path))
def compress(args):
    """Load an unstructured-pruned MobileNetV1 checkpoint, report its
    sparsity, and run a single evaluation pass over the validation set.
    """
    test_reader = None
    # Pick the validation dataset matching the requested benchmark.
    if args.data == "imagenet":
        import imagenet_reader as reader
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
    elif args.data == "cifar10":
        channel_norm = T.Normalize(mean=[0.5, 0.5, 0.5],
                                   std=[0.5, 0.5, 0.5],
                                   data_format='CHW')
        transform = T.Compose([T.Transpose(), channel_norm])
        val_dataset = paddle.vision.datasets.Cifar10(mode='test',
                                                     backend='cv2',
                                                     transform=transform)
        class_dim = 10
    else:
        raise ValueError("{} is not supported.".format(args.data))

    places = paddle.static.cuda_places(
    ) if args.use_gpu else paddle.static.cpu_places()
    valid_loader = paddle.io.DataLoader(val_dataset,
                                        places=places,
                                        drop_last=False,
                                        return_list=True,
                                        batch_size=args.batch_size,
                                        shuffle=False,
                                        use_shared_memory=True)

    # model definition
    model = mobilenet_v1(num_classes=class_dim, pretrained=True)

    def test(epoch):
        # Single validation sweep; collects per-batch top-1/top-5 means.
        model.eval()
        top1_history = []
        top5_history = []
        for batch_id, data in enumerate(valid_loader):
            start_time = time.time()
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            if args.data == 'cifar10':
                # accuracy() expects labels shaped [N, 1].
                y_data = paddle.unsqueeze(y_data, 1)

            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)
            end_time = time.time()

            batch_top1 = np.mean(acc_top1.numpy())
            batch_top5 = np.mean(acc_top5.numpy())
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, batch_top1, batch_top5,
                            end_time - start_time))
            top1_history.append(batch_top1)
            top5_history.append(batch_top5)

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(top1_history, dtype="object")),
                np.mean(np.array(top5_history, dtype="object"))))

    # Restore the pruned weights, then measure and report sparsity.
    model.set_state_dict(paddle.load(args.pruned_model))
    _logger.info("The current sparsity of the pruned model is: {}%".format(
        round(100 * UnstructuredPruner.total_sparse(model), 2)))
    test(0)