def test_visualdl_callback(self):
        """Fit LeNet on MNIST with the VisualDL callback attached.

        This is a smoke test: it contains no assertions and passes as long
        as ``model.fit`` completes without raising.
        """
        # VisualDL does not support Python 2, so do nothing there.
        if sys.version_info < (3, ):
            return

        image_spec = InputSpec([-1, 1, 28, 28], 'float32', 'image')
        label_spec = InputSpec([None, 1], 'int64', 'label')

        preprocess = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_data = MnistDataset(mode='train', transform=preprocess)
        eval_data = MnistDataset(mode='test', transform=preprocess)

        lenet = paddle.vision.models.LeNet()
        model = paddle.Model(lenet, [image_spec], [label_spec])

        adam = paddle.optimizer.Adam(0.001, parameters=lenet.parameters())
        model.prepare(
            optimizer=adam,
            loss=paddle.nn.CrossEntropyLoss(),
            metrics=paddle.metric.Accuracy(),
        )

        # A single callback object (not a list) is handed to fit().
        vdl_callback = paddle.callbacks.VisualDL(log_dir='visualdl_log_dir')
        model.fit(train_data, eval_data, batch_size=64, callbacks=vdl_callback)
예제 #2
0
    def func_test_main(self):
        """Check the FashionMNIST training split with the default and cv2 backends.

        Verifies the dataset length, the shape of one sample per backend and
        the label range, then checks that an invalid ``backend`` value raises
        ``ValueError``.
        """
        transform = T.Transpose()
        mnist = FashionMNIST(mode='train', transform=transform)
        self.assertEqual(len(mnist), 60000)

        # randint's upper bound is exclusive; passing len(mnist) lets every
        # valid index, including the last one, be drawn.
        i = np.random.randint(0, len(mnist))
        image, label = mnist[i]
        self.assertEqual(image.shape[0], 1)
        self.assertEqual(image.shape[1], 28)
        self.assertEqual(image.shape[2], 28)
        self.assertEqual(label.shape[0], 1)
        self.assertTrue(0 <= int(label) <= 9)

        # test cv2 backend
        mnist = FashionMNIST(mode='train', transform=transform, backend='cv2')
        self.assertEqual(len(mnist), 60000)

        # Only the first sample is inspected for this backend.
        image, label = mnist[0]
        self.assertEqual(image.shape[0], 1)
        self.assertEqual(image.shape[1], 28)
        self.assertEqual(image.shape[2], 28)
        self.assertEqual(label.shape[0], 1)
        self.assertTrue(0 <= int(label) <= 9)

        # A non-string backend must be rejected.
        with self.assertRaises(ValueError):
            FashionMNIST(mode='train', transform=transform, backend=1)
def prepare_input():
    """Load ``street.jpeg`` and build the model input dictionary.

    Returns:
        A ``(feed, img)`` tuple. ``feed`` holds the preprocessed image under
        ``"image"``, the target size under ``"im_shape"`` and the per-axis
        resize ratios under ``"scale_factor"``; ``img`` is the array returned
        by ``cv2.imread`` for the source file.
    """
    img_file = root_path / "street.jpeg"
    img = cv2.imread(str(img_file))

    pipeline = T.Compose([
        T.Resize(size=(target_height, target_width)),
        T.Normalize(mean=(0, 0, 0),
                    std=(1, 1, 1),
                    data_format='HWC',
                    to_rgb=True),
        T.Transpose(),
    ])
    # Cast to float32 and scale by 1/255 after the transform pipeline.
    normalized_img = pipeline(img).astype(np.float32, copy=False) / 255.0

    # Add a new leading axis in front of the existing ones.
    img_input = normalized_img[np.newaxis, :]

    # scale_factor is calculated as: im_shape / original_im_shape
    scale_factor = [
        target_height / img.shape[0],
        target_width / img.shape[1],
    ]
    feed = {
        "image": img_input,
        "im_shape": [target_height, target_width],
        "scale_factor": scale_factor,
    }
    return feed, img
예제 #4
0
 def run_model(self, model):
     """Fit ``model`` briefly on the MNIST training split.

     Args:
         model: object exposing ``fit``; it is called with ``epochs=1``,
             ``batch_size=64``, ``num_iters=2`` and ``log_freq=1``.
     """
     preprocess = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
     mnist_train = MNIST(mode='train', transform=preprocess)
     fit_options = dict(epochs=1, batch_size=64, num_iters=2, log_freq=1)
     model.fit(mnist_train, **fit_options)
예제 #5
0
    def test_ptq(self):
        """Train an fp32 LeNet, quantize it with PTQ and compare top-1 accuracy.

        The test fails when the quantized model's top-1 accuracy is lower
        than the fp32 model's by 0.002 or more.
        """
        seed = 1
        np.random.seed(seed)
        for program in (paddle.static.default_main_program(),
                        paddle.static.default_startup_program()):
            program.random_seed = seed

        _logger.info("create the fp32 model")
        fp32_lenet = ImperativeLenet()

        _logger.info("prepare data")
        batch_size = 64
        preprocess = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.MNIST(
            mode='train', backend='cv2', transform=preprocess)
        val_dataset = paddle.vision.datasets.MNIST(
            mode='test', backend='cv2', transform=preprocess)

        # Use the first GPU when paddle was built with CUDA, else the CPU.
        if paddle.is_compiled_with_cuda():
            place = paddle.CUDAPlace(0)
        else:
            place = paddle.CPUPlace()

        train_reader = paddle.io.DataLoader(
            train_dataset,
            drop_last=True,
            places=place,
            batch_size=batch_size,
            return_list=True)
        test_reader = paddle.io.DataLoader(
            val_dataset,
            places=place,
            batch_size=batch_size,
            return_list=True)

        _logger.info("train the fp32 model")
        self.model_train(fp32_lenet, train_reader)

        _logger.info("test fp32 model")
        fp32_top1, fp32_top5 = self.model_test(fp32_lenet, test_reader)

        _logger.info("quantize the fp32 model")
        quanter = PTQ()
        quant_lenet = quanter.quantize(fp32_lenet, fuse=True)

        _logger.info("calibrate")
        self.calibrate(quant_lenet, test_reader)

        _logger.info("save and test the quantized model")
        save_path = "./tmp/model"
        image_spec = paddle.static.InputSpec(
            shape=[None, 1, 28, 28], dtype='float32')
        quanter.save_quantized_model(
            quant_lenet, save_path, input_spec=[image_spec])
        quant_top1, quant_top5 = self.model_test(quant_lenet, test_reader)

        fp32_report = "FP32 acc: top1: {}, top5: {}".format(fp32_top1,
                                                            fp32_top5)
        _logger.info(fp32_report)
        quant_report = "Int acc: top1: {}, top5: {}".format(quant_top1,
                                                            quant_top5)
        _logger.info(quant_report)

        # Largest top-1 drop tolerated after quantization.
        diff = 0.002
        top1_drop = fp32_top1 - quant_top1
        self.assertTrue(
            top1_drop < diff,
            msg="The acc of quant model is too lower than fp32 model")
예제 #6
0
 def __init__(self, noise_path, size, keys=None):
     """Collect noise PNG paths and build the crop/transpose/normalize pipeline.

     Args:
         noise_path: string prefix; ``'*.png'`` is appended to it verbatim
             to form the glob pattern (no path separator is inserted).
         size: crop size handed to ``T.RandomCrop``.
         keys: stored unchanged on the instance.
     """
     self.noise_path = noise_path
     self.size = size
     self.keys = keys

     png_pattern = noise_path + '*.png'
     self.noise_imgs = sorted(glob.glob(png_pattern))

     pipeline_steps = [
         T.RandomCrop(size),
         T.Transpose(),
         T.Normalize([0., 0., 0.], [255., 255., 255.]),
     ]
     self.transform = T.Compose(pipeline_steps)
예제 #7
0
def get_makeup_transform(cfg, pic="image"):
    """Return the transform for a makeup sample.

    Args:
        cfg: object whose ``trans_size`` attribute gives the resize target.
        pic: ``"image"`` selects resize followed by transpose; any other
            value selects a nearest-neighbour resize only.

    Returns:
        A ``T.Compose`` for ``pic == "image"``, otherwise a bare ``T.Resize``.
    """
    if pic != "image":
        return T.Resize(size=cfg.trans_size,
                        interpolation=cv2.INTER_NEAREST)

    steps = [T.Resize(size=cfg.trans_size), T.Transpose()]
    return T.Compose(steps)
예제 #8
0
    def test_main(self):
        """Check length, sample shape and label range of the MNIST test split.

        Every sample of the split is visited.
        """
        dataset = MNIST(mode='test', transform=T.Transpose())
        self.assertTrue(len(dataset) == 10000)

        expected_dims = (1, 28, 28)
        for idx in range(len(dataset)):
            image, label = dataset[idx]
            for axis, expected in enumerate(expected_dims):
                self.assertTrue(image.shape[axis] == expected)
            self.assertTrue(label.shape[0] == 1)
            self.assertTrue(0 <= int(label) <= 9)
예제 #9
0
    def func_test_main(self):
        """Check length, one random sample's shape and label range of the
        FashionMNIST test split."""
        transform = T.Transpose()
        mnist = FashionMNIST(mode='test', transform=transform)
        self.assertEqual(len(mnist), 10000)

        # randint's upper bound is exclusive; passing len(mnist) lets every
        # valid index, including the last one, be drawn.
        i = np.random.randint(0, len(mnist))
        image, label = mnist[i]
        self.assertEqual(image.shape[0], 1)
        self.assertEqual(image.shape[1], 28)
        self.assertEqual(image.shape[2], 28)
        self.assertEqual(label.shape[0], 1)
        self.assertTrue(0 <= int(label) <= 9)
예제 #10
0
 def __init__(self, data_root, input_size, mean, std):
     """Store the configuration, build the transform and load the data.

     Args:
         data_root: stored as ``self.data_root``.
         input_size: stored as ``self.input_size``; no resize transform is
             applied in this pipeline.
         mean: per-channel mean handed to ``transforms.Normalize``.
         std: per-channel std handed to ``transforms.Normalize``.
     """
     super(NormalDataset, self).__init__()
     self.data_root = data_root
     self.input_size = input_size
     self.mean = mean
     self.std = std

     to_chw = transforms.Transpose(order=(2, 0, 1))
     normalize = transforms.Normalize(mean=self.mean, std=self.std)
     self.trans = transforms.Compose([to_chw, normalize])

     # data_prepare() runs after the attributes above have been assigned.
     self.image_data, self.image_label = self.data_prepare()

     # NOTE(review): num_classes is taken from len(image_data), the same
     # value as self.len — confirm this matches what data_prepare() returns.
     item_count = len(self.image_data)
     self.num_classes = item_count
     self.len = item_count
예제 #11
0
    def __init__(self, methodName='runTest', param_names=None):
        """Create the test case and load the MNIST train/test datasets.

        Args:
            methodName: name of the test method, forwarded to the base class.
            param_names: parameter names stored as ``self._param_names``.
                ``None`` (the default) yields a fresh empty list, so
                instances never share one mutable default object.
        """
        super(TestFilterPruner, self).__init__(methodName)
        self._param_names = [] if param_names is None else param_names
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        self.train_dataset = paddle.vision.datasets.MNIST(mode="train",
                                                          backend="cv2",
                                                          transform=transform)
        self.val_dataset = paddle.vision.datasets.MNIST(mode="test",
                                                        backend="cv2",
                                                        transform=transform)

        def _reader():
            # Yield the validation samples one at a time.
            for data in self.val_dataset:
                yield data

        self.val_reader = _reader
예제 #12
0
    def __init__(self, *args, **kwargs):
        """Create the test case, switch to dynamic mode and load MNIST.

        All positional and keyword arguments are forwarded to the base class.
        """
        super(TestStaticMasks, self).__init__(*args, **kwargs)
        paddle.disable_static()
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        self.train_dataset = paddle.vision.datasets.MNIST(
            mode="train", backend="cv2", transform=transform)
        # _reader below iterates self.val_dataset, so it has to exist;
        # without this assignment the reader raised AttributeError on use.
        self.val_dataset = paddle.vision.datasets.MNIST(
            mode="test", backend="cv2", transform=transform)
        self.train_loader = paddle.io.DataLoader(
            self.train_dataset,
            places=paddle.set_device('cpu'),
            return_list=True)

        def _reader():
            # Yield the validation samples one at a time.
            for data in self.val_dataset:
                yield data

        self.val_reader = _reader
예제 #13
0
파일: eval.py 프로젝트: itminner/PaddleSlim
def eval(args):
    """Rebuild a pruned model, load a checkpoint and evaluate it.

    Args:
        args: namespace; this function reads ``use_gpu``, ``data``,
            ``model``, ``pruned_ratio``, ``checkpoint`` and ``batch_size``
            (``get_pruned_params`` receives the whole namespace).

    Raises:
        ValueError: if ``args.data`` is neither ``"cifar10"`` nor
            ``"imagenet"``.
        AssertionError: if ``args.model`` is not in ``model_list``.
    """
    device = 'gpu' if args.use_gpu else 'cpu'
    paddle.set_device(device)

    if args.data == "cifar10":
        preprocess = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        val_dataset = paddle.vision.datasets.Cifar10(mode="test",
                                                     backend="cv2",
                                                     transform=preprocess)
        class_dim, pretrain = 10, False
        # NOTE(review): the cifar10 branch also declares a 224x224 input —
        # confirm this matches the shape used when the model was pruned.
        image_shape = [3, 224, 224]
    elif args.data == "imagenet":
        val_dataset = ImageNetDataset("data/ILSVRC2012",
                                      mode='val',
                                      image_size=224,
                                      resize_short_size=256)
        class_dim, pretrain = 1000, True
        image_shape = [3, 224, 224]
    else:
        raise ValueError("{} is not supported.".format(args.data))

    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    inputs = [Input([None] + image_shape, 'float32', name='image')]
    labels = [Input([None, 1], 'int64', name='label')]

    # model definition
    build_net = models.__dict__[args.model]
    net = build_net(pretrained=pretrain, num_classes=class_dim)

    # Re-apply the pruning before the checkpoint is loaded: every selected
    # parameter is pruned with the same ratio.
    pruner = paddleslim.dygraph.L1NormFilterPruner(net, [1] + image_shape)
    params = get_pruned_params(args, net)
    ratios = {param: args.pruned_ratio for param in params}
    print("ratios: {}".format(ratios))
    pruner.prune_vars(ratios, [0])

    model = paddle.Model(net, inputs, labels)
    # No optimizer is needed for evaluation.
    model.prepare(None, paddle.nn.CrossEntropyLoss(),
                  paddle.metric.Accuracy(topk=(1, 5)))
    model.load(args.checkpoint)
    model.evaluate(eval_data=val_dataset,
                   batch_size=args.batch_size,
                   verbose=1,
                   num_workers=8)
예제 #14
0
 def __init__(self,
              output_path='output_dir',
              weight_path=None,
              use_adjust_brightness=True):
     """Set up the predictor: resolve weights, load the generator, build the transform.

     Args:
         output_path: stored as ``self.output_path``.
         weight_path: path to the weights; when ``None`` the path is
             obtained from ``get_path_from_url`` for the default URL.
         use_adjust_brightness: stored as ``self.use_adjust_brightness``.
     """
     self.output_path = output_path
     self.input_size = (256, 256)
     self.use_adjust_brightness = use_adjust_brightness

     if weight_path is None:
         default_weight_url = 'https://paddlegan.bj.bcebos.com/models/animeganv2_hayao.pdparams'
         weight_path = get_path_from_url(default_weight_url)
     self.weight_path = weight_path

     # load_checkpoints() runs only after self.weight_path has been set.
     self.generator = self.load_checkpoints()

     channel_stats = [127.5, 127.5, 127.5]
     self.transform = T.Compose([
         ResizeToScale((256, 256), 32),
         T.Transpose(),
         T.Normalize(list(channel_stats), list(channel_stats)),
     ])
    def func_warn_or_error(self):
        """Exercise ReduceLROnPlateau argument handling and two short fits.

        Checks that ``factor=2.0`` raises ``ValueError``, constructs the
        callback with ``mode='1'`` (marked as the warning case), then fits
        once monitoring ``'miou'`` and once monitoring ``'acc'`` with a
        ``PiecewiseDecay`` learning rate.
        """
        with self.assertRaises(ValueError):
            paddle.callbacks.ReduceLROnPlateau(factor=2.0)
        # warning
        paddle.callbacks.ReduceLROnPlateau(mode='1', patience=3, verbose=1)

        preprocess = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = CustomMnist(mode='train', transform=preprocess)
        val_dataset = CustomMnist(mode='test', transform=preprocess)
        net = LeNet()
        optim = paddle.optimizer.Adam(learning_rate=0.001,
                                      parameters=net.parameters())
        image_spec = InputSpec([None, 1, 28, 28], 'float32', 'x')
        label_spec = InputSpec([None, 1], 'int64', 'label')
        model = Model(net, inputs=[image_spec], labels=[label_spec])
        model.prepare(optim, loss=CrossEntropyLoss(), metrics=[Accuracy()])

        # Options shared by both fit() calls below.
        shared_fit_options = dict(batch_size=8, log_freq=1, save_freq=10)

        # First run: monitor 'miou' while only Accuracy is configured.
        plateau_cb = paddle.callbacks.ReduceLROnPlateau(monitor='miou',
                                                        patience=3,
                                                        verbose=1)
        model.fit(train_dataset,
                  val_dataset,
                  epochs=1,
                  callbacks=[plateau_cb],
                  **shared_fit_options)

        # Second run: the learning rate is a scheduler object, not a float.
        schedule = paddle.optimizer.lr.PiecewiseDecay([0.001, 0.0001],
                                                      [5, 10])
        optim = paddle.optimizer.Adam(learning_rate=schedule,
                                      parameters=net.parameters())

        model.prepare(optim, loss=CrossEntropyLoss(), metrics=[Accuracy()])
        plateau_cb = paddle.callbacks.ReduceLROnPlateau(monitor='acc',
                                                        mode='max',
                                                        patience=3,
                                                        verbose=1,
                                                        cooldown=1)
        model.fit(train_dataset,
                  val_dataset,
                  epochs=3,
                  callbacks=[plateau_cb],
                  **shared_fit_options)
예제 #16
0
    def test_save_load(self):
        """Check that saving and loading restores the AMP scaler counters
        and an optimizer moment.

        A first model is fitted for two iterations and saved; a second one
        is fitted for one iteration, so its ``incr_count`` differs until the
        checkpoint is loaded into it.
        """
        paddle.disable_static()
        paddle.set_device('gpu')
        amp_level = {"level": "O1", "init_loss_scaling": 128}
        checkpoint = './lenet_amp'
        moment_key = 'conv2d_1.w_0_moment1_0'

        def scaler_value(m, key):
            # Read one entry of the model's loss-scaler state.
            return m._scaler.state_dict()[key]

        def first_moment(m):
            # Fetch the tracked optimizer moment as a numpy array.
            return m._optimizer.state_dict()[moment_key].numpy()

        paddle.seed(2021)
        model = self.get_model(amp_level)
        preprocess = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = MNIST(mode='train', transform=preprocess)
        model.fit(train_dataset,
                  epochs=1,
                  batch_size=64,
                  num_iters=2,
                  log_freq=1)
        model.save(checkpoint)

        with paddle.fluid.unique_name.guard():
            paddle.seed(2021)
            new_model = self.get_model(amp_level)
            train_dataset = MNIST(mode='train', transform=preprocess)
            new_model.fit(train_dataset,
                          epochs=1,
                          batch_size=64,
                          num_iters=1,
                          log_freq=1)

        # not equal before load
        self.assertNotEqual(scaler_value(new_model, 'incr_count'),
                            scaler_value(model, 'incr_count'))
        print((scaler_value(new_model, 'incr_count'),
               scaler_value(model, 'incr_count')))

        # equal after load
        new_model.load(checkpoint)
        for counter in ('incr_count', 'decr_count'):
            self.assertEqual(scaler_value(new_model, counter),
                             scaler_value(model, counter))
        self.assertTrue(
            np.array_equal(first_moment(new_model), first_moment(model)))
 def func_reduce_lr_on_plateau(self):
     """Fit LeNet for ten epochs with a ReduceLROnPlateau callback attached.

     Smoke test: there are no assertions; it passes when ``model.fit``
     completes without raising.
     """
     preprocess = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
     train_dataset = CustomMnist(mode='train', transform=preprocess)
     val_dataset = CustomMnist(mode='test', transform=preprocess)

     net = LeNet()
     optim = paddle.optimizer.Adam(learning_rate=0.001,
                                   parameters=net.parameters())

     image_spec = InputSpec([None, 1, 28, 28], 'float32', 'x')
     label_spec = InputSpec([None, 1], 'int64', 'label')
     model = Model(net, inputs=[image_spec], labels=[label_spec])
     model.prepare(optim, loss=CrossEntropyLoss(), metrics=[Accuracy()])

     plateau_cb = paddle.callbacks.ReduceLROnPlateau(patience=1,
                                                     verbose=1,
                                                     cooldown=1)
     fit_options = dict(batch_size=8, log_freq=1, save_freq=10, epochs=10)
     model.fit(train_dataset,
               val_dataset,
               callbacks=[plateau_cb],
               **fit_options)
예제 #18
0
def compress(args):
    """Train MobileNetV1 with an unstructured pruner, evaluating and saving periodically.

    Args:
        args: namespace of options. Attributes read here: ``ce_test``,
            ``use_gpu``, ``data``, ``batch_size``,
            ``batch_size_for_validation``, ``num_workers``, ``checkpoint``,
            ``last_epoch``, ``pretrained_model``, ``pruning_strategy``,
            ``stable_epochs``, ``pruning_epochs``, ``tunning_epochs``,
            ``pruning_steps``, ``initial_ratio``, ``log_period``,
            ``num_epochs``, ``test_period``, ``model_period`` and
            ``model_path``. ``args.num_workers``, ``args.checkpoint`` and
            ``args.pretrained_model`` may be modified in place.

    Raises:
        ValueError: if ``args.data`` is neither ``"imagenet"`` nor
            ``"cifar10"``.
    """
    shuffle = True
    if args.ce_test:
        # Fix every random seed, and disable loader workers and shuffling.
        seed = 111
        paddle.seed(seed)
        np.random.seed(seed)
        random.seed(seed)
        args.num_workers = 0
        shuffle = False

    if args.use_gpu:
        place = paddle.set_device('gpu')
    else:
        place = paddle.set_device('cpu')

    # Initialize the parallel environment only when more than one trainer runs.
    trainer_num = paddle.distributed.get_world_size()
    use_data_parallel = trainer_num != 1
    if use_data_parallel:
        dist.init_parallel_env()

    # NOTE(review): train_reader / test_reader are never used below.
    train_reader = None
    test_reader = None
    if args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
    elif args.data == "cifar10":
        normalize = T.Normalize(mean=[0.5, 0.5, 0.5],
                                std=[0.5, 0.5, 0.5],
                                data_format='CHW')
        transform = T.Compose([T.Transpose(), normalize])
        train_dataset = paddle.vision.datasets.Cifar10(mode='train',
                                                       backend='cv2',
                                                       transform=transform)
        val_dataset = paddle.vision.datasets.Cifar10(mode='test',
                                                     backend='cv2',
                                                     transform=transform)
        class_dim = 10
    else:
        raise ValueError("{} is not supported.".format(args.data))

    batch_sampler = paddle.io.DistributedBatchSampler(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=shuffle,
        drop_last=True)

    train_loader = paddle.io.DataLoader(train_dataset,
                                        places=place,
                                        batch_sampler=batch_sampler,
                                        return_list=True,
                                        num_workers=args.num_workers,
                                        use_shared_memory=True)

    valid_loader = paddle.io.DataLoader(
        val_dataset,
        places=place,
        drop_last=False,
        return_list=True,
        batch_size=args.batch_size_for_validation,
        shuffle=False,
        use_shared_memory=True)
    # Steps per epoch for one rank: dataset size / batch size / rank count.
    step_per_epoch = int(
        np.ceil(len(train_dataset) / args.batch_size / ParallelEnv().nranks))
    # model definition
    model = mobilenet_v1(num_classes=class_dim, pretrained=True)
    if ParallelEnv().nranks > 1:
        model = paddle.DataParallel(model)

    opt, learning_rate = create_optimizer(args, step_per_epoch, model)

    # Resume from a checkpoint (weights + optimizer) when one is given and
    # last_epoch > -1; otherwise optionally load pretrained weights only.
    # The slices drop a trailing ".pdparams" (9 chars) / ".pdopt" (6 chars)
    # so the path can be used as a common prefix for both files.
    if args.checkpoint is not None and args.last_epoch > -1:
        if args.checkpoint.endswith('pdparams'):
            args.checkpoint = args.checkpoint[:-9]
        if args.checkpoint.endswith('pdopt'):
            args.checkpoint = args.checkpoint[:-6]
        model.set_state_dict(paddle.load(args.checkpoint + ".pdparams"))
        opt.set_state_dict(paddle.load(args.checkpoint + ".pdopt"))
    elif args.pretrained_model is not None:
        if args.pretrained_model.endswith('pdparams'):
            args.pretrained_model = args.pretrained_model[:-9]
        if args.pretrained_model.endswith('pdopt'):
            args.pretrained_model = args.pretrained_model[:-6]
        model.set_state_dict(paddle.load(args.pretrained_model + ".pdparams"))

    if args.pruning_strategy == 'gmp':
        # GMP pruner step 0: define configs. No need to do this if you are not using 'gmp'
        # Epoch counts are converted to iteration counts via step_per_epoch.
        configs = {
            'stable_iterations': args.stable_epochs * step_per_epoch,
            'pruning_iterations': args.pruning_epochs * step_per_epoch,
            'tunning_iterations': args.tunning_epochs * step_per_epoch,
            'resume_iteration': (args.last_epoch + 1) * step_per_epoch,
            'pruning_steps': args.pruning_steps,
            'initial_ratio': args.initial_ratio,
        }
    else:
        configs = None

    # GMP pruner step 1: initialize a pruner object
    pruner = create_unstructured_pruner(model, args, configs=configs)

    def test(epoch):
        """Evaluate the model on ``valid_loader`` and log top-1/top-5 accuracy."""
        model.eval()
        acc_top1_ns = []
        acc_top5_ns = []
        for batch_id, data in enumerate(valid_loader):
            start_time = time.time()
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            if args.data == 'cifar10':
                # Labels get an extra axis at position 1 for cifar10.
                y_data = paddle.unsqueeze(y_data, 1)

            logits = model(x_data)
            # NOTE(review): loss is computed but not used in this function.
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1.numpy()),
                            np.mean(acc_top5.numpy()), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1.numpy()))
            acc_top5_ns.append(np.mean(acc_top5.numpy()))

        # Unweighted mean of the per-batch accuracies.
        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns, dtype="object")),
                np.mean(np.array(acc_top5_ns, dtype="object"))))

    def train(epoch):
        """Run one training epoch over ``train_loader``, stepping the pruner each batch."""
        model.train()
        # Timing accumulators; they are reset after every log line.
        train_reader_cost = 0.0
        train_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()

        for batch_id, data in enumerate(train_loader):
            # Time spent waiting for this batch from the loader.
            train_reader_cost += time.time() - reader_start
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            if args.data == 'cifar10':
                y_data = paddle.unsqueeze(y_data, 1)

            train_start = time.time()
            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)

            loss.backward()
            opt.step()
            learning_rate.step()
            opt.clear_grad()
            # GMP pruner step 2: step() to update ratios and other internal states of the pruner.
            pruner.step()

            train_run_cost += time.time() - train_start
            total_samples += args.batch_size

            if batch_id % args.log_period == 0:
                # NOTE(review): the averages divide by log_period even for
                # batch 0, when only one batch has been accumulated.
                _logger.info(
                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {}; acc_top1: {}; acc_top5: {}; avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec"
                    .format(
                        epoch, batch_id, opt.get_lr(), np.mean(loss.numpy()),
                        np.mean(acc_top1.numpy()), np.mean(acc_top5.numpy()),
                        train_reader_cost / args.log_period,
                        (train_reader_cost + train_run_cost) / args.log_period,
                        total_samples / args.log_period,
                        total_samples / (train_reader_cost + train_run_cost)))
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0
            reader_start = time.time()

    for i in range(args.last_epoch + 1, args.num_epochs):
        train(i)
        # GMP pruner step 3: update params before summarizing sparsity, saving model or evaluation.
        pruner.update_params()

        if (i + 1) % args.test_period == 0:
            _logger.info(
                "The current sparsity of the pruned model is: {}%".format(
                    round(100 * UnstructuredPruner.total_sparse(model), 2)))
            test(i)

        if (i + 1) % args.model_period == 0:
            # NOTE(review): update_params() was already called above in this
            # iteration; this second call looks redundant — confirm.
            pruner.update_params()
            # The same two file names are overwritten on every save.
            paddle.save(model.state_dict(),
                        os.path.join(args.model_path, "model.pdparams"))
            paddle.save(opt.state_dict(),
                        os.path.join(args.model_path, "model.pdopt"))
예제 #19
0
    def test_qat_acc(self):
        """Compare the top-1 accuracy of an fp32 LeNet with its quantized version.

        The fp32 model is trained and tested, then re-initialized, quantized
        through ``self.quanter``, trained and tested again, and saved. The
        test fails when the quantized top-1 accuracy is lower than the fp32
        one by 0.002 or more.
        """
        # presumably prepare() creates self.quanter and set_seed() fixes the
        # random seeds — both are defined outside this method; confirm there.
        self.prepare()
        self.set_seed()

        fp32_lenet = ImperativeLenet()

        # Use the first GPU when paddle was built with CUDA, else the CPU.
        place = paddle.CUDAPlace(0) \
            if paddle.is_compiled_with_cuda() else paddle.CPUPlace()

        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])

        train_dataset = paddle.vision.datasets.MNIST(mode='train',
                                                     backend='cv2',
                                                     transform=transform)
        val_dataset = paddle.vision.datasets.MNIST(mode='test',
                                                   backend='cv2',
                                                   transform=transform)

        train_reader = paddle.io.DataLoader(train_dataset,
                                            drop_last=True,
                                            places=place,
                                            batch_size=64,
                                            return_list=True)
        test_reader = paddle.io.DataLoader(val_dataset,
                                           places=place,
                                           batch_size=64,
                                           return_list=True)

        def train(model):
            """Train ``model`` for one epoch over ``train_reader`` with Adam."""
            adam = paddle.optimizer.Adam(learning_rate=0.0001,
                                         parameters=model.parameters())
            epoch_num = 1
            for epoch in range(epoch_num):
                model.train()
                for batch_id, data in enumerate(train_reader):
                    img = paddle.to_tensor(data[0])
                    label = paddle.to_tensor(data[1])
                    # Force the layouts the model and the loss are fed with.
                    img = paddle.reshape(img, [-1, 1, 28, 28])
                    label = paddle.reshape(label, [-1, 1])

                    out = model(img)
                    acc = paddle.metric.accuracy(out, label)
                    loss = paddle.nn.functional.loss.cross_entropy(out, label)
                    avg_loss = paddle.mean(loss)
                    avg_loss.backward()
                    adam.minimize(avg_loss)
                    model.clear_gradients()
                    # Log every 100th batch.
                    if batch_id % 100 == 0:
                        _logger.info(
                            "Train | At epoch {} step {}: loss = {:}, acc= {:}"
                            .format(epoch, batch_id, avg_loss.numpy(),
                                    acc.numpy()))

        def test(model):
            """Evaluate ``model`` on ``test_reader``.

            Returns the mean of the per-batch top-1 and top-5 accuracies.
            """
            model.eval()
            # avg_acc[0] collects top-1 values, avg_acc[1] top-5 values.
            avg_acc = [[], []]
            for batch_id, data in enumerate(test_reader):
                img = paddle.to_tensor(data[0])
                img = paddle.reshape(img, [-1, 1, 28, 28])
                label = paddle.to_tensor(data[1])
                label = paddle.reshape(label, [-1, 1])

                out = model(img)
                acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
                acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
                avg_acc[0].append(acc_top1.numpy())
                avg_acc[1].append(acc_top5.numpy())
                if batch_id % 100 == 0:
                    _logger.info(
                        "Test | step {}: acc1 = {:}, acc5 = {:}".format(
                            batch_id, acc_top1.numpy(), acc_top5.numpy()))

            _logger.info("Test | Average: acc_top1 {}, acc_top5 {}".format(
                np.mean(avg_acc[0]), np.mean(avg_acc[1])))
            return np.mean(avg_acc[0]), np.mean(avg_acc[1])

        # Baseline: train and evaluate the fp32 model.
        train(fp32_lenet)
        top1_1, top5_1 = test(fp32_lenet)

        # Calling __init__ again re-runs the constructor on the same object
        # before quantization (presumably to reset the weights — confirm).
        fp32_lenet.__init__()
        quant_lenet = self.quanter.quantize(fp32_lenet)
        train(quant_lenet)
        top1_2, top5_2 = test(quant_lenet)
        self.quanter.save_quantized_model(quant_lenet,
                                          './tmp/qat',
                                          input_spec=[
                                              paddle.static.InputSpec(
                                                  shape=[None, 1, 28, 28],
                                                  dtype='float32')
                                          ])

        # values before quantization and after quantization should be close
        _logger.info("Before quantization: top1: {}, top5: {}".format(
            top1_1, top5_1))
        _logger.info("After quantization: top1: {}, top5: {}".format(
            top1_2, top5_2))
        _logger.info("\n")

        # Largest top-1 drop tolerated after quantization.
        diff = 0.002
        self.assertTrue(
            top1_1 - top1_2 < diff,
            msg="The acc of quant model is too lower than fp32 model")
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# author:gentelyang  time:2021-06-10
import os
import paddle
import paddle.distributed as dist
from paddle.io import DataLoader
from paddle.vision import transforms
# Per-channel normalization statistics.
# NOTE(review): these look like the usual ImageNet statistics on the 0-255
# pixel scale — confirm they suit the dataset being trained on.
normalize = transforms.Normalize(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.120, 57.375],
)
# Training-time pipeline: random crop to 224, random flip, transpose, normalize.
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.Transpose(),
    normalize,
])


def train():
    """Build a distributed Cifar10 training loader and a data-parallel MobileNetV2."""
    # Enable multi-card (multi-GPU) training.
    dist.init_parallel_env()
    train_dataset = paddle.vision.datasets.Cifar10(mode='train',
                                                   transform=transform)
    # Batches of 32 samples, shuffled.
    batch_sampler = paddle.io.DistributedBatchSampler(train_dataset,
                                                      batch_size=32,
                                                      shuffle=True)
    train_loader = DataLoader(dataset=train_dataset,
                              batch_sampler=batch_sampler)
    model = paddle.vision.mobilenet_v2(num_classes=10)
    # Wrap the model for multi-card training.
    model = paddle.DataParallel(model)
    # Set up the optimization method.
예제 #21
0
def compress(args):
    """Prune the filters of a vision model and fine-tune the pruned network.

    Args:
        args: Parsed options. This function reads ``use_gpu``, ``data``,
            ``model``, ``criterion``, ``pruned_ratio``, ``batch_size``,
            ``checkpoint``, ``num_epochs`` and ``model_path``; ``args`` is
            also forwarded to ``get_pruned_params`` and ``create_optimizer``.

    Raises:
        ValueError: If ``args.data`` is not ``"cifar10"``/``"imagenet"`` or
            ``args.criterion`` is not ``"fpgm"``/``"l1_norm"``.
        AssertionError: If ``args.model`` is not listed in ``model_list``.
    """
    paddle.set_device('gpu' if args.use_gpu else 'cpu')

    if args.data == "cifar10":

        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])

        train_dataset = paddle.vision.datasets.Cifar10(mode="train",
                                                       backend="cv2",
                                                       transform=transform)
        val_dataset = paddle.vision.datasets.Cifar10(mode="test",
                                                     backend="cv2",
                                                     transform=transform)
        class_dim = 10
        image_shape = [3, 32, 32]
        pretrain = False
    elif args.data == "imagenet":

        train_dataset = ImageNetDataset("data/ILSVRC2012",
                                        mode='train',
                                        image_size=224,
                                        resize_short_size=256)

        val_dataset = ImageNetDataset("data/ILSVRC2012",
                                      mode='val',
                                      image_size=224,
                                      resize_short_size=256)

        class_dim = 1000
        image_shape = [3, 224, 224]
        pretrain = True
    else:
        raise ValueError("{} is not supported.".format(args.data))
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    inputs = [Input([None] + image_shape, 'float32', name='image')]
    labels = [Input([None, 1], 'int64', name='label')]

    # model definition
    net = models.__dict__[args.model](pretrained=pretrain,
                                      num_classes=class_dim)

    # NOTE(review): the value is divided by 1000 and labelled GFLOPs, so
    # flops() presumably reports MFLOPs -- confirm.
    _logger.info("FLOPs before pruning: {}GFLOPs".format(
        flops(net, [1] + image_shape) / 1000))
    net.eval()
    if args.criterion == 'fpgm':
        pruner = paddleslim.dygraph.FPGMFilterPruner(net, [1] + image_shape)
    elif args.criterion == 'l1_norm':
        pruner = paddleslim.dygraph.L1NormFilterPruner(net, [1] + image_shape)
    else:
        # Without this branch `pruner` stays unbound and the function fails
        # further down with an unrelated-looking UnboundLocalError.
        raise ValueError("{} is not supported.".format(args.criterion))

    # Apply the same pruning ratio to every selected parameter.
    params = get_pruned_params(args, net)
    ratios = {}
    for param in params:
        ratios[param] = args.pruned_ratio
    plan = pruner.prune_vars(ratios, [0])

    _logger.info("FLOPs after pruning: {}GFLOPs; pruned ratio: {}".format(
        flops(net, [1] + image_shape) / 1000, plan.pruned_flops))

    for param in net.parameters():
        if "conv2d" in param.name:
            print("{}\t{}".format(param.name, param.shape))

    # Fine-tune the pruned network through the high-level Model API.
    net.train()
    model = paddle.Model(net, inputs, labels)
    steps_per_epoch = int(np.ceil(len(train_dataset) * 1. / args.batch_size))
    opt = create_optimizer(args, net.parameters(), steps_per_epoch)
    model.prepare(opt, paddle.nn.CrossEntropyLoss(),
                  paddle.metric.Accuracy(topk=(1, 5)))
    if args.checkpoint is not None:
        model.load(args.checkpoint)
    model.fit(train_data=train_dataset,
              eval_data=val_dataset,
              epochs=args.num_epochs,
              # args.batch_size is the global size; split it across ranks.
              batch_size=args.batch_size // ParallelEnv().nranks,
              verbose=1,
              save_dir=args.model_path,
              num_workers=8)
예제 #22
0
import paddle
import paddle.nn.functional as F
import numpy as np
import random
import matplotlib.pyplot as plt
from PIL import Image
from collections import defaultdict

print(paddle.__version__)

import paddle.vision.transforms as T

# Reorder each sample's axes with (2, 0, 1) so that it fits the
# (3, 32, 32) slots allocated below.
transform = T.Compose([T.Transpose((2, 0, 1))])

cifar10_train = paddle.vision.datasets.Cifar10(mode='train',
                                               transform=transform)

# Dense buffers for the whole training split: images and one label per row.
x_train = np.zeros((50000, 3, 32, 32))
y_train = np.zeros((50000, 1), dtype='int32')

for idx in range(len(cifar10_train)):
    image, label = cifar10_train[idx]

    # Scale the pixel values by 1/255 before storing them.
    x_train[idx] = image / 255.
    y_train[idx, 0] = label

# Drop the singleton label axis.
y_train = y_train.squeeze()

print(x_train.shape)
print(y_train.shape)
예제 #23
0
import numpy as np
import warnings

warnings.filterwarnings("ignore", category=Warning)

paddle.set_device('gpu')
place = paddle.CUDAPlace(0)
model = WideResNet(28, 10, 20, 0.3)

# Per-channel statistics, multiplied by 255 before being given to Normalize.
mean = [channel * 255 for channel in [0.4914, 0.4822, 0.4465]]
std = [channel * 255 for channel in [0.2023, 0.1994, 0.2010]]

# CIFAR-10 test split: transpose with order (2, 0, 1), then normalise.
val_transform = transforms.Compose([
    transforms.Transpose(order=(2, 0, 1)),
    transforms.Normalize(mean=mean, std=std),
])
val_dataset = paddle.vision.datasets.Cifar10(mode='test',
                                             transform=val_transform)
val_loader = paddle.io.DataLoader(val_dataset,
                                  places=place,
                                  batch_size=256,
                                  shuffle=False,
                                  num_workers=4,
                                  use_shared_memory=True)

# Restore the trained weights from the saved checkpoint.
checkpoint = paddle.load('/home/aistudio/checkpoint.pdparams')
model.set_state_dict(checkpoint)

loss_fn = paddle.nn.CrossEntropyLoss()
acc_fn = paddle.metric.accuracy
accuracies, losses = [], []
model.eval()
예제 #24
0
def search_mobilenetv2(config, args, image_size, is_server=True):
    """Search architectures with an LSTM-controller ``RLNAS`` instance.

    For every search step an architecture is sampled, trained for
    ``args.retain_epoch`` epochs, evaluated on the validation set, and the
    mean top-1 accuracy is reported back to the controller as the reward.

    Args:
        config: Search-space configuration, forwarded to ``RLNAS`` as
            ``configs``.
        args: Parsed options. This function reads ``use_gpu``,
            ``server_address``, ``port``, ``data``, ``search_steps`` and
            ``retain_epoch``; ``args`` is also forwarded to
            ``build_program``.
        image_size: Side length; the image shape used is
            ``[3, image_size, image_size]``.
        is_server: When true, start a server and a client; otherwise start
            only a client (``is_server=False`` is passed to ``RLNAS``).

    Raises:
        ValueError: If ``args.data`` is neither ``'cifar10'`` nor
            ``'imagenet'``.
    """
    places = static.cuda_places() if args.use_gpu else static.cpu_places()
    place = places[0]

    # Server and client share every controller setting; the client differs
    # only by the explicit ``is_server=False``.
    controller_kwargs = dict(key='lstm',
                             configs=config,
                             is_sync=False,
                             server_addr=(args.server_address, args.port),
                             controller_batch_size=1,
                             controller_decay_steps=1000,
                             controller_decay_rate=0.8,
                             lstm_num_layers=1,
                             hidden_size=10,
                             temperature=1.0)
    if not is_server:
        controller_kwargs['is_server'] = False
    rl_nas = RLNAS(**controller_kwargs)

    image_shape = [3, image_size, image_size]
    if args.data == 'cifar10':
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.Cifar10(mode='train',
                                                       transform=transform,
                                                       backend='cv2')
        val_dataset = paddle.vision.datasets.Cifar10(mode='test',
                                                     transform=transform,
                                                     backend='cv2')

    elif args.data == 'imagenet':
        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
        val_dataset = imagenet_reader.ImageNetDataset(mode='val')
    else:
        # Fail here instead of with a NameError on `train_dataset` later on.
        raise ValueError("{} is not supported.".format(args.data))

    for step in range(args.search_steps):
        archs = rl_nas.next_archs(1)[0][0]

        train_program = static.Program()
        test_program = static.Program()
        startup_program = static.Program()
        train_loader, avg_cost, acc_top1, acc_top5 = build_program(
            train_program, startup_program, image_shape, train_dataset, archs,
            args, places)

        test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
            test_program,
            startup_program,
            image_shape,
            val_dataset,
            archs,
            args,
            place,
            is_test=True)
        test_program = test_program.clone(for_test=True)

        exe = static.Executor(place)
        exe.run(startup_program)

        build_strategy = static.BuildStrategy()
        train_compiled_program = static.CompiledProgram(
            train_program).with_data_parallel(loss_name=avg_cost.name,
                                              build_strategy=build_strategy)
        for epoch_id in range(args.retain_epoch):
            for batch_id, data in enumerate(train_loader()):
                fetches = [avg_cost.name]
                s_time = time.time()
                outs = exe.run(train_compiled_program,
                               feed=data,
                               fetch_list=fetches)[0]
                # time.time() is in seconds; convert so that the value
                # matches the "ms" unit printed in the log line.
                batch_time = (time.time() - s_time) * 1000
                if batch_id % 10 == 0:
                    _logger.info(
                        'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'
                        .format(step, epoch_id, batch_id, outs[0], batch_time))

        # One row per validation batch: [avg_cost, acc_top1, acc_top5].
        reward = []
        for batch_id, data in enumerate(test_loader()):
            test_fetches = [
                test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
            ]
            batch_reward = exe.run(test_program,
                                   feed=data,
                                   fetch_list=test_fetches)
            reward_avg = np.mean(np.array(batch_reward), axis=1)
            reward.append(reward_avg)

            _logger.info(
                'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'
                .format(step, batch_id, batch_reward[0], batch_reward[1],
                        batch_reward[2]))

        finally_reward = np.mean(np.array(reward), axis=0)
        _logger.info(
            'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
                finally_reward[0], finally_reward[1], finally_reward[2]))

        # The mean top-1 accuracy is the reward given to the controller.
        rl_nas.reward(np.float32(finally_reward[1]))
예제 #25
0
    def test_qat_acc(self):
        """Train and evaluate a QAT-quantized LeNet on MNIST, then a re-initialised one.

        The model is quantized with PACT activation preprocessing on Conv2D
        and Linear layers, trained for one epoch, evaluated, and exported with
        ``save_quantized_model``. The same object is then re-initialised,
        trained and evaluated again, and exported once more while in train
        mode.

        NOTE(review): the two accuracy pairs are only logged; nothing is
        asserted about them.
        """
        lenet = ImperativeLenet()
        quant_config = {
            'activation_preprocess_type': 'PACT',
            'quantizable_layer_type': ['Conv2D', 'Linear'],
        }
        quanter = QAT(config=quant_config)
        quanter.quantize(lenet)

        # Use GPU 0 when Paddle was built with CUDA, otherwise the CPU.
        place = paddle.CUDAPlace(
            0) if paddle.is_compiled_with_cuda() else paddle.CPUPlace()

        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])

        train_dataset = paddle.vision.datasets.MNIST(mode='train',
                                                     backend='cv2',
                                                     transform=transform)
        val_dataset = paddle.vision.datasets.MNIST(mode='test',
                                                   backend='cv2',
                                                   transform=transform)
        train_reader = paddle.io.DataLoader(train_dataset,
                                            drop_last=True,
                                            places=place,
                                            batch_size=64)
        test_reader = paddle.io.DataLoader(val_dataset,
                                           places=place,
                                           batch_size=64)

        def train(model):
            """Train ``model`` for one epoch over ``train_reader`` with Adam (lr 0.001)."""
            adam = paddle.optimizer.Adam(learning_rate=0.001,
                                         parameters=model.parameters())
            epoch_num = 1
            for epoch in range(epoch_num):
                model.train()
                for batch_id, data in enumerate(train_reader):
                    img = paddle.to_tensor(data[0])
                    label = paddle.to_tensor(data[1])
                    img = paddle.reshape(img, [-1, 1, 28, 28])
                    label = paddle.reshape(label, [-1, 1])

                    out = model(img)
                    acc = paddle.metric.accuracy(out, label)
                    loss = paddle.nn.functional.loss.cross_entropy(out, label)
                    avg_loss = paddle.mean(loss)
                    avg_loss.backward()
                    adam.minimize(avg_loss)
                    model.clear_gradients()
                    # Log progress every 100 batches.
                    if batch_id % 100 == 0:
                        _logger.info(
                            "Train | At epoch {} step {}: loss = {:}, acc= {:}"
                            .format(epoch, batch_id, avg_loss.numpy(),
                                    acc.numpy()))

        def test(model):
            """Evaluate ``model`` on ``test_reader``.

            Returns:
                Tuple ``(mean top-1 accuracy, mean top-5 accuracy)``, each
                averaged over the per-batch values.
            """
            model.eval()
            # avg_acc[0] collects per-batch top-1, avg_acc[1] per-batch top-5.
            avg_acc = [[], []]
            for batch_id, data in enumerate(test_reader):
                img = paddle.to_tensor(data[0])
                label = paddle.to_tensor(data[1])
                img = paddle.reshape(img, [-1, 1, 28, 28])
                label = paddle.reshape(label, [-1, 1])

                out = model(img)
                acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
                acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
                avg_acc[0].append(acc_top1.numpy())
                avg_acc[1].append(acc_top5.numpy())
                if batch_id % 100 == 0:
                    _logger.info(
                        "Test | step {}: acc1 = {:}, acc5 = {:}".format(
                            batch_id, acc_top1.numpy(), acc_top5.numpy()))

            _logger.info("Test |Average: acc_top1 {}, acc_top5 {}".format(
                np.mean(avg_acc[0]), np.mean(avg_acc[1])))
            return np.mean(avg_acc[0]), np.mean(avg_acc[1])

        # First pass: the model that went through quanter.quantize().
        train(lenet)
        top1_1, top5_1 = test(lenet)
        quanter.save_quantized_model(lenet,
                                     './dygraph_qat',
                                     input_spec=[
                                         paddle.static.InputSpec(
                                             shape=[None, 1, 28, 28],
                                             dtype='float32')
                                     ])

        # Second pass: re-run the constructor on the same object and retrain.
        # NOTE(review): presumably this rebuilds plain, un-quantized layers
        # (the log below labels these results "Before quantization") -- confirm.
        lenet.__init__()
        train(lenet)
        top1_2, top5_2 = test(lenet)

        # values before quantization and after quantization should be close
        _logger.info("Before quantization: top1: {}, top5: {}".format(
            top1_2, top5_2))
        _logger.info("After quantization: top1: {}, top5: {}".format(
            top1_1, top5_1))

        # test for saving model in train mode
        lenet.train()
        quanter.save_quantized_model(lenet,
                                     './dygraph_qat',
                                     input_spec=[
                                         paddle.static.InputSpec(
                                             shape=[None, 1, 28, 28],
                                             dtype='float32')
                                     ])
예제 #26
0
    def run(
            self,
            image,
            need_align=False,
            start_lr=0.1,
            final_lr=0.025,
            latent_level=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                          11],  # for ffhq (0~17)
            step=100,
            mse_weight=1,
            pre_latent=None):
        """Fit a latent code so that the generator output matches ``image``.

        The latent is optimised with Adam against a perceptual (LPIPS) loss
        plus a weighted MSE loss. The source image, the generated image and
        the fitted latent are written to ``self.output_path`` as
        ``src.fitting.png``, ``dst.fitting.png`` and ``dst.fitting.npy``.

        Args:
            image: Image location, passed to ``run_alignment`` or
                ``Image.open``.
            need_align: When true, obtain the source image from
                ``run_alignment(image)`` instead of opening it directly.
            start_lr: Initial learning rate; also passed to ``get_lr``.
            final_lr: Passed to ``get_lr`` together with ``start_lr``.
            latent_level: Latent indices that are optimised; every other
                index below ``generator.n_latent`` stays fixed. The default
                list is only read, never mutated.
            step: Number of optimisation iterations.
            mse_weight: Multiplier of the MSE term in the total loss.
            pre_latent: Optional file loaded with ``np.load`` as the initial
                latent. When ``None`` the mean of 4096 sampled style vectors
                is used.

        Returns:
            Tuple ``(source image as array, generated image, fitted latent)``.

        NOTE(review): ``latent_n`` is bound only inside the loop, so
        ``step=0`` would fail at the final generator call.
        """

        if need_align:
            src_img = run_alignment(image)
        else:
            src_img = Image.open(image).convert("RGB")

        generator = self.generator
        generator.train()

        percept = LPIPS(net='vgg')
        # on PaddlePaddle, lpips's default eval mode means no gradients.
        percept.train()

        # Number of random style samples averaged into the initial latent.
        n_mean_latent = 4096

        transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(256),
            transforms.Transpose(),
            transforms.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5]),
        ])

        # Add a leading batch axis to the transformed image.
        imgs = paddle.to_tensor(transform(src_img)).unsqueeze(0)

        if pre_latent is None:
            with paddle.no_grad():
                noise_sample = paddle.randn(
                    (n_mean_latent, generator.style_dim))
                latent_out = generator.style(noise_sample)

                latent_mean = latent_out.mean(0)

            # Repeat the mean latent per image, then per latent level.
            latent_in = latent_mean.detach().clone().unsqueeze(0).tile(
                (imgs.shape[0], 1))
            latent_in = latent_in.unsqueeze(1).tile(
                (1, generator.n_latent, 1)).detach()

        else:
            latent_in = paddle.to_tensor(np.load(pre_latent)).unsqueeze(0)

        # Split the levels into trainable (var) and frozen (const) groups.
        var_levels = list(latent_level)
        const_levels = [
            i for i in range(generator.n_latent) if i not in var_levels
        ]
        assert len(var_levels) > 0
        if len(const_levels) > 0:
            latent_fix = latent_in.index_select(paddle.to_tensor(const_levels),
                                                1).detach().clone()
            latent_in = latent_in.index_select(paddle.to_tensor(var_levels),
                                               1).detach().clone()

        # Only latent_in receives gradients and is handed to the optimizer.
        latent_in.stop_gradient = False

        optimizer = optim.Adam(parameters=[latent_in], learning_rate=start_lr)

        pbar = tqdm(range(step))

        for i in pbar:
            # Fraction of the schedule completed, fed to the lr schedule.
            t = i / step
            lr = get_lr(t, step, start_lr, final_lr)
            optimizer.set_lr(lr)

            if len(const_levels) > 0:
                # Re-assemble the full latent in level order from the
                # trainable slices and the detached fixed slices.
                latent_dict = {}
                for idx, idx2 in enumerate(var_levels):
                    latent_dict[idx2] = latent_in[:, idx:idx + 1]
                for idx, idx2 in enumerate(const_levels):
                    latent_dict[idx2] = (latent_fix[:, idx:idx + 1]).detach()
                latent_list = []
                for idx in range(generator.n_latent):
                    latent_list.append(latent_dict[idx])
                latent_n = paddle.concat(latent_list, 1)
            else:
                latent_n = latent_in

            img_gen, _ = generator([latent_n],
                                   input_is_latent=True,
                                   randomize_noise=False)

            batch, channel, height, width = img_gen.shape

            if height > 256:
                # Average factor x factor pixel blocks so the output is
                # downsampled by `factor` before the losses are computed.
                factor = height // 256

                img_gen = img_gen.reshape((batch, channel, height // factor,
                                           factor, width // factor, factor))
                img_gen = img_gen.mean([3, 5])

            p_loss = percept(img_gen, imgs).sum()
            mse_loss = F.mse_loss(img_gen, imgs)
            loss = p_loss + mse_weight * mse_loss

            optimizer.clear_grad()
            loss.backward()
            optimizer.step()

            pbar.set_description(
                (f"perceptual: {p_loss.numpy()[0]:.4f}; "
                 f"mse: {mse_loss.numpy()[0]:.4f}; lr: {lr:.4f}"))

        # Final render from the latent assembled in the last iteration.
        img_gen, _ = generator([latent_n],
                               input_is_latent=True,
                               randomize_noise=False)
        dst_img = make_image(img_gen)[0]
        dst_latent = latent_n.numpy()[0]

        os.makedirs(self.output_path, exist_ok=True)
        save_src_path = os.path.join(self.output_path, 'src.fitting.png')
        cv2.imwrite(save_src_path,
                    cv2.cvtColor(np.asarray(src_img), cv2.COLOR_RGB2BGR))
        save_dst_path = os.path.join(self.output_path, 'dst.fitting.png')
        cv2.imwrite(save_dst_path, cv2.cvtColor(dst_img, cv2.COLOR_RGB2BGR))
        save_npy_path = os.path.join(self.output_path, 'dst.fitting.npy')
        np.save(save_npy_path, dst_latent)

        return np.asarray(src_img), dst_img, dst_latent
예제 #27
0
def search_mobilenetv2(config, args, image_size, is_server=True):
    """Search architectures with a DDPG-controller ``RLNAS`` instance.

    For every search step an observation ``[step, *previous tokens]`` is
    given to the controller, the sampled architecture is trained for
    ``args.retain_epoch`` epochs and evaluated, and the mean top-1 accuracy
    is reported back as the reward together with the transition
    (observation, actions, next observation, terminal flag).

    Args:
        config: Search-space configuration, forwarded to ``RLNAS`` as
            ``configs``.
        args: Parsed options. This function reads ``use_gpu``,
            ``server_address``, ``port``, ``data``, ``search_steps`` and
            ``retain_epoch``; ``args`` is also forwarded to
            ``build_program``.
        image_size: Side length; the image shape used is
            ``[3, image_size, image_size]``.
        is_server: When true, start a server and a client; otherwise start
            only a client (``is_server=False`` is passed to ``RLNAS``).

    Raises:
        ValueError: If ``args.data`` is neither ``'cifar10'`` nor
            ``'imagenet'``.
        SystemExit: The process exits with status 0 after the step with
            index 2 has been completed.
    """
    places = static.cuda_places() if args.use_gpu else static.cpu_places()
    place = places[0]

    # Server and client share every controller setting; the client differs
    # only by the explicit ``is_server=False``.
    controller_kwargs = dict(
        key='ddpg',
        configs=config,
        is_sync=False,
        obs_dim=26,  ### step + length_of_token
        server_addr=(args.server_address, args.port))
    if not is_server:
        controller_kwargs['is_server'] = False
    rl_nas = RLNAS(**controller_kwargs)

    image_shape = [3, image_size, image_size]
    if args.data == 'cifar10':
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.Cifar10(mode='train',
                                                       transform=transform,
                                                       backend='cv2')
        val_dataset = paddle.vision.datasets.Cifar10(mode='test',
                                                     transform=transform,
                                                     backend='cv2')

    elif args.data == 'imagenet':
        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
        val_dataset = imagenet_reader.ImageNetDataset(mode='val')
    else:
        # Fail here instead of with a NameError on `train_dataset` later on.
        raise ValueError("{} is not supported.".format(args.data))

    for step in range(args.search_steps):
        # Observation = current step followed by the previous action tokens
        # (all ones on the very first step).
        if step == 0:
            action_prev = [1. for _ in rl_nas.range_tables]
        else:
            action_prev = rl_nas.tokens[0]
        obs = [step]
        obs.extend(action_prev)
        archs = rl_nas.next_archs(obs=obs)[0][0]

        train_program = static.Program()
        test_program = static.Program()
        startup_program = static.Program()
        train_loader, avg_cost, acc_top1, acc_top5 = build_program(
            train_program, startup_program, image_shape, train_dataset, archs,
            args, places)

        test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
            test_program,
            startup_program,
            image_shape,
            val_dataset,
            archs,
            args,
            place,
            is_test=True)
        test_program = test_program.clone(for_test=True)

        exe = static.Executor(place)
        exe.run(startup_program)

        build_strategy = static.BuildStrategy()
        train_compiled_program = static.CompiledProgram(
            train_program).with_data_parallel(loss_name=avg_cost.name,
                                              build_strategy=build_strategy)
        for epoch_id in range(args.retain_epoch):
            for batch_id, data in enumerate(train_loader()):
                fetches = [avg_cost.name]
                s_time = time.time()
                outs = exe.run(train_compiled_program,
                               feed=data,
                               fetch_list=fetches)[0]
                # time.time() is in seconds; convert so that the value
                # matches the "ms" unit printed in the log line.
                batch_time = (time.time() - s_time) * 1000
                if batch_id % 10 == 0:
                    _logger.info(
                        'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'
                        .format(step, epoch_id, batch_id, outs[0], batch_time))

        # One row per validation batch: [avg_cost, acc_top1, acc_top5].
        reward = []
        for batch_id, data in enumerate(test_loader()):
            test_fetches = [
                test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
            ]
            batch_reward = exe.run(test_program,
                                   feed=data,
                                   fetch_list=test_fetches)
            reward_avg = np.mean(np.array(batch_reward), axis=1)
            reward.append(reward_avg)

            _logger.info(
                'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'
                .format(step, batch_id, batch_reward[0], batch_reward[1],
                        batch_reward[2]))

        finally_reward = np.mean(np.array(reward), axis=0)
        _logger.info(
            'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
                finally_reward[0], finally_reward[1], finally_reward[2]))

        # Build the transition handed to the controller; every piece gets a
        # leading batch axis of size 1.
        obs = np.expand_dims(obs, axis=0).astype('float32')
        actions = rl_nas.tokens
        obs_next = [step + 1]
        obs_next.extend(actions[0])
        obs_next = np.expand_dims(obs_next, axis=0).astype('float32')

        # `np.bool` was removed in NumPy 1.24; `np.bool_` is the supported
        # boolean scalar type and yields the same array.
        is_last_step = step == args.search_steps - 1
        terminal = np.expand_dims([is_last_step], axis=0).astype(np.bool_)
        rl_nas.reward(np.expand_dims(np.float32(finally_reward[1]), axis=0),
                      obs=obs,
                      actions=actions.astype('float32'),
                      obs_next=obs_next,
                      terminal=terminal)

        # NOTE(review): hard stop after three steps regardless of
        # args.search_steps -- looks like a smoke-test limit; confirm.
        if step == 2:
            sys.exit(0)
예제 #28
0
def compress(args):
    """Train MobileNetV1 while applying unstructured pruning.

    Args:
        args: Parsed options. This function reads ``use_gpu``, ``data``,
            ``batch_size``, ``num_workers``, ``batch_size_for_validation``,
            ``pretrained_model``, ``log_period``, ``pruning_mode``,
            ``ratio``, ``threshold``, ``resume_epoch``, ``num_epochs``,
            ``test_period``, ``model_period`` and ``model_path``; ``args`` is
            also forwarded to ``create_optimizer``.

    Raises:
        ValueError: If ``args.data`` is neither ``"imagenet"`` nor
            ``"cifar10"``.
    """
    if args.use_gpu:
        place = paddle.set_device('gpu')
    else:
        place = paddle.set_device('cpu')

    # Initialise the parallel environment only when more than one trainer runs.
    trainer_num = paddle.distributed.get_world_size()
    use_data_parallel = trainer_num != 1
    if use_data_parallel:
        dist.init_parallel_env()

    # NOTE(review): these two names are never read again in this function.
    train_reader = None
    test_reader = None
    if args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
    elif args.data == "cifar10":
        normalize = T.Normalize(mean=[0.5, 0.5, 0.5],
                                std=[0.5, 0.5, 0.5],
                                data_format='CHW')
        transform = T.Compose([T.Transpose(), normalize])
        train_dataset = paddle.vision.datasets.Cifar10(mode='train',
                                                       backend='cv2',
                                                       transform=transform)
        val_dataset = paddle.vision.datasets.Cifar10(mode='test',
                                                     backend='cv2',
                                                     transform=transform)
        class_dim = 10
    else:
        raise ValueError("{} is not supported.".format(args.data))

    batch_sampler = paddle.io.DistributedBatchSampler(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True)

    train_loader = paddle.io.DataLoader(train_dataset,
                                        places=place,
                                        batch_sampler=batch_sampler,
                                        return_list=True,
                                        num_workers=args.num_workers,
                                        use_shared_memory=True)

    valid_loader = paddle.io.DataLoader(
        val_dataset,
        places=place,
        drop_last=False,
        return_list=True,
        batch_size=args.batch_size_for_validation,
        shuffle=False,
        use_shared_memory=True)
    # Steps per epoch on one rank: dataset size / per-rank batch / rank count.
    step_per_epoch = int(
        np.ceil(len(train_dataset) / args.batch_size / ParallelEnv().nranks))
    # model definition
    model = mobilenet_v1(num_classes=class_dim, pretrained=True)
    if ParallelEnv().nranks > 1:
        model = paddle.DataParallel(model)

    if args.pretrained_model is not None:
        model.set_state_dict(paddle.load(args.pretrained_model))

    opt, learning_rate = create_optimizer(args, step_per_epoch, model)

    def test(epoch):
        """Evaluate ``model`` on ``valid_loader`` and log top-1/top-5 accuracy."""
        model.eval()
        acc_top1_ns = []
        acc_top5_ns = []
        for batch_id, data in enumerate(valid_loader):
            start_time = time.time()
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            # For cifar10 the labels get an extra axis at position 1.
            if args.data == 'cifar10':
                y_data = paddle.unsqueeze(y_data, 1)

            logits = model(x_data)
            # NOTE(review): `loss` is computed but not used during evaluation.
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1.numpy()),
                            np.mean(acc_top5.numpy()), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1.numpy()))
            acc_top5_ns.append(np.mean(acc_top5.numpy()))

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns, dtype="object")),
                np.mean(np.array(acc_top5_ns, dtype="object"))))

    def train(epoch):
        """Run one training epoch, stepping the optimizer, lr schedule and pruner per batch."""
        model.train()
        # Accumulators for the current logging window; reset after each log.
        train_reader_cost = 0.0
        train_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()

        for batch_id, data in enumerate(train_loader):
            # Time spent waiting for the loader to yield this batch.
            train_reader_cost += time.time() - reader_start
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            if args.data == 'cifar10':
                y_data = paddle.unsqueeze(y_data, 1)

            train_start = time.time()
            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)

            loss.backward()
            opt.step()
            learning_rate.step()
            opt.clear_grad()
            # `pruner` is bound later in compress(); the closure resolves it
            # when train() is called, which happens after that assignment.
            pruner.step()
            train_run_cost += time.time() - train_start
            total_samples += args.batch_size * ParallelEnv().nranks

            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {}; acc_top1: {}; acc_top5: {}; avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec"
                    .format(
                        epoch, batch_id, opt.get_lr(), np.mean(loss.numpy()),
                        np.mean(acc_top1.numpy()), np.mean(acc_top5.numpy()),
                        train_reader_cost / args.log_period,
                        (train_reader_cost + train_run_cost) / args.log_period,
                        total_samples / args.log_period,
                        total_samples / (train_reader_cost + train_run_cost)))
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0

            reader_start = time.time()

    pruner = UnstructuredPruner(model,
                                mode=args.pruning_mode,
                                ratio=args.ratio,
                                threshold=args.threshold)

    # Epochs start right after args.resume_epoch.
    for i in range(args.resume_epoch + 1, args.num_epochs):
        train(i)
        # Periodic evaluation; pruner.update_params() is called first.
        if (i + 1) % args.test_period == 0:
            pruner.update_params()
            # NOTE(review): the message says "density" but the value comes
            # from total_sparse(); confirm which quantity is reported.
            _logger.info(
                "The current density of the pruned model is: {}%".format(
                    round(100 * UnstructuredPruner.total_sparse(model), 2)))
            test(i)
        # Periodic checkpoint of model and optimizer state (files overwritten).
        if (i + 1) % args.model_period == 0:
            pruner.update_params()
            paddle.save(model.state_dict(),
                        os.path.join(args.model_path, "model-pruned.pdparams"))
            paddle.save(opt.state_dict(),
                        os.path.join(args.model_path, "opt-pruned.pdopt"))
예제 #29
0
def compress(args):
    """Prune a static-graph classifier and fine-tune the pruned program.

    The function builds the dataset and data loaders selected by
    ``args.data``, constructs the network named by ``args.model`` in the
    default static program, evaluates it once, prunes both the training
    and the evaluation program with ``Pruner`` and then trains the pruned
    program for ``args.num_epochs`` epochs, evaluating and saving
    periodically.

    Args:
        args: Parsed option namespace. This function reads ``data``,
            ``model``, ``use_gpu``, ``batch_size``, ``pretrained_model``,
            ``log_period``, ``criterion``, ``pruned_ratio``, ``num_epochs``,
            ``test_period``, ``model_path`` and ``save_inference``; the
            helpers ``create_optimizer`` and ``get_pruned_params`` also
            receive ``args`` and may read further fields.

    Raises:
        ValueError: If ``args.data`` is neither ``"mnist"`` nor
            ``"imagenet"``.
        AssertionError: If ``args.model`` is not listed in ``model_list``.
    """
    if args.data == "mnist":
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.MNIST(mode='train',
                                                     backend="cv2",
                                                     transform=transform)
        val_dataset = paddle.vision.datasets.MNIST(mode='test',
                                                   backend="cv2",
                                                   transform=transform)
        class_dim = 10
        image_shape = [1, 28, 28]
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
        image_shape = [3, 224, 224]
    else:
        raise ValueError("{} is not supported.".format(args.data))
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)

    places = paddle.static.cuda_places(
    ) if args.use_gpu else paddle.static.cpu_places()
    # Evaluation and pruning run on the first device only; training uses
    # every device in `places`.
    place = places[0]
    exe = paddle.static.Executor(place)

    image = paddle.static.data(name='image',
                               shape=[None] + image_shape,
                               dtype='float32')
    label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')

    # The global batch size is split evenly across the available devices.
    batch_size_per_card = int(args.batch_size / len(places))
    train_loader = paddle.io.DataLoader(train_dataset,
                                        places=places,
                                        feed_list=[image, label],
                                        drop_last=True,
                                        batch_size=batch_size_per_card,
                                        shuffle=True,
                                        return_list=False,
                                        use_shared_memory=True,
                                        num_workers=16)
    valid_loader = paddle.io.DataLoader(val_dataset,
                                        places=place,
                                        feed_list=[image, label],
                                        drop_last=False,
                                        return_list=False,
                                        use_shared_memory=True,
                                        batch_size=batch_size_per_card,
                                        shuffle=False)
    step_per_epoch = int(np.ceil(len(train_dataset) * 1. / args.batch_size))

    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
    avg_cost = paddle.mean(x=cost)
    acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
    acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
    # The evaluation program is cloned before `opt.minimize` runs, so
    # whatever the optimizer appends to the main program afterwards is not
    # part of it.
    val_program = paddle.static.default_main_program().clone(for_test=True)
    opt, learning_rate = create_optimizer(args, step_per_epoch)
    opt.minimize(avg_cost)

    exe.run(paddle.static.default_startup_program())

    if args.pretrained_model:
        _logger.info("Load pretrained model from {}".format(
            args.pretrained_model))
        paddle.static.load(paddle.static.default_main_program(),
                           args.pretrained_model, exe)

    def test(epoch, program):
        """Run `program` over the validation loader and log top-1/top-5."""
        acc_top1_ns = []
        acc_top5_ns = []
        for batch_id, data in enumerate(valid_loader):
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program, feed=data, fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            top1 = np.mean(acc_top1_n)
            top5 = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, top1, top5,
                            end_time - start_time))
            acc_top1_ns.append(top1)
            acc_top5_ns.append(top5)

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))

    def train(epoch, program):
        """Run one epoch of `program` over the training loader.

        The learning-rate scheduler is stepped once per batch.
        """
        for batch_id, data in enumerate(train_loader):
            start_time = time.time()
            # Execute the program handed in by the caller rather than
            # reaching for a variable of the enclosing scope.
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=data,
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, learning_rate.get_lr(),
                            np.mean(loss_n), np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            learning_rate.step()

    # Baseline accuracy of the unpruned network.
    test(0, val_program)

    params = get_pruned_params(args, paddle.static.default_main_program())
    _logger.info("FLOPs before pruning: {}".format(
        flops(paddle.static.default_main_program())))
    pruner = Pruner(args.criterion)
    # Every selected parameter is pruned with the same ratio.
    ratios = [args.pruned_ratio] * len(params)
    # NOTE(review): the eval program is pruned with only_graph=True,
    # presumably so that the parameter values in the global scope are
    # modified only once, by the second call below -- confirm against the
    # Pruner documentation.
    pruned_val_program, _, _ = pruner.prune(val_program,
                                            paddle.static.global_scope(),
                                            params=params,
                                            ratios=ratios,
                                            place=place,
                                            only_graph=True)

    pruned_program, _, _ = pruner.prune(paddle.static.default_main_program(),
                                        paddle.static.global_scope(),
                                        params=params,
                                        ratios=ratios,
                                        place=place)
    _logger.info("FLOPs after pruning: {}".format(flops(pruned_program)))

    build_strategy = paddle.static.BuildStrategy()
    exec_strategy = paddle.static.ExecutionStrategy()
    train_program = paddle.static.CompiledProgram(
        pruned_program).with_data_parallel(loss_name=avg_cost.name,
                                           build_strategy=build_strategy,
                                           exec_strategy=exec_strategy)

    for i in range(args.num_epochs):
        train(i, train_program)
        if (i + 1) % args.test_period == 0:
            test(i, pruned_val_program)
            save_model(exe, pruned_val_program,
                       os.path.join(args.model_path, str(i)))
        if args.save_inference:
            infer_model_path = os.path.join(args.model_path, "infer_models",
                                            str(i))
            paddle.static.save_inference_model(infer_model_path, [image],
                                               [out],
                                               exe,
                                               program=pruned_val_program)
            _logger.info(
                "Saved inference model into [{}]".format(infer_model_path))
예제 #30
0
def compress(args):
    """Evaluate a pruned MobileNetV1 checkpoint on a validation set.

    The function builds the validation dataset selected by ``args.data``,
    creates a ``mobilenet_v1`` model, loads the weights stored at
    ``args.pruned_model``, logs the value reported by
    ``UnstructuredPruner.total_sparse`` and runs a single evaluation pass
    that logs top-1 and top-5 accuracy.

    Args:
        args: Parsed option namespace. This function reads ``data``,
            ``use_gpu``, ``batch_size``, ``log_period`` and
            ``pruned_model``.

    Raises:
        ValueError: If ``args.data`` is neither ``"imagenet"`` nor
            ``"cifar10"``.
    """
    if args.data == "imagenet":
        import imagenet_reader as reader
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
    elif args.data == "cifar10":
        normalize = T.Normalize(mean=[0.5, 0.5, 0.5],
                                std=[0.5, 0.5, 0.5],
                                data_format='CHW')
        transform = T.Compose([T.Transpose(), normalize])
        val_dataset = paddle.vision.datasets.Cifar10(mode='test',
                                                     backend='cv2',
                                                     transform=transform)
        class_dim = 10
    else:
        raise ValueError("{} is not supported.".format(args.data))

    places = paddle.static.cuda_places(
    ) if args.use_gpu else paddle.static.cpu_places()
    valid_loader = paddle.io.DataLoader(val_dataset,
                                        places=places,
                                        drop_last=False,
                                        return_list=True,
                                        batch_size=args.batch_size,
                                        shuffle=False,
                                        use_shared_memory=True)

    # model definition
    model = mobilenet_v1(num_classes=class_dim, pretrained=True)

    def test(epoch):
        """Run one pass over the validation loader and log the accuracy."""
        model.eval()
        acc_top1_ns = []
        acc_top5_ns = []
        # Evaluation never back-propagates, so gradient tracking is
        # switched off for the whole loop.
        with paddle.no_grad():
            for batch_id, data in enumerate(valid_loader):
                start_time = time.time()
                x_data = data[0]
                y_data = paddle.to_tensor(data[1])
                if args.data == 'cifar10':
                    # presumably Cifar10 labels arrive without the trailing
                    # axis that `paddle.metric.accuracy` expects -- TODO
                    # confirm against the dataset output.
                    y_data = paddle.unsqueeze(y_data, 1)

                logits = model(x_data)
                acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
                acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)
                end_time = time.time()

                top1 = np.mean(acc_top1.numpy())
                top5 = np.mean(acc_top5.numpy())
                if batch_id % args.log_period == 0:
                    _logger.info(
                        "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                        .format(epoch, batch_id, top1, top5,
                                end_time - start_time))
                acc_top1_ns.append(top1)
                acc_top5_ns.append(top5)

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns, dtype="object")),
                np.mean(np.array(acc_top5_ns, dtype="object"))))

    # Replace the weights the model was created with by the pruned
    # checkpoint before measuring anything.
    model.set_state_dict(paddle.load(args.pruned_model))
    _logger.info("The current sparsity of the pruned model is: {}%".format(
        round(100 * UnstructuredPruner.total_sparse(model), 2)))
    test(0)