def test_soft_label_loss(self):
        """Check which op types ``soft_label_loss`` adds to a merged program.

        A two-layer student program and a six-layer teacher program are built,
        the teacher is merged into the student, and the soft-label loss is
        attached between one teacher output and one student output. The test
        then verifies that:

        * no op type present after the merge disappears, and
        * the op types newly introduced by the loss are exactly
          ``cross_entropy``, ``softmax``, ``reduce_mean`` and ``scale``.
        """
        student_main = fluid.Program()
        student_startup = fluid.Program()
        with fluid.program_guard(student_main, student_startup):
            # Named ``image`` rather than ``input`` to avoid shadowing the builtin.
            image = fluid.data(name="image", shape=[None, 3, 224, 224])
            conv1 = conv_bn_layer(image, 8, 3, "conv1")
            conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
            # The result is not read again, but the addition itself appends an
            # op to the student program, so the expression must stay.
            student_predict = conv1 + conv2

        teacher_main = fluid.Program()
        teacher_startup = fluid.Program()
        with fluid.program_guard(teacher_main, teacher_startup):
            image = fluid.data(name="image", shape=[None, 3, 224, 224])
            conv1 = conv_bn_layer(image, 8, 3, "conv1")
            conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
            sum1 = conv1 + conv2
            conv3 = conv_bn_layer(sum1, 8, 3, "conv3")
            conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
            sum2 = conv4 + sum1
            conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
            teacher_predict = conv_bn_layer(conv5, 8, 3, "conv6")

        place = fluid.CPUPlace()
        data_name_map = {'image': 'image'}
        merge(teacher_main, student_main, data_name_map, place)

        # Snapshot of the op types right after the merge, before the loss.
        merged_ops = {
            op.type
            for block in student_main.blocks for op in block.ops
        }

        with fluid.program_guard(student_main):
            # NOTE(review): the 'teacher_' prefix on the first variable name is
            # presumably added by merge(); confirm against its implementation.
            distill_loss = soft_label_loss('teacher_conv6_bn_output.tmp_2',
                                           'conv2_bn_output.tmp_2',
                                           student_main)

        # Op types after the loss has been appended.
        loss_ops = {
            op.type
            for block in student_main.blocks for op in block.ops
        }

        # assertSetEqual reports the offending elements on failure, unlike
        # assertTrue(a == b), which only reports "False is not true".
        self.assertSetEqual(merged_ops - loss_ops, set())
        self.assertSetEqual(
            loss_ops - merged_ops,
            {'cross_entropy', 'softmax', 'reduce_mean', 'scale'})
# Example no. 2
# 0
def compress(args):
    """Train a student classifier with soft-label distillation from a teacher.

    The function builds a student program (classification loss plus top-1 and
    top-5 accuracy), builds a teacher program and loads its pretrained
    weights, merges the teacher into the student program, adds a soft-label
    distillation loss between ``teacher_fc_0.tmp_0`` and ``fc_0.tmp_0``, and
    then runs the train/validation loop, logging losses and accuracies.

    Args:
        args: Parsed options. Attributes read directly here: ``data``
            ("cifar10" or "imagenet"), ``model``, ``teacher_model``,
            ``teacher_pretrained_model``, ``use_gpu``, ``batch_size``,
            ``num_epochs`` and ``log_period``. ``args`` is also passed to
            ``create_optimizer``.

    Raises:
        ValueError: If ``args.data`` is not a supported dataset name.
        AssertionError: If ``args.model`` is not in ``model_list``, or the
            teacher weights directory is still missing after the download
            fallback.
    """
    if args.data == "cifar10":
        import paddle.dataset.cifar as reader
        train_reader = reader.train10()
        val_reader = reader.test10()
        class_dim = 10
        image_shape = "3,32,32"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]

    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    student_program = fluid.Program()
    s_startup = fluid.Program()

    with fluid.program_guard(student_program, s_startup):
        with fluid.unique_name.guard():
            image = fluid.layers.data(name='image',
                                      shape=image_shape,
                                      dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            train_loader = fluid.io.DataLoader.from_generator(
                feed_list=[image, label],
                capacity=64,
                use_double_buffer=True,
                iterable=True)
            valid_loader = fluid.io.DataLoader.from_generator(
                feed_list=[image, label],
                capacity=64,
                use_double_buffer=True,
                iterable=True)
            # model definition
            model = models.__dict__[args.model]()
            out = model.net(input=image, class_dim=class_dim)
            cost = fluid.layers.cross_entropy(input=out, label=label)
            avg_cost = fluid.layers.mean(x=cost)
            acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
            acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    train_reader = paddle.batch(train_reader,
                                batch_size=args.batch_size,
                                drop_last=True)
    val_reader = paddle.batch(val_reader,
                              batch_size=args.batch_size,
                              drop_last=True)
    # Clone for evaluation here, while the student program still holds only
    # the student network (the teacher and the optimizer are added later).
    val_program = student_program.clone(for_test=True)

    # Training is fed on every available place; validation on a single place.
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    train_loader.set_sample_list_generator(train_reader, places)
    valid_loader.set_sample_list_generator(val_reader, place)

    teacher_model = models.__dict__[args.teacher_model]()
    # define teacher program
    teacher_program = fluid.Program()
    t_startup = fluid.Program()
    with fluid.program_guard(teacher_program, t_startup):
        with fluid.unique_name.guard():
            image = fluid.layers.data(name='image',
                                      shape=image_shape,
                                      dtype='float32')
            predict = teacher_model.net(image, class_dim=class_dim)

    exe.run(t_startup)
    teacher_dir = args.teacher_pretrained_model
    # Check truthiness before touching the filesystem: os.path.exists(None)
    # raises TypeError, which would hide the assertion message below when the
    # option is left unset.
    if not (teacher_dir and os.path.exists(teacher_dir)):
        _download(
            'http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar',
            '.')
        _decompress('./ResNet50_vd_pretrained.tar')
    assert teacher_dir and os.path.exists(
        teacher_dir
    ), "teacher_pretrained_model should be set when teacher_model is not None."

    def if_exist(var):
        # Only load variables that have a weight file in the teacher directory.
        return os.path.exists(os.path.join(teacher_dir, var.name))

    fluid.io.load_vars(exe,
                       teacher_dir,
                       main_program=teacher_program,
                       predicate=if_exist)

    # Map the teacher's 'image' input onto the student's 'image' input.
    data_name_map = {'image': 'image'}
    merge(teacher_program, student_program, data_name_map, place)

    with fluid.program_guard(student_program, s_startup):
        distill_loss = soft_label_loss("teacher_fc_0.tmp_0", "fc_0.tmp_0",
                                       student_program)
        loss = avg_cost + distill_loss
        lr, opt = create_optimizer(args)
        opt.minimize(loss)
    # Run the student startup program only after the optimizer was added to it.
    exe.run(s_startup)
    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_reduce_ops = False
    parallel_main = fluid.CompiledProgram(student_program).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)

    for epoch_id in range(args.num_epochs):
        for step_id, data in enumerate(train_loader):
            lr_np, loss_1, loss_2, loss_3 = exe.run(parallel_main,
                                                    feed=data,
                                                    fetch_list=[
                                                        lr.name, loss.name,
                                                        avg_cost.name,
                                                        distill_loss.name
                                                    ])
            if step_id % args.log_period == 0:
                _logger.info(
                    "train_epoch {} step {} lr {:.6f}, loss {:.6f}, class loss {:.6f}, distill loss {:.6f}"
                    .format(epoch_id, step_id, lr_np[0], loss_1[0], loss_2[0],
                            loss_3[0]))
        # Per-batch accuracies for this epoch, averaged after the loop.
        val_acc1s = []
        val_acc5s = []
        for step_id, data in enumerate(valid_loader):
            val_loss, val_acc1, val_acc5 = exe.run(
                val_program,
                data,
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            val_acc1s.append(val_acc1)
            val_acc5s.append(val_acc5)
            if step_id % args.log_period == 0:
                _logger.info(
                    "valid_epoch {} step {} loss {:.6f}, top1 {:.6f}, top5 {:.6f}"
                    .format(epoch_id, step_id, val_loss[0], val_acc1[0],
                            val_acc5[0]))
        _logger.info("epoch {} top1 {:.6f}, top5 {:.6f}".format(
            epoch_id, np.mean(val_acc1s), np.mean(val_acc5s)))
# Example no. 3
# 0
def compress(args):
    """Train a student classifier with soft-label distillation from a teacher.

    The function builds a student program (classification loss plus top-1 and
    top-5 accuracy), builds a teacher program and loads its pretrained
    weights, merges the teacher into the student program, adds a soft-label
    distillation loss between ``teacher_fc_0.tmp_0`` and ``fc_0.tmp_0``, and
    then runs the train/validation loop. When ``args.save_inference`` is
    set, an inference model is saved under ``./saved_models/<epoch_id>``
    after every epoch.

    Args:
        args: Parsed options. Attributes read directly here: ``data``
            ("cifar10" or "imagenet"), ``model``, ``teacher_model``,
            ``teacher_pretrained_model``, ``use_gpu``, ``batch_size``,
            ``num_epochs``, ``log_period`` and ``save_inference``. ``args``
            is also passed to ``create_optimizer``.

    Raises:
        ValueError: If ``args.data`` is not a supported dataset name.
        AssertionError: If ``args.model`` is not in ``model_list``, or the
            teacher weights directory is still missing after the download
            fallback.
    """
    if args.data == "cifar10":
        train_dataset = paddle.vision.datasets.Cifar10(mode='train')
        val_dataset = paddle.vision.datasets.Cifar10(mode='test')
        class_dim = 10
        image_shape = "3,32,32"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]

    assert args.model in model_list, "{} is not in lists: {}".format(args.model,
                                                                     model_list)
    student_program = paddle.static.Program()
    s_startup = paddle.static.Program()
    places = paddle.static.cuda_places(
    ) if args.use_gpu else paddle.static.cpu_places()
    # The first place is used for the executor and for validation.
    place = places[0]

    with paddle.static.program_guard(student_program, s_startup):
        with paddle.fluid.unique_name.guard():
            image = paddle.static.data(
                name='image', shape=[None] + image_shape, dtype='float32')
            label = paddle.static.data(
                name='label', shape=[None, 1], dtype='int64')
            train_loader = paddle.io.DataLoader(
                train_dataset,
                places=places,
                feed_list=[image, label],
                drop_last=True,
                batch_size=args.batch_size,
                return_list=False,
                shuffle=True,
                use_shared_memory=True,
                num_workers=4)
            valid_loader = paddle.io.DataLoader(
                val_dataset,
                places=place,
                feed_list=[image, label],
                drop_last=False,
                return_list=False,
                use_shared_memory=True,
                batch_size=args.batch_size,
                shuffle=False)
            # model definition
            model = models.__dict__[args.model]()
            out = model.net(input=image, class_dim=class_dim)
            cost = paddle.nn.functional.loss.cross_entropy(
                input=out, label=label)
            avg_cost = paddle.mean(x=cost)
            acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
            acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)

    # Clone for evaluation here, while the student program still holds only
    # the student network (the teacher and the optimizer are added later).
    val_program = student_program.clone(for_test=True)
    exe = paddle.static.Executor(place)

    teacher_model = models.__dict__[args.teacher_model]()
    # define teacher program
    teacher_program = paddle.static.Program()
    t_startup = paddle.static.Program()
    with paddle.static.program_guard(teacher_program, t_startup):
        with paddle.fluid.unique_name.guard():
            image = paddle.static.data(
                name='image', shape=[None] + image_shape, dtype='float32')
            predict = teacher_model.net(image, class_dim=class_dim)

    exe.run(t_startup)
    teacher_dir = args.teacher_pretrained_model
    # Check truthiness before touching the filesystem: os.path.exists(None)
    # raises TypeError, which would hide the assertion message below when the
    # option is left unset.
    if not (teacher_dir and os.path.exists(teacher_dir)):
        _download(
            'http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar',
            '.')
        _decompress('./ResNet50_vd_pretrained.tar')
    assert teacher_dir and os.path.exists(
        teacher_dir
    ), "teacher_pretrained_model should be set when teacher_model is not None."

    def if_exist(var):
        # Load a variable only if a weight file for it exists in the teacher
        # directory. For cifar10 the final fc layer is skipped.
        # NOTE(review): presumably because the pretrained fc weights target a
        # different class count than class_dim=10 -- confirm.
        exist = os.path.exists(os.path.join(teacher_dir, var.name))
        if args.data == "cifar10" and (var.name == 'fc_0.w_0' or
                                       var.name == 'fc_0.b_0'):
            exist = False
        return exist

    # The predicate above used to be defined but never applied, so the
    # cifar10 exclusion had no effect. Restrict loading to the variables it
    # accepts.
    # NOTE(review): relies on paddle.static.load honouring ``var_list`` when
    # ``model_path`` is a directory of per-variable files -- confirm against
    # the installed Paddle version.
    teacher_vars = [
        var for var in teacher_program.list_vars() if if_exist(var)
    ]
    paddle.static.load(
        teacher_program, teacher_dir, exe, var_list=teacher_vars)

    # Map the teacher's 'image' input onto the student's 'image' input.
    data_name_map = {'image': 'image'}
    merge(teacher_program, student_program, data_name_map, place)

    with paddle.static.program_guard(student_program, s_startup):
        distill_loss = soft_label_loss("teacher_fc_0.tmp_0", "fc_0.tmp_0",
                                       student_program)
        loss = avg_cost + distill_loss
        lr, opt = create_optimizer(args)
        opt.minimize(loss)
    # Run the student startup program only after the optimizer was added to it.
    exe.run(s_startup)
    build_strategy = paddle.static.BuildStrategy()
    build_strategy.fuse_all_reduce_ops = False
    parallel_main = paddle.static.CompiledProgram(
        student_program).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy)

    for epoch_id in range(args.num_epochs):
        for step_id, data in enumerate(train_loader):
            loss_1, loss_2, loss_3 = exe.run(
                parallel_main,
                feed=data,
                fetch_list=[loss.name, avg_cost.name, distill_loss.name])
            if step_id % args.log_period == 0:
                _logger.info(
                    "train_epoch {} step {} lr {:.6f}, loss {:.6f}, class loss {:.6f}, distill loss {:.6f}".
                    format(epoch_id, step_id,
                           lr.get_lr(), loss_1[0], loss_2[0], loss_3[0]))
            # The learning-rate schedule is advanced once per training step.
            lr.step()
        # Per-batch accuracies for this epoch, averaged after the loop.
        val_acc1s = []
        val_acc5s = []
        for step_id, data in enumerate(valid_loader):
            val_loss, val_acc1, val_acc5 = exe.run(
                val_program,
                data,
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            val_acc1s.append(val_acc1)
            val_acc5s.append(val_acc5)
            if step_id % args.log_period == 0:
                _logger.info(
                    "valid_epoch {} step {} loss {:.6f}, top1 {:.6f}, top5 {:.6f}".
                    format(epoch_id, step_id, val_loss[0], val_acc1[0],
                           val_acc5[0]))
        if args.save_inference:
            paddle.fluid.io.save_inference_model(
                os.path.join("./saved_models", str(epoch_id)), ["image"],
                [out], exe, student_program)
        _logger.info("epoch {} top1 {:.6f}, top5 {:.6f}".format(
            epoch_id, np.mean(val_acc1s), np.mean(val_acc5s)))