Esempio n. 1
0
    def test_mul(self):
        main_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            input = fluid.data(name="image", shape=[None, 3, 16, 16])
            conv1 = conv_bn_layer(input, 8, 3, "conv1")
            fc_0 = paddle.fluid.layers.fc(conv1, size=10)
            fc_1 = paddle.fluid.layers.fc(fc_0, size=10)

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.Scope()
        exe.run(startup_program, scope=scope)
        pruner = Pruner()
        # test backward search of concat
        pruned_program, _, _ = pruner.prune(main_program,
                                            scope,
                                            params=["conv1_weights"],
                                            ratios=[0.5],
                                            place=place,
                                            lazy=False,
                                            only_graph=True,
                                            param_backup=None,
                                            param_shape_backup=None)
        shapes = {
            "conv1_weights": (4, 3, 3, 3),
            "fc_0.w_0": (1024, 10),
            "fc_1.w_0": (10, 10)
        }
        for param in pruned_program.global_block().all_parameters():
            if param.name in shapes.keys():
                self.assertTrue(shapes[param.name] == param.shape)
Esempio n. 2
0
    def static_prune(self, net, ratios):
        paddle.enable_static()
        main_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.unique_name.guard():
            with fluid.program_guard(main_program, startup_program):
                input = fluid.data(name="image", shape=[None, 3, 16, 16])
                model = net(pretrained=False)
                out = model(input)

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.Scope()
        exe.run(startup_program, scope=scope)
        pruner = Pruner()
        main_program, _, _ = pruner.prune(main_program,
                                          scope,
                                          params=ratios.keys(),
                                          ratios=ratios.values(),
                                          place=place,
                                          lazy=False,
                                          only_graph=False,
                                          param_backup=None,
                                          param_shape_backup=None)

        shapes = {}
        for param in main_program.global_block().all_parameters():
            shapes[param.name] = param.shape
        return shapes
Esempio n. 3
0
def main():
    cfg = load_config(FLAGS.config)

    if 'architecture' in cfg:
        main_arch = cfg.architecture
    else:
        raise ValueError("'architecture' not specified in config file.")

    merge_config(FLAGS.opt)

    # Use CPU for exporting inference model instead of GPU
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    model = create(main_arch)

    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs_def = cfg['TestReader']['inputs_def']
            inputs_def['use_dataloader'] = False
            feed_vars, _ = model.build_inputs(**inputs_def)
            test_fetches = model.test(feed_vars)
    infer_prog = infer_prog.clone(True)

    pruned_params = FLAGS.pruned_params
    assert (
        FLAGS.pruned_params is not None
    ), "FLAGS.pruned_params is empty!!! Please set it by '--pruned_params' option."
    pruned_params = FLAGS.pruned_params.strip().split(",")
    logger.info("pruned params: {}".format(pruned_params))
    pruned_ratios = [float(n) for n in FLAGS.pruned_ratios.strip().split(",")]
    logger.info("pruned ratios: {}".format(pruned_ratios))
    assert (len(pruned_params) == len(pruned_ratios)
            ), "The length of pruned params and pruned ratios should be equal."
    assert (pruned_ratios > [0] * len(pruned_ratios) and
            pruned_ratios < [1] * len(pruned_ratios)
            ), "The elements of pruned ratios should be in range (0, 1)."

    base_flops = flops(infer_prog)
    pruner = Pruner()
    infer_prog, _, _ = pruner.prune(
        infer_prog,
        fluid.global_scope(),
        params=pruned_params,
        ratios=pruned_ratios,
        place=place,
        only_graph=True)
    pruned_flops = flops(infer_prog)
    logger.info("pruned FLOPS: {}".format(
        float(base_flops - pruned_flops) / base_flops))

    exe.run(startup_prog)
    checkpoint.load_checkpoint(exe, infer_prog, cfg.weights)

    save_infer_model(FLAGS, exe, feed_vars, test_fetches, infer_prog)
    def test_prune(self):
        main_program = fluid.Program()
        startup_program = fluid.Program()
        #   X       X              O       X              O
        # conv1-->conv2-->sum1-->conv3-->conv4-->sum2-->conv5-->conv6
        #     |            ^ |                    ^
        #     |____________| |____________________|
        #
        # X: prune output channels
        # O: prune input channels
        with fluid.program_guard(main_program, startup_program):
            input = fluid.data(name="image", shape=[None, 3, 16, 16])
            conv1 = conv_bn_layer(input, 8, 3, "conv1")
            conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
            sum1 = conv1 + conv2
            conv3 = conv_bn_layer(sum1, 8, 3, "conv3")
            conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
            sum2 = conv4 + sum1
            conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
            conv6 = conv_bn_layer(conv5, 8, 3, "conv6")

        shapes = {}
        for param in main_program.global_block().all_parameters():
            shapes[param.name] = param.shape

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.Scope()
        exe.run(startup_program, scope=scope)
        criterion = 'bn_scale'
        idx_selector = 'optimal_threshold'
        pruner = Pruner(criterion)
        main_program, _, _ = pruner.prune(
            main_program,
            scope,
            params=["conv4_weights"],
            ratios=[0.5],
            place=place,
            lazy=False,
            only_graph=False,
            param_backup=None,
            param_shape_backup=None)

        shapes = {
            "conv1_weights": (4L, 3L, 3L, 3L),
            "conv2_weights": (4L, 4L, 3L, 3L),
            "conv3_weights": (8L, 4L, 3L, 3L),
            "conv4_weights": (4L, 8L, 3L, 3L),
            "conv5_weights": (8L, 4L, 3L, 3L),
            "conv6_weights": (8L, 8L, 3L, 3L)
        }

        for param in main_program.global_block().all_parameters():
            if "weights" in param.name:
                print("param: {}; param shape: {}".format(param.name,
                                                          param.shape))
Esempio n. 5
0
def channel_prune(program, prune_names, prune_ratios, place, only_graph=False):
    """通道裁剪。

    Args:
        program (paddle.fluid.Program): 需要裁剪的Program,Program的具体介绍可参见
            https://paddlepaddle.org.cn/documentation/docs/zh/beginners_guide/basic_concept/program.html#program。
        prune_names (list): 由裁剪参数名组成的参数列表。
        prune_ratios (list): 由裁剪率组成的参数列表,与prune_names中的参数列表意义对应。
        place (paddle.fluid.CUDAPlace/paddle.fluid.CPUPlace): 运行设备。
        only_graph (bool): 是否只修改网络图,当为False时代表同时修改网络图和
            scope(全局作用域)中的参数。默认为False。

    Returns:
        paddle.fluid.Program: 裁剪后的Program。
    """
    prog_var_shape_dict = {}
    for var in program.list_vars():
        try:
            prog_var_shape_dict[var.name] = var.shape
        except Exception:
            pass
    index = 0
    for param, ratio in zip(prune_names, prune_ratios):
        origin_num = prog_var_shape_dict[param][0]
        pruned_num = int(round(origin_num * ratio))
        while origin_num == pruned_num:
            ratio -= 0.1
            pruned_num = int(round(origin_num * (ratio)))
            prune_ratios[index] = ratio
        index += 1
    scope = fluid.global_scope()
    pruner = Pruner()
    program, _, _ = pruner.prune(program,
                                 scope,
                                 params=prune_names,
                                 ratios=prune_ratios,
                                 place=place,
                                 lazy=False,
                                 only_graph=only_graph,
                                 param_backup=False,
                                 param_shape_backup=False)
    return program
Esempio n. 6
0
    def test_split(self):
        main_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            input = fluid.data(name="image", shape=[None, 3, 16, 16])
            conv1 = conv_bn_layer(input, 8, 3, "conv1")
            conv2 = conv_bn_layer(input, 4, 3, "conv2")
            split_0, split_1 = paddle.split(conv1, 2, axis=1)
            add = split_0 + conv2
            out = conv_bn_layer(add, 4, 3, "conv3")
            out1 = conv_bn_layer(split_1, 4, 4, "conv4")

        shapes = {}
        for param in main_program.global_block().all_parameters():
            shapes[param.name] = param.shape

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.Scope()
        exe.run(startup_program, scope=scope)
        pruner = Pruner()
        # test backward search of concat
        pruned_program, _, _ = pruner.prune(main_program,
                                            scope,
                                            params=["conv2_weights"],
                                            ratios=[0.5],
                                            place=place,
                                            lazy=False,
                                            only_graph=True,
                                            param_backup=None,
                                            param_shape_backup=None)
        shapes = {
            "conv1_weights": (6, 3, 3, 3),
            "conv2_weights": (2, 3, 3, 3),
            "conv3_weights": (4, 2, 3, 3),
            "conv4_weights": (4, 4, 3, 3),
        }
        for param in pruned_program.global_block().all_parameters():
            if "weights" in param.name and "conv2d" in param.name:
                self.assertTrue(shapes[param.name] == param.shape)
Esempio n. 7
0
    def test_prune(self):
        main_program = fluid.Program()
        startup_program = fluid.Program()
        #   X       X              O       X              O
        # conv1-->conv2-->sum1-->conv3-->conv4-->sum2-->conv5-->conv6
        #     |            ^ |                    ^
        #     |____________| |____________________|
        #
        # X: prune output channels
        # O: prune input channels
        with fluid.program_guard(main_program, startup_program):
            input = fluid.data(name="image", shape=[None, 3, 16, 16])
            label = fluid.data(name='label', shape=[None, 1], dtype='int64')
            conv1 = conv_bn_layer(input, 8, 3, "conv1", act='relu')
            conv2 = conv_bn_layer(conv1, 8, 3, "conv2", act='leaky_relu')
            sum1 = conv1 + conv2
            conv3 = conv_bn_layer(sum1, 8, 3, "conv3", act='relu6')
            conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
            sum2 = conv4 + sum1
            conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
            sum3 = fluid.layers.sum([sum2, conv5])
            conv6 = conv_bn_layer(sum3, 8, 3, "conv6")
            sub1 = conv6 - sum3
            mult = sub1 * sub1
            conv7 = conv_bn_layer(
                mult, 8, 3, "Depthwise_Conv7", groups=8, use_cudnn=False)
            floored = fluid.layers.floor(conv7)
            scaled = fluid.layers.scale(floored)
            concated = fluid.layers.concat([scaled, mult], axis=1)
            conv8 = conv_bn_layer(concated, 8, 3, "conv8")
            feature = fluid.layers.reshape(conv8, [-1, 128, 16])
            predict = fluid.layers.fc(input=feature, size=10, act='softmax')
            cost = fluid.layers.cross_entropy(input=predict, label=label)
            adam_optimizer = fluid.optimizer.AdamOptimizer(0.01)
            avg_cost = fluid.layers.mean(cost)
            adam_optimizer.minimize(avg_cost)

        params = []
        for param in main_program.all_parameters():
            if 'conv' in param.name:
                params.append(param.name)

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup_program)
        x = np.random.random(size=(10, 3, 16, 16)).astype('float32')
        label = np.random.random(size=(10, 1)).astype('int64')
        loss_data, = exe.run(main_program,
                             feed={"image": x,
                                   "label": label},
                             fetch_list=[cost.name])
        pruner = Pruner()
        main_program, _, _ = pruner.prune(
            main_program,
            fluid.global_scope(),
            params=params,
            ratios=[0.5] * len(params),
            place=place,
            lazy=False,
            only_graph=False,
            param_backup=None,
            param_shape_backup=None)
Esempio n. 8
0
def main():
    """
    Main evaluate function
    """
    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    check_config(cfg)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()

    main_arch = cfg.architecture

    multi_scale_test = getattr(cfg, 'MultiScaleTEST', None)

    # define executor
    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # build program
    model = create(main_arch)
    startup_prog = fluid.Program()
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs_def = cfg['EvalReader']['inputs_def']
            feed_vars, loader = model.build_inputs(**inputs_def)
            if multi_scale_test is None:
                fetches = model.eval(feed_vars)
            else:
                fetches = model.eval(feed_vars, multi_scale_test)
    eval_prog = eval_prog.clone(True)

    exe.run(startup_prog)
    reader = create_reader(cfg.EvalReader)
    # When iterable mode, set set_sample_list_generator(reader, place)
    loader.set_sample_list_generator(reader)

    dataset = cfg['EvalReader']['dataset']

    # eval already exists json file
    if FLAGS.json_eval:
        logger.info(
            "In json_eval mode, PaddleDetection will evaluate json files in "
            "output_eval directly. And proposal.json, bbox.json and mask.json "
            "will be detected by default.")
        json_eval_results(
            cfg.metric, json_directory=FLAGS.output_eval, dataset=dataset)
        return

    pruned_params = FLAGS.pruned_params
    assert (
        FLAGS.pruned_params is not None
    ), "FLAGS.pruned_params is empty!!! Please set it by '--pruned_params' option."
    pruned_params = FLAGS.pruned_params.strip().split(",")
    logger.info("pruned params: {}".format(pruned_params))
    pruned_ratios = [float(n) for n in FLAGS.pruned_ratios.strip().split(",")]
    logger.info("pruned ratios: {}".format(pruned_ratios))
    assert (len(pruned_params) == len(pruned_ratios)
            ), "The length of pruned params and pruned ratios should be equal."
    assert (pruned_ratios > [0] * len(pruned_ratios) and
            pruned_ratios < [1] * len(pruned_ratios)
            ), "The elements of pruned ratios should be in range (0, 1)."

    base_flops = flops(eval_prog)
    pruner = Pruner()
    eval_prog, _, _ = pruner.prune(
        eval_prog,
        fluid.global_scope(),
        params=pruned_params,
        ratios=pruned_ratios,
        place=place,
        only_graph=False)
    pruned_flops = flops(eval_prog)
    logger.info("pruned FLOPS: {}".format(
        float(base_flops - pruned_flops) / base_flops))

    compile_program = fluid.CompiledProgram(eval_prog).with_data_parallel()

    assert cfg.metric != 'OID', "eval process of OID dataset \
                          is not supported."

    if cfg.metric == "WIDERFACE":
        raise ValueError("metric type {} does not support in tools/eval.py, "
                         "please use tools/face_eval.py".format(cfg.metric))
    assert cfg.metric in ['COCO', 'VOC'], \
            "unknown metric type {}".format(cfg.metric)
    extra_keys = []

    if cfg.metric == 'COCO':
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg.metric == 'VOC':
        extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']

    keys, values, cls = parse_fetches(fetches, eval_prog, extra_keys)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()

    sub_eval_prog = None
    sub_keys = None
    sub_values = None
    # build sub-program
    if 'Mask' in main_arch and multi_scale_test:
        sub_eval_prog = fluid.Program()
        with fluid.program_guard(sub_eval_prog, startup_prog):
            with fluid.unique_name.guard():
                inputs_def = cfg['EvalReader']['inputs_def']
                inputs_def['mask_branch'] = True
                feed_vars, eval_loader = model.build_inputs(**inputs_def)
                sub_fetches = model.eval(
                    feed_vars, multi_scale_test, mask_branch=True)
                assert cfg.metric == 'COCO'
                extra_keys = ['im_id', 'im_shape']
        sub_keys, sub_values, _ = parse_fetches(sub_fetches, sub_eval_prog,
                                                extra_keys)
        sub_eval_prog = sub_eval_prog.clone(True)

    # load model
    if 'weights' in cfg:
        checkpoint.load_checkpoint(exe, eval_prog, cfg.weights)

    resolution = None
    if 'Mask' in cfg.architecture:
        resolution = model.mask_head.resolution

    results = eval_run(
        exe,
        compile_program,
        loader,
        keys,
        values,
        cls,
        cfg,
        sub_eval_prog,
        sub_keys,
        sub_values,
        resolution=resolution)

    # if map_type not set, use default 11point, only use in VOC eval
    map_type = cfg.map_type if 'map_type' in cfg else '11point'
    eval_results(
        results,
        cfg.metric,
        cfg.num_classes,
        resolution,
        is_bbox_normalized,
        FLAGS.output_eval,
        map_type,
        dataset=dataset)
Esempio n. 9
0
def main():
    env = os.environ
    FLAGS.dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env
    if FLAGS.dist:
        trainer_id = int(env['PADDLE_TRAINER_ID'])
        import random
        local_seed = (99 + trainer_id)
        random.seed(local_seed)
        np.random.seed(local_seed)

    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    check_config(cfg)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()

    main_arch = cfg.architecture

    if cfg.use_gpu:
        devices_num = fluid.core.get_cuda_device_count()
    else:
        devices_num = int(os.environ.get('CPU_NUM', 1))

    if 'FLAGS_selected_gpus' in env:
        device_id = int(env['FLAGS_selected_gpus'])
    else:
        device_id = 0
    place = fluid.CUDAPlace(device_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    lr_builder = create('LearningRate')
    optim_builder = create('OptimizerBuilder')

    # build program
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            model = create(main_arch)
            if FLAGS.fp16:
                assert (getattr(model.backbone, 'norm_type', None)
                        != 'affine_channel'), \
                    '--fp16 currently does not support affine channel, ' \
                    ' please modify backbone settings to use batch norm'

            with mixed_precision_context(FLAGS.loss_scale, FLAGS.fp16) as ctx:
                inputs_def = cfg['TrainReader']['inputs_def']
                feed_vars, train_loader = model.build_inputs(**inputs_def)
                train_fetches = model.train(feed_vars)
                loss = train_fetches['loss']
                if FLAGS.fp16:
                    loss *= ctx.get_loss_scale_var()
                lr = lr_builder()
                optimizer = optim_builder(lr)
                optimizer.minimize(loss)
                if FLAGS.fp16:
                    loss /= ctx.get_loss_scale_var()

    # parse train fetches
    train_keys, train_values, _ = parse_fetches(train_fetches)
    train_values.append(lr)

    if FLAGS.print_params:
        param_delimit_str = '-' * 20 + "All parameters in current graph" + '-' * 20
        print(param_delimit_str)
        for block in train_prog.blocks:
            for param in block.all_parameters():
                print("parameter name: {}\tshape: {}".format(param.name,
                                                             param.shape))
        print('-' * len(param_delimit_str))
        return

    if FLAGS.eval:
        eval_prog = fluid.Program()
        with fluid.program_guard(eval_prog, startup_prog):
            with fluid.unique_name.guard():
                model = create(main_arch)
                inputs_def = cfg['EvalReader']['inputs_def']
                feed_vars, eval_loader = model.build_inputs(**inputs_def)
                fetches = model.eval(feed_vars)
        eval_prog = eval_prog.clone(True)

        eval_reader = create_reader(cfg.EvalReader)
        eval_loader.set_sample_list_generator(eval_reader, place)

        # parse eval fetches
        extra_keys = []
        if cfg.metric == 'COCO':
            extra_keys = ['im_info', 'im_id', 'im_shape']
        if cfg.metric == 'VOC':
            extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']
        if cfg.metric == 'WIDERFACE':
            extra_keys = ['im_id', 'im_shape', 'gt_bbox']
        eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog,
                                                         extra_keys)

    # compile program for multi-devices
    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    # only enable sync_bn in multi GPU devices
    sync_bn = getattr(model.backbone, 'norm_type', None) == 'sync_bn'
    build_strategy.sync_batch_norm = sync_bn and devices_num > 1 \
        and cfg.use_gpu

    exec_strategy = fluid.ExecutionStrategy()
    # iteration number when CompiledProgram tries to drop local execution scopes.
    # Set it to be 1 to save memory usages, so that unused variables in
    # local execution scopes can be deleted after each iteration.
    exec_strategy.num_iteration_per_drop_scope = 1
    if FLAGS.dist:
        dist_utils.prepare_for_multi_process(exe, build_strategy, startup_prog,
                                             train_prog)
        exec_strategy.num_threads = 1

    exe.run(startup_prog)

    fuse_bn = getattr(model.backbone, 'norm_type', None) == 'affine_channel'

    start_iter = 0
    if cfg.pretrain_weights:
        checkpoint.load_params(exe, train_prog, cfg.pretrain_weights)

    pruned_params = FLAGS.pruned_params
    assert FLAGS.pruned_params is not None, \
        "FLAGS.pruned_params is empty!!! Please set it by '--pruned_params' option."
    pruned_params = FLAGS.pruned_params.strip().split(",")
    logger.info("pruned params: {}".format(pruned_params))
    pruned_ratios = [float(n) for n in FLAGS.pruned_ratios.strip().split(",")]
    logger.info("pruned ratios: {}".format(pruned_ratios))
    assert len(pruned_params) == len(pruned_ratios), \
        "The length of pruned params and pruned ratios should be equal."
    assert (pruned_ratios > [0] * len(pruned_ratios) and
            pruned_ratios < [1] * len(pruned_ratios)
            ), "The elements of pruned ratios should be in range (0, 1)."

    assert FLAGS.prune_criterion in ['l1_norm', 'geometry_median'], \
            "unsupported prune criterion {}".format(FLAGS.prune_criterion)
    pruner = Pruner(criterion=FLAGS.prune_criterion)
    train_prog = pruner.prune(
        train_prog,
        fluid.global_scope(),
        params=pruned_params,
        ratios=pruned_ratios,
        place=place,
        only_graph=False)[0]

    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    if FLAGS.eval:

        base_flops = flops(eval_prog)
        eval_prog = pruner.prune(
            eval_prog,
            fluid.global_scope(),
            params=pruned_params,
            ratios=pruned_ratios,
            place=place,
            only_graph=True)[0]
        pruned_flops = flops(eval_prog)
        logger.info("FLOPs -{}; total FLOPs: {}; pruned FLOPs: {}".format(
            float(base_flops - pruned_flops) / base_flops, base_flops,
            pruned_flops))
        compiled_eval_prog = fluid.compiler.CompiledProgram(eval_prog)

    if FLAGS.resume_checkpoint:
        checkpoint.load_checkpoint(exe, train_prog, FLAGS.resume_checkpoint)
        start_iter = checkpoint.global_step()

    train_reader = create_reader(cfg.TrainReader, (cfg.max_iters - start_iter) *
                                 devices_num, cfg)
    train_loader.set_sample_list_generator(train_reader, place)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()

    # if map_type not set, use default 11point, only use in VOC eval
    map_type = cfg.map_type if 'map_type' in cfg else '11point'

    train_stats = TrainingStats(cfg.log_smooth_window, train_keys)
    train_loader.start()
    start_time = time.time()
    end_time = time.time()

    cfg_name = os.path.basename(FLAGS.config).split('.')[0]
    save_dir = os.path.join(cfg.save_dir, cfg_name)
    time_stat = deque(maxlen=cfg.log_smooth_window)
    best_box_ap_list = [0.0, 0]  #[map, iter]

    # use tb-paddle to log data
    if FLAGS.use_tb:
        from tb_paddle import SummaryWriter
        tb_writer = SummaryWriter(FLAGS.tb_log_dir)
        tb_loss_step = 0
        tb_mAP_step = 0

    if FLAGS.eval:
        # evaluation
        results = eval_run(exe, compiled_eval_prog, eval_loader, eval_keys,
                           eval_values, eval_cls, cfg)
        resolution = None
        if 'mask' in results[0]:
            resolution = model.mask_head.resolution
        dataset = cfg['EvalReader']['dataset']
        box_ap_stats = eval_results(
            results,
            cfg.metric,
            cfg.num_classes,
            resolution,
            is_bbox_normalized,
            FLAGS.output_eval,
            map_type,
            dataset=dataset)

    for it in range(start_iter, cfg.max_iters):
        start_time = end_time
        end_time = time.time()
        time_stat.append(end_time - start_time)
        time_cost = np.mean(time_stat)
        eta_sec = (cfg.max_iters - it) * time_cost
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        outs = exe.run(compiled_train_prog, fetch_list=train_values)
        stats = {k: np.array(v).mean() for k, v in zip(train_keys, outs[:-1])}

        # use tb-paddle to log loss
        if FLAGS.use_tb:
            if it % cfg.log_iter == 0:
                for loss_name, loss_value in stats.items():
                    tb_writer.add_scalar(loss_name, loss_value, tb_loss_step)
                tb_loss_step += 1

        train_stats.update(stats)
        logs = train_stats.log()
        if it % cfg.log_iter == 0 and (not FLAGS.dist or trainer_id == 0):
            strs = 'iter: {}, lr: {:.6f}, {}, time: {:.3f}, eta: {}'.format(
                it, np.mean(outs[-1]), logs, time_cost, eta)
            logger.info(strs)

        if (it > 0 and it % cfg.snapshot_iter == 0 or it == cfg.max_iters - 1) \
           and (not FLAGS.dist or trainer_id == 0):
            save_name = str(it) if it != cfg.max_iters - 1 else "model_final"
            checkpoint.save(exe, train_prog, os.path.join(save_dir, save_name))

            if FLAGS.eval:
                # evaluation
                results = eval_run(
                    exe,
                    compiled_eval_prog,
                    eval_loader,
                    eval_keys,
                    eval_values,
                    eval_cls,
                    cfg=cfg)
                resolution = None
                if 'mask' in results[0]:
                    resolution = model.mask_head.resolution
                box_ap_stats = eval_results(
                    results,
                    cfg.metric,
                    cfg.num_classes,
                    resolution,
                    is_bbox_normalized,
                    FLAGS.output_eval,
                    map_type,
                    dataset=dataset)

                # use tb_paddle to log mAP
                if FLAGS.use_tb:
                    tb_writer.add_scalar("mAP", box_ap_stats[0], tb_mAP_step)
                    tb_mAP_step += 1

                if box_ap_stats[0] > best_box_ap_list[0]:
                    best_box_ap_list[0] = box_ap_stats[0]
                    best_box_ap_list[1] = it
                    checkpoint.save(exe, train_prog,
                                    os.path.join(save_dir, "best_model"))
                logger.info("Best test box ap: {}, in iter: {}".format(
                    best_box_ap_list[0], best_box_ap_list[1]))

    train_loader.reset()
Esempio n. 10
0
def compress(args):
    class_dim = 1000
    image_shape = "3,224,224"
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    val_program = fluid.default_main_program().clone(for_test=True)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:

        def if_exist(var):
            exist = os.path.exists(
                os.path.join(args.pretrained_model, var.name))
            print("exist", exist)
            return exist

        #fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.fluid.io.batch(reader.val(),
                                       batch_size=args.batch_size)
    train_reader = paddle.fluid.io.batch(reader.train(),
                                         batch_size=args.batch_size,
                                         drop_last=True)

    train_feeder = feeder = fluid.DataFeeder([image, label], place)
    val_feeder = feeder = fluid.DataFeeder([image, label],
                                           place,
                                           program=val_program)

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=train_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            print(
                "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                .format(epoch, batch_id, np.mean(acc_top1_n),
                        np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1

        print("Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
            epoch, np.mean(np.array(acc_top1_ns)),
            np.mean(np.array(acc_top5_ns))))

    def train(epoch, program):

        build_strategy = fluid.BuildStrategy()
        exec_strategy = fluid.ExecutionStrategy()
        train_program = fluid.compiler.CompiledProgram(
            program).with_data_parallel(loss_name=avg_cost.name,
                                        build_strategy=build_strategy,
                                        exec_strategy=exec_strategy)

        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n, lr_n = exe.run(
                train_program,
                feed=train_feeder.feed(data),
                fetch_list=[
                    avg_cost.name, acc_top1.name, acc_top5.name,
                    "learning_rate"
                ])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            lr_n = np.mean(lr_n)
            print(
                "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {};lrn: {}; time: {}"
                .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n, lr_n,
                        end_time - start_time))
            batch_id += 1

    params = []
    for param in fluid.default_main_program().global_block().all_parameters():
        #if "_weights" in  param.name and "conv1_weights" not in param.name:
        if "_sep_weights" in param.name:
            params.append(param.name)
    print("fops before pruning: {}".format(flops(
        fluid.default_main_program())))
    pruned_program_iter = fluid.default_main_program()
    pruned_val_program_iter = val_program
    for ratios in ratiolist:
        pruner = Pruner()
        pruned_val_program_iter = pruner.prune(pruned_val_program_iter,
                                               fluid.global_scope(),
                                               params=params,
                                               ratios=ratios,
                                               place=place,
                                               only_graph=True)

        pruned_program_iter = pruner.prune(pruned_program_iter,
                                           fluid.global_scope(),
                                           params=params,
                                           ratios=ratios,
                                           place=place)
        print("fops after pruning: {}".format(flops(pruned_program_iter)))
    """ do not inherit learning rate """
    if (os.path.exists(args.pretrained_model + "/learning_rate")):
        os.remove(args.pretrained_model + "/learning_rate")
    if (os.path.exists(args.pretrained_model + "/@LR_DECAY_COUNTER@")):
        os.remove(args.pretrained_model + "/@LR_DECAY_COUNTER@")
    fluid.io.load_vars(exe,
                       args.pretrained_model,
                       main_program=pruned_program_iter,
                       predicate=if_exist)

    pruned_program = pruned_program_iter
    pruned_val_program = pruned_val_program_iter
    for i in range(args.num_epochs):
        train(i, pruned_program)
        test(i, pruned_val_program)
        save_model(args, exe, pruned_program, pruned_val_program, i)
Esempio n. 11
0
    def test_prune(self):
        main_program = fluid.Program()
        startup_program = fluid.Program()
        #   X       X              O       X              O
        # conv1-->conv2-->sum1-->conv3-->conv4-->sum2-->conv5-->conv6
        #     |            ^ |                    ^
        #     |____________| |____________________|
        #
        # X: prune output channels
        # O: prune input channels
        with fluid.program_guard(main_program, startup_program):
            input = fluid.data(name="image", shape=[None, 3, 16, 16])
            conv1 = conv_bn_layer(input, 8, 3, "conv1")
            conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
            sum1 = conv1 + conv2
            conv3 = conv_bn_layer(sum1, 8, 3, "conv3")
            conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
            sum2 = conv4 + sum1
            conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
            conv6 = conv_bn_layer(conv5, 8, 3, "conv6")

            conv7 = fluid.layers.conv2d_transpose(input=conv6,
                                                  num_filters=16,
                                                  filter_size=2,
                                                  stride=2)

        shapes = {}
        for param in main_program.global_block().all_parameters():
            shapes[param.name] = param.shape

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.Scope()
        exe.run(startup_program, scope=scope)
        pruner = Pruner()
        main_program, _, _ = pruner.prune(
            main_program,
            scope,
            params=["conv4_weights", "conv2d_transpose_0.w_0"],
            ratios=[0.5, 0.6],
            place=place,
            lazy=False,
            only_graph=False,
            param_backup=None,
            param_shape_backup=None)

        shapes = {
            "conv1_weights": (4, 3, 3, 3),
            "conv2_weights": (4, 4, 3, 3),
            "conv3_weights": (8, 4, 3, 3),
            "conv4_weights": (4, 8, 3, 3),
            "conv5_weights": (8, 4, 3, 3),
            "conv6_weights": (8, 8, 3, 3),
            "conv2d_transpose_0.w_0": (8, 16, 2, 2),
        }

        for param in main_program.global_block().all_parameters():
            if param.name in shapes:
                print("param: {}; param shape: {}".format(
                    param.name, param.shape))
                self.assertTrue(param.shape == shapes[param.name])
Esempio n. 12
0
def main():
    cfg = load_config(FLAGS.config)

    merge_config(FLAGS.opt)
    check_config(cfg)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()

    main_arch = cfg.architecture

    dataset = cfg.TestReader['dataset']

    test_images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
    dataset.set_images(test_images)

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    model = create(main_arch)

    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs_def = cfg['TestReader']['inputs_def']
            inputs_def['iterable'] = True
            feed_vars, loader = model.build_inputs(**inputs_def)
            test_fetches = model.test(feed_vars)
    infer_prog = infer_prog.clone(True)

    pruned_params = FLAGS.pruned_params
    assert (
        FLAGS.pruned_params is not None
    ), "FLAGS.pruned_params is empty!!! Please set it by '--pruned_params' option."
    pruned_params = FLAGS.pruned_params.strip().split(",")
    logger.info("pruned params: {}".format(pruned_params))
    pruned_ratios = [float(n) for n in FLAGS.pruned_ratios.strip().split(",")]
    logger.info("pruned ratios: {}".format(pruned_ratios))
    assert (len(pruned_params) == len(pruned_ratios)
            ), "The length of pruned params and pruned ratios should be equal."
    assert (pruned_ratios > [0] * len(pruned_ratios)
            and pruned_ratios < [1] * len(pruned_ratios)
            ), "The elements of pruned ratios should be in range (0, 1)."

    base_flops = flops(infer_prog)
    pruner = Pruner()
    infer_prog, _, _ = pruner.prune(infer_prog,
                                    fluid.global_scope(),
                                    params=pruned_params,
                                    ratios=pruned_ratios,
                                    place=place,
                                    only_graph=True)
    pruned_flops = flops(infer_prog)
    logger.info("pruned FLOPS: {}".format(
        float(base_flops - pruned_flops) / base_flops))
    reader = create_reader(cfg.TestReader, devices_num=1)
    loader.set_sample_list_generator(reader, place)

    exe.run(startup_prog)
    if cfg.weights:
        checkpoint.load_checkpoint(exe, infer_prog, cfg.weights)

    # parse infer fetches
    assert cfg.metric in ['COCO', 'VOC', 'OID', 'WIDERFACE'], \
            "unknown metric type {}".format(cfg.metric)
    extra_keys = []
    if cfg['metric'] in ['COCO', 'OID']:
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg['metric'] == 'VOC' or cfg['metric'] == 'WIDERFACE':
        extra_keys = ['im_id', 'im_shape']
    keys, values, _ = parse_fetches(test_fetches, infer_prog, extra_keys)

    # parse dataset category
    if cfg.metric == 'COCO':
        from ppdet.utils.coco_eval import bbox2out, mask2out, get_category_info
    if cfg.metric == 'OID':
        from ppdet.utils.oid_eval import bbox2out, get_category_info
    if cfg.metric == "VOC":
        from ppdet.utils.voc_eval import bbox2out, get_category_info
    if cfg.metric == "WIDERFACE":
        from ppdet.utils.widerface_eval_utils import bbox2out, get_category_info

    anno_file = dataset.get_anno()
    with_background = dataset.with_background
    use_default_label = dataset.use_default_label

    clsid2catid, catid2name = get_category_info(anno_file, with_background,
                                                use_default_label)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()

    imid2path = dataset.get_imid2path()
    for iter_id, data in enumerate(loader()):
        outs = exe.run(infer_prog,
                       feed=data,
                       fetch_list=values,
                       return_numpy=False)
        res = {
            k: (np.array(v), v.recursive_sequence_lengths())
            for k, v in zip(keys, outs)
        }
        logger.info('Infer iter {}'.format(iter_id))

        bbox_results = None
        mask_results = None
        if 'bbox' in res:
            bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized)
        if 'mask' in res:
            mask_results = mask2out([res], clsid2catid,
                                    model.mask_head.resolution)

        # visualize result
        im_ids = res['im_id'][0]
        for im_id in im_ids:
            image_path = imid2path[int(im_id)]
            image = Image.open(image_path).convert('RGB')

            image = visualize_results(image, int(im_id), catid2name,
                                      FLAGS.draw_threshold, bbox_results,
                                      mask_results)

            save_name = get_save_image_name(FLAGS.output_dir, image_path)
            logger.info("Detection bbox results save in {}".format(save_name))
            image.save(save_name, quality=95)
    def test_prune(self):
        train_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(train_program, startup_program):
            input = fluid.data(name="image", shape=[None, 3, 16, 16])
            conv1 = conv_bn_layer(input, 8, 3, "conv1")
            conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
            sum1 = conv1 + conv2
            conv3 = conv_bn_layer(sum1, 8, 3, "conv3")
            conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
            sum2 = conv4 + sum1
            conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
            conv6 = conv_bn_layer(conv5, 8, 3, "conv6")
            feature = fluid.layers.reshape(conv6, [-1, 128, 16])
            predict = fluid.layers.fc(input=feature, size=10, act='softmax')
            label = fluid.data(name='label', shape=[None, 1], dtype='int64')
            print(label.shape)
            print(predict.shape)
            cost = fluid.layers.cross_entropy(input=predict, label=label)
            avg_cost = fluid.layers.mean(cost)
            adam_optimizer = fluid.optimizer.AdamOptimizer(0.01)
            adam_optimizer.minimize(avg_cost)

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)

        scope = fluid.global_scope()
        exe.run(startup_program, scope=scope)
        criterion = 'bn_scale'
        pruner = Pruner(criterion)
        main_program, _, _ = pruner.prune(train_program,
                                          scope,
                                          params=["conv4_weights"],
                                          ratios=[0.5],
                                          place=place,
                                          lazy=False,
                                          only_graph=False,
                                          param_backup=None,
                                          param_shape_backup=None)

        x = numpy.random.random(size=(10, 3, 16, 16)).astype('float32')
        label = numpy.random.random(size=(10, 1)).astype('int64')
        loss_data, = exe.run(train_program,
                             feed={
                                 "image": x,
                                 "label": label
                             },
                             fetch_list=[cost.name])

        save_model(exe, main_program, 'model_file')
        pruned_program = fluid.Program()
        pruned_startup_program = fluid.Program()
        with fluid.program_guard(pruned_program, pruned_startup_program):
            input = fluid.data(name="image", shape=[None, 3, 16, 16])
            conv1 = conv_bn_layer(input, 8, 3, "conv1")
            conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
            sum1 = conv1 + conv2
            conv3 = conv_bn_layer(sum1, 8, 3, "conv3")
            conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
            sum2 = conv4 + sum1
            conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
            conv6 = conv_bn_layer(conv5, 8, 3, "conv6")
        pruned_test_program = pruned_program.clone(for_test=True)
        exe.run(pruned_startup_program)
        load_model(exe, pruned_program, 'model_file')
        load_model(exe, pruned_test_program, 'model_file')
        shapes = {
            "conv1_weights": (4, 3, 3, 3),
            "conv2_weights": (4, 4, 3, 3),
            "conv3_weights": (8, 4, 3, 3),
            "conv4_weights": (4, 8, 3, 3),
            "conv5_weights": (8, 4, 3, 3),
            "conv6_weights": (8, 8, 3, 3)
        }

        for param in pruned_program.global_block().all_parameters():
            if "weights" in param.name:
                print("param: {}; param shape: {}".format(
                    param.name, param.shape))
                self.assertTrue(param.shape == shapes[param.name])
        for param in pruned_test_program.global_block().all_parameters():
            if "weights" in param.name:
                print("param: {}; param shape: {}".format(
                    param.name, param.shape))
                self.assertTrue(param.shape == shapes[param.name])
Esempio n. 14
0
    def test_prune(self):
        train_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(train_program, startup_program):
            input = fluid.data(name="image", shape=[None, 3, 16, 16])
            conv1 = conv_bn_layer(input, 8, 3, "conv1")
            conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
            sum1 = conv1 + conv2
            conv3 = conv_bn_layer(sum1, 8, 3, "conv3")
            conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
            sum2 = conv4 + sum1
            conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
            conv6 = conv_bn_layer(conv5, 8, 3, "conv6")

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)

        scope = fluid.global_scope()
        exe.run(startup_program, scope=scope)
        criterion = 'bn_scale'
        pruner = Pruner(criterion)
        main_program, _, _ = pruner.prune(train_program,
                                          scope,
                                          params=["conv4_weights"],
                                          ratios=[0.5],
                                          place=place,
                                          lazy=False,
                                          only_graph=False,
                                          param_backup=None,
                                          param_shape_backup=None)

        x = numpy.random.random(size=(10, 3, 16, 16)).astype('float32')
        loss_data, = exe.run(train_program,
                             feed={"image": x},
                             fetch_list=[conv6.name])

        save_model(exe, main_program, 'model_file')
        pruned_program = fluid.Program()
        pruned_startup_program = fluid.Program()
        with fluid.program_guard(pruned_program, pruned_startup_program):
            input = fluid.data(name="image", shape=[None, 3, 16, 16])
            conv1 = conv_bn_layer(input, 8, 3, "conv1")
            conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
            sum1 = conv1 + conv2
            conv3 = conv_bn_layer(sum1, 8, 3, "conv3")
            conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
            sum2 = conv4 + sum1
            conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
            conv6 = conv_bn_layer(conv5, 8, 3, "conv6")
        exe.run(pruned_startup_program)
        load_model(exe, pruned_program, 'model_file')
        shapes = {
            "conv1_weights": (4, 3, 3, 3),
            "conv2_weights": (4, 4, 3, 3),
            "conv3_weights": (8, 4, 3, 3),
            "conv4_weights": (4, 8, 3, 3),
            "conv5_weights": (8, 4, 3, 3),
            "conv6_weights": (8, 8, 3, 3)
        }

        for param in pruned_program.global_block().all_parameters():
            if "weights" in param.name:
                print("param: {}; param shape: {}".format(
                    param.name, param.shape))
                self.assertTrue(param.shape == shapes[param.name])
Esempio n. 15
0
    def test_prune(self):
        main_program = fluid.Program()
        startup_program = fluid.Program()
        #   X       X              O       X              O
        # conv1-->conv2-->sum1-->conv3-->conv4-->sum2-->conv5-->conv6
        #     |            ^ |                    ^
        #     |____________| |____________________|
        #
        # X: prune output channels
        # O: prune input channels
        with fluid.unique_name.guard():
            with fluid.program_guard(main_program, startup_program):
                input = fluid.data(name="image", shape=[None, 3, 16, 16])
                label = fluid.data(name='label', shape=[None, 1], dtype='int64')
                conv1 = conv_bn_layer(input, 8, 3, "conv1", act='relu')
                conv2 = conv_bn_layer(conv1, 8, 3, "conv2", act='leaky_relu')
                sum1 = conv1 + conv2
                conv3 = conv_bn_layer(sum1, 8, 3, "conv3", act='relu6')
                conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
                sum2 = conv4 + sum1
                conv5 = conv_bn_layer(sum2, 8, 3, "conv5")

                flag = fluid.layers.fill_constant([1], value=1, dtype='int32')
                rand_flag = paddle.randint(2, dtype='int32')
                cond = fluid.layers.less_than(x=flag, y=rand_flag)
                cond_output = fluid.layers.create_global_var(
                    shape=[1],
                    value=0.0,
                    dtype='float32',
                    persistable=False,
                    name='cond_output')

                def cond_block1():
                    cond_conv = conv_bn_layer(conv5, 8, 3, "conv_cond1_1")
                    fluid.layers.assign(input=cond_conv, output=cond_output)

                def cond_block2():
                    cond_conv1 = conv_bn_layer(conv5, 8, 3, "conv_cond2_1")
                    cond_conv2 = conv_bn_layer(cond_conv1, 8, 3, "conv_cond2_2")
                    fluid.layers.assign(input=cond_conv2, output=cond_output)

                fluid.layers.cond(cond, cond_block1, cond_block2)
                sum3 = fluid.layers.sum([sum2, cond_output])

                conv6 = conv_bn_layer(sum3, 8, 3, "conv6")
                sub1 = conv6 - sum3
                mult = sub1 * sub1
                conv7 = conv_bn_layer(
                    mult, 8, 3, "Depthwise_Conv7", groups=8, use_cudnn=False)
                floored = fluid.layers.floor(conv7)
                scaled = fluid.layers.scale(floored)
                concated = fluid.layers.concat([scaled, mult], axis=1)
                conv8 = conv_bn_layer(concated, 8, 3, "conv8")
                predict = fluid.layers.fc(input=conv8, size=10, act='softmax')
                cost = fluid.layers.cross_entropy(input=predict, label=label)
                adam_optimizer = fluid.optimizer.AdamOptimizer(0.01)
                avg_cost = fluid.layers.mean(cost)
                adam_optimizer.minimize(avg_cost)

        params = []
        for param in main_program.all_parameters():
            if 'conv' in param.name:
                params.append(param.name)
        #TODO: To support pruning convolution before fc layer.
        params.remove('conv8_weights')

        place = fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        exe.run(startup_program)
        x = np.random.random(size=(10, 3, 16, 16)).astype('float32')
        label = np.random.random(size=(10, 1)).astype('int64')
        loss_data, = exe.run(main_program,
                             feed={"image": x,
                                   "label": label},
                             fetch_list=[cost.name])
        pruner = Pruner()
        main_program, _, _ = pruner.prune(
            main_program,
            fluid.global_scope(),
            params=params,
            ratios=[0.5] * len(params),
            place=place,
            lazy=False,
            only_graph=False,
            param_backup=None,
            param_shape_backup=None)

        loss_data, = exe.run(main_program,
                             feed={"image": x,
                                   "label": label},
                             fetch_list=[cost.name])
Esempio n. 16
0
def compress(args):
    train_reader = None
    test_reader = None
    if args.data == "mnist":
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.MNIST(mode='train',
                                                     backend="cv2",
                                                     transform=transform)
        val_dataset = paddle.vision.datasets.MNIST(mode='test',
                                                   backend="cv2",
                                                   transform=transform)
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    places = paddle.static.cuda_places(
    ) if args.use_gpu else paddle.static.cpu_places()
    place = places[0]
    exe = paddle.static.Executor(place)
    image = paddle.static.data(name='image',
                               shape=[None] + image_shape,
                               dtype='float32')
    label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
    batch_size_per_card = int(args.batch_size / len(places))
    train_loader = paddle.io.DataLoader(train_dataset,
                                        places=places,
                                        feed_list=[image, label],
                                        drop_last=True,
                                        batch_size=batch_size_per_card,
                                        shuffle=True,
                                        return_list=False,
                                        use_shared_memory=True,
                                        num_workers=16)
    valid_loader = paddle.io.DataLoader(val_dataset,
                                        places=place,
                                        feed_list=[image, label],
                                        drop_last=False,
                                        return_list=False,
                                        use_shared_memory=True,
                                        batch_size=batch_size_per_card,
                                        shuffle=False)
    step_per_epoch = int(np.ceil(len(train_dataset) * 1. / args.batch_size))

    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
    avg_cost = paddle.mean(x=cost)
    acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
    acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
    val_program = paddle.static.default_main_program().clone(for_test=True)
    opt, learning_rate = create_optimizer(args, step_per_epoch)
    opt.minimize(avg_cost)

    exe.run(paddle.static.default_startup_program())

    if args.pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(args.pretrained_model,
                                               var.name))

        _logger.info("Load pretrained model from {}".format(
            args.pretrained_model))
        paddle.static.load(paddle.static.default_main_program(),
                           args.pretrained_model, exe)

    def test(epoch, program):
        acc_top1_ns = []
        acc_top5_ns = []
        for batch_id, data in enumerate(valid_loader):
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program, feed=data, fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))

    def train(epoch, program):
        for batch_id, data in enumerate(train_loader):
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                train_program,
                feed=data,
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, learning_rate.get_lr(), loss_n,
                            acc_top1_n, acc_top5_n, end_time - start_time))
            learning_rate.step()
            batch_id += 1

    test(0, val_program)
    params = get_pruned_params(args, paddle.static.default_main_program())
    _logger.info("FLOPs before pruning: {}".format(
        flops(paddle.static.default_main_program())))
    pruner = Pruner(args.criterion)
    pruned_val_program, _, _ = pruner.prune(val_program,
                                            paddle.static.global_scope(),
                                            params=params,
                                            ratios=[args.pruned_ratio] *
                                            len(params),
                                            place=place,
                                            only_graph=True)

    pruned_program, _, _ = pruner.prune(paddle.static.default_main_program(),
                                        paddle.static.global_scope(),
                                        params=params,
                                        ratios=[args.pruned_ratio] *
                                        len(params),
                                        place=place)
    _logger.info("FLOPs after pruning: {}".format(flops(pruned_program)))

    build_strategy = paddle.static.BuildStrategy()
    exec_strategy = paddle.static.ExecutionStrategy()
    train_program = paddle.static.CompiledProgram(
        pruned_program).with_data_parallel(loss_name=avg_cost.name,
                                           build_strategy=build_strategy,
                                           exec_strategy=exec_strategy)

    for i in range(args.num_epochs):
        train(i, train_program)
        if (i + 1) % args.test_period == 0:
            test(i, pruned_val_program)
            save_model(exe, pruned_val_program,
                       os.path.join(args.model_path, str(i)))
        if args.save_inference:
            infer_model_path = os.path.join(args.model_path, "infer_models",
                                            str(i))
            paddle.static.save_inference_model(infer_model_path, [image],
                                               [out],
                                               exe,
                                               program=pruned_val_program)
            _logger.info(
                "Saved inference model into [{}]".format(infer_model_path))
Esempio n. 17
0
def sensitivity(program,
                place,
                param_names,
                eval_func,
                sensitivities_file=None,
                pruned_ratios=None):
    scope = fluid.global_scope()
    graph = GraphWrapper(program)
    sensitivities = load_sensitivities(sensitivities_file)

    if pruned_ratios is None:
        pruned_ratios = np.arange(0.1, 1, step=0.1)

    total_evaluate_iters = 0
    for name in param_names:
        if name not in sensitivities:
            sensitivities[name] = {}
            total_evaluate_iters += len(list(pruned_ratios))
        else:
            total_evaluate_iters += (len(list(pruned_ratios)) -
                                     len(sensitivities[name]))
    eta = '-'
    start_time = time.time()
    baseline = eval_func(graph.program)
    cost = time.time() - start_time
    eta = cost * (total_evaluate_iters - 1)
    current_iter = 1
    for name in sensitivities:
        for ratio in pruned_ratios:
            if ratio in sensitivities[name]:
                logging.debug('{}, {} has computed.'.format(name, ratio))
                continue

            progress = float(current_iter) / total_evaluate_iters
            progress = "%.2f%%" % (progress * 100)
            logging.info(
                "Total evaluate iters={}, current={}, progress={}, eta={}".
                format(
                    total_evaluate_iters, current_iter, progress,
                    seconds_to_hms(
                        int(cost * (total_evaluate_iters - current_iter)))),
                use_color=True)
            current_iter += 1

            pruner = Pruner()
            logging.info("sensitive - param: {}; ratios: {}".format(
                name, ratio))
            pruned_program, param_backup, _ = pruner.prune(
                program=graph.program,
                scope=scope,
                params=[name],
                ratios=[ratio],
                place=place,
                lazy=True,
                only_graph=False,
                param_backup=True)
            pruned_metric = eval_func(pruned_program)
            loss = (baseline - pruned_metric) / baseline
            logging.info("pruned param: {}; {}; loss={}".format(
                name, ratio, loss))

            sensitivities[name][ratio] = loss

            with open(sensitivities_file, 'wb') as f:
                pickle.dump(sensitivities, f)

            for param_name in param_backup.keys():
                param_t = scope.find_var(param_name).get_tensor()
                param_t.set(param_backup[param_name], place)
    return sensitivities
Esempio n. 18
0
def compress(args):

    train_reader = None
    test_reader = None
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))

    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    val_program = fluid.default_main_program().clone(for_test=True)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(args.pretrained_model,
                                               var.name))


#        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.batch(train_reader,
                                batch_size=args.batch_size,
                                drop_last=True)

    train_feeder = feeder = fluid.DataFeeder([image, label], place)
    val_feeder = feeder = fluid.DataFeeder([image, label],
                                           place,
                                           program=val_program)

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=train_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, program):

        build_strategy = fluid.BuildStrategy()
        exec_strategy = fluid.ExecutionStrategy()
        train_program = fluid.compiler.CompiledProgram(
            program).with_data_parallel(loss_name=avg_cost.name,
                                        build_strategy=build_strategy,
                                        exec_strategy=exec_strategy)

        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                train_program,
                feed=train_feeder.feed(data),
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))
            batch_id += 1

    params = []
    for param in fluid.default_main_program().global_block().all_parameters():
        if "_sep_weights" in param.name:
            params.append(param.name)

    pruned_program_iter = fluid.default_main_program()
    pruned_val_program_iter = val_program

    for ratios in ratiolist:
        pruner = Pruner()
        pruned_val_program_iter = pruner.prune(pruned_val_program_iter,
                                               fluid.global_scope(),
                                               params=params,
                                               ratios=ratios,
                                               place=place,
                                               only_graph=True)

        pruned_program_iter = pruner.prune(pruned_program_iter,
                                           fluid.global_scope(),
                                           params=params,
                                           ratios=ratios,
                                           place=place)
        print("fops after pruning: {}".format(flops(pruned_program_iter)))
    fluid.io.load_vars(exe,
                       args.pretrained_model,
                       main_program=pruned_program_iter,
                       predicate=if_exist)

    pruner = AutoPruner(pruned_val_program_iter,
                        fluid.global_scope(),
                        place,
                        params=params,
                        init_ratios=[0.1] * len(params),
                        pruned_flops=0.1,
                        pruned_latency=None,
                        server_addr=("", 0),
                        init_temperature=100,
                        reduce_rate=0.85,
                        max_try_times=300,
                        max_client_num=10,
                        search_steps=100,
                        max_ratios=0.2,
                        min_ratios=0.,
                        is_server=True,
                        key="auto_pruner")

    while True:
        pruned_program, pruned_val_program = pruner.prune(
            pruned_program_iter, pruned_val_program_iter)
        for i in range(0):
            train(i, pruned_program)
        score = test(0, pruned_val_program)
        pruner.reward(score)
Esempio n. 19
0
    def test_concat(self):
        main_program = fluid.Program()
        startup_program = fluid.Program()
        #                                  X
        # conv1   conv2-->concat         conv3-->sum-->out
        #     |            ^ |                    ^
        #     |____________| |____________________|
        #
        with fluid.program_guard(main_program, startup_program):
            input = fluid.data(name="image", shape=[None, 3, 16, 16])
            conv1 = conv_bn_layer(input, 8, 3, "conv1")
            conv2 = conv_bn_layer(input, 8, 3, "conv2", sync_bn=True)
            tmp = fluid.layers.concat([conv1, conv2], axis=1)
            conv3 = conv_bn_layer(input, 16, 3, "conv3", bias=None)
            out = conv3 + tmp

        shapes = {}
        for param in main_program.global_block().all_parameters():
            shapes[param.name] = param.shape

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.Scope()
        exe.run(startup_program, scope=scope)
        pruner = Pruner()
        # test backward search of concat
        pruned_program, _, _ = pruner.prune(main_program,
                                            scope,
                                            params=["conv3_weights"],
                                            ratios=[0.5],
                                            place=place,
                                            lazy=False,
                                            only_graph=True,
                                            param_backup=None,
                                            param_shape_backup=None)
        shapes = {
            "conv3_weights": (8, 3, 3, 3),
            "conv2_weights": (4, 3, 3, 3),
            "conv1_weights": (4, 3, 3, 3)
        }
        for param in pruned_program.global_block().all_parameters():
            if "weights" in param.name and "conv2d" in param.name:
                self.assertTrue(shapes[param.name] == param.shape)

        # test forward search of concat
        pruned_program, _, _ = pruner.prune(
            main_program,
            scope,
            params=["conv1_weights", "conv2_weights"],
            ratios=[0.5, 0.5],
            place=place,
            lazy=False,
            only_graph=False,
            param_backup=None,
            param_shape_backup=None)

        shapes = {
            "conv1_weights": (4, 3, 3, 3),
            "conv1_bn_scale": (4, ),
            "conv1_bn_variance": (4, ),
            "conv1_bn_mean": (4, ),
            "conv1_bn_offset": (4, ),
            "conv2_weights": (4, 3, 3, 3),
            "sync_batch_norm_0.w_0": (4, ),
            "sync_batch_norm_0.w_1": (4, ),
            "conv2_bn_scale": (4, ),
            "conv2_bn_offset": (4, ),
            "conv3_weights": (8, 3, 3, 3),
            "conv3_bn_mean": (8, ),
            "conv3_bn_offset": (8, ),
            "conv3_bn_scale": (8, ),
            "conv3_bn_variance": (8, ),
            "conv3_out.b_0": (8, ),
        }

        for param in pruned_program.global_block().all_parameters():
            if "weights" in param.name and "conv2d" in param.name:
                self.assertTrue(shapes[param.name] == param.shape)
Esempio n. 20
0
def train(cfg):
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    drop_last = True

    dataset = SegDataset(file_list=cfg.DATASET.TRAIN_FILE_LIST,
                         mode=ModelPhase.TRAIN,
                         shuffle=True,
                         data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # If use sync batch norm strategy, drop last batch if number of samples
        # in batch_data is less then cfg.BATCH_SIZE to avoid NCCL hang issues
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    # places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    # place = places[0]
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()

    # Get number of GPU
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#Device count: {}".format(dev_count))

    # Make sure BATCH_SIZE can divided by GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisble by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # If use multi-gpu training mode, batch data will allocated to each GPU evenly
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    print_info("batch_size_per_dev: {}".format(batch_size_per_dev))

    data_loader, avg_loss, lr, pred, grts, masks = build_model(
        train_prog, startup_prog, phase=ModelPhase.TRAIN)
    data_loader.set_sample_generator(data_generator,
                                     batch_size=batch_size_per_dev,
                                     drop_last=drop_last)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    exec_strategy = fluid.ExecutionStrategy()
    # Clear temporary variables every 100 iteration
    if args.use_gpu:
        exec_strategy.num_threads = fluid.core.get_cuda_device_count()
    exec_strategy.num_iteration_per_drop_scope = 100
    build_strategy = fluid.BuildStrategy()

    if cfg.NUM_TRAINERS > 1 and args.use_gpu:
        dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog)
        exec_strategy.num_threads = 1

    if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
        if dev_count > 1:
            # Apply sync batch norm strategy
            print_info("Sync BatchNorm strategy is effective.")
            build_strategy.sync_batch_norm = True
        else:
            print_info(
                "Sync BatchNorm strategy will not be effective if GPU device"
                " count <= 1")

    pruned_params = cfg.SLIM.PRUNE_PARAMS.strip().split(',')
    pruned_ratios = cfg.SLIM.PRUNE_RATIOS

    if isinstance(pruned_ratios, float):
        pruned_ratios = [pruned_ratios] * len(pruned_params)
    elif isinstance(pruned_ratios, (list, tuple)):
        pruned_ratios = list(pruned_ratios)
    else:
        raise ValueError(
            'expect SLIM.PRUNE_RATIOS type is float, list, tuple, '
            'but received {}'.format(type(pruned_ratios)))

    # Resume training
    begin_epoch = cfg.SOLVER.BEGIN_EPOCH
    if cfg.TRAIN.RESUME_MODEL_DIR:
        begin_epoch = load_checkpoint(exe, train_prog)
    # Load pretrained model
    elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
        print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR)
        load_vars = []
        load_fail_vars = []

        def var_shape_matched(var, shape):
            """
            Check whehter persitable variable shape is match with current network
            """
            var_exist = os.path.exists(
                os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
            if var_exist:
                var_shape = parse_shape_from_file(
                    os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
                return var_shape == shape
            return False

        for x in train_prog.list_vars():
            if isinstance(x, fluid.framework.Parameter):
                shape = tuple(fluid.global_scope().find_var(
                    x.name).get_tensor().shape())
                if var_shape_matched(x, shape):
                    load_vars.append(x)
                else:
                    load_fail_vars.append(x)

        fluid.io.load_vars(exe,
                           dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR,
                           vars=load_vars)
        for var in load_vars:
            print_info("Parameter[{}] loaded sucessfully!".format(var.name))
        for var in load_fail_vars:
            print_info(
                "Parameter[{}] don't exist or shape does not match current network, skip"
                " to load it.".format(var.name))
        print_info("{}/{} pretrained parameters loaded successfully!".format(
            len(load_vars),
            len(load_vars) + len(load_fail_vars)))
    else:
        print_info(
            'Pretrained model dir {} not exists, training from scratch...'.
            format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

    fetch_list = [avg_loss.name, lr.name]
    if args.debug:
        # Fetch more variable info and use streaming confusion matrix to
        # calculate IoU results if in debug mode
        np.set_printoptions(precision=4,
                            suppress=True,
                            linewidth=160,
                            floatmode="fixed")
        fetch_list.extend([pred.name, grts.name, masks.name])
        cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)

    if args.use_tb:
        if not args.tb_log_dir:
            print_info("Please specify the log directory by --tb_log_dir.")
            exit(1)

        from tb_paddle import SummaryWriter
        log_writer = SummaryWriter(args.tb_log_dir)

    pruner = Pruner()
    train_prog = pruner.prune(train_prog,
                              fluid.global_scope(),
                              params=pruned_params,
                              ratios=pruned_ratios,
                              place=place,
                              only_graph=False)[0]

    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=avg_loss.name,
        exec_strategy=exec_strategy,
        build_strategy=build_strategy)

    global_step = 0
    all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
    if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and drop_last != True:
        all_step += 1
    all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)

    avg_loss = 0.0
    timer = Timer()
    timer.start()
    if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
        raise ValueError((
            "begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format(
                begin_epoch, cfg.SOLVER.NUM_EPOCHS))

    if args.use_mpio:
        print_info("Use multiprocess reader")
    else:
        print_info("Use multi-thread reader")

    for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
        data_loader.start()
        while True:
            try:
                if args.debug:
                    # Print category IoU and accuracy to check whether the
                    # traning process is corresponed to expectation
                    loss, lr, pred, grts, masks = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    cm.calculate(pred, grts, masks)
                    avg_loss += np.mean(np.array(loss))
                    global_step += 1

                    if global_step % args.log_steps == 0:
                        speed = args.log_steps / timer.elapsed_time()
                        avg_loss /= args.log_steps
                        category_acc, mean_acc = cm.accuracy()
                        category_iou, mean_iou = cm.mean_iou()

                        print_info((
                            "epoch={} step={} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss, mean_acc,
                                 mean_iou, speed,
                                 calculate_eta(all_step - global_step, speed)))
                        print_info("Category IoU: ", category_iou)
                        print_info("Category Acc: ", category_acc)
                        if args.use_tb:
                            log_writer.add_scalar('Train/mean_iou', mean_iou,
                                                  global_step)
                            log_writer.add_scalar('Train/mean_acc', mean_acc,
                                                  global_step)
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  global_step)
                            log_writer.add_scalar('Train/lr', lr[0],
                                                  global_step)
                            log_writer.add_scalar('Train/step/sec', speed,
                                                  global_step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        cm.zero_matrix()
                        timer.restart()
                else:
                    # If not in debug mode, avoid unnessary log and calculate
                    loss, lr = exe.run(program=compiled_train_prog,
                                       fetch_list=fetch_list,
                                       return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    global_step += 1

                    if global_step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss, speed,
                                 calculate_eta(all_step - global_step, speed)))
                        if args.use_tb:
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  global_step)
                            log_writer.add_scalar('Train/lr', lr[0],
                                                  global_step)
                            log_writer.add_scalar('Train/speed', speed,
                                                  global_step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        timer.restart()

            except fluid.core.EOFException:
                data_loader.reset()
                break
            except Exception as e:
                print(e)

        if epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0 and cfg.TRAINER_ID == 0:

            ckpt_dir = save_prune_checkpoint(exe, train_prog, epoch)

            if args.do_eval:
                print("Evaluation start")
                _, mean_iou, _, mean_acc = evaluate(cfg=cfg,
                                                    ckpt_dir=ckpt_dir,
                                                    use_gpu=args.use_gpu,
                                                    use_mpio=args.use_mpio)
                if args.use_tb:
                    log_writer.add_scalar('Evaluate/mean_iou', mean_iou,
                                          global_step)
                    log_writer.add_scalar('Evaluate/mean_acc', mean_acc,
                                          global_step)

            # Use Tensorboard to visualize results
            if args.use_tb and cfg.DATASET.VIS_FILE_LIST is not None:
                visualize(cfg=cfg,
                          use_gpu=args.use_gpu,
                          vis_file_list=cfg.DATASET.VIS_FILE_LIST,
                          vis_dir="visual",
                          ckpt_dir=ckpt_dir,
                          log_writer=log_writer)

    # save final model
    if cfg.TRAINER_ID == 0:
        save_prune_checkpoint(exe, train_prog, 'final')
Esempio n. 21
0
def main():
    env = os.environ
    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    check_config(cfg)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    check_version()

    main_arch = cfg.architecture

    if cfg.use_gpu:
        devices_num = fluid.core.get_cuda_device_count()
    else:
        devices_num = int(os.environ.get('CPU_NUM', 1))

    if 'FLAGS_selected_gpus' in env:
        device_id = int(env['FLAGS_selected_gpus'])
    else:
        device_id = 0
    place = fluid.CUDAPlace(device_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # build program
    model = create(main_arch)
    inputs_def = cfg['TrainReader']['inputs_def']
    train_feed_vars, train_loader = model.build_inputs(**inputs_def)
    train_fetches = model.train(train_feed_vars)
    loss = train_fetches['loss']

    start_iter = 0
    train_reader = create_reader(cfg.TrainReader,
                                 (cfg.max_iters - start_iter) * devices_num,
                                 cfg)
    # When iterable mode, set set_sample_list_generator(train_reader, place)
    train_loader.set_sample_list_generator(train_reader)

    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            model = create(main_arch)
            inputs_def = cfg['EvalReader']['inputs_def']
            test_feed_vars, eval_loader = model.build_inputs(**inputs_def)
            fetches = model.eval(test_feed_vars)
    eval_prog = eval_prog.clone(True)

    eval_reader = create_reader(cfg.EvalReader)
    # When iterable mode, set set_sample_list_generator(eval_reader, place)
    eval_loader.set_sample_list_generator(eval_reader)

    teacher_cfg = load_config(FLAGS.teacher_config)
    merge_config(FLAGS.opt)
    teacher_arch = teacher_cfg.architecture
    teacher_program = fluid.Program()
    teacher_startup_program = fluid.Program()

    with fluid.program_guard(teacher_program, teacher_startup_program):
        with fluid.unique_name.guard():
            teacher_feed_vars = OrderedDict()
            for name, var in train_feed_vars.items():
                teacher_feed_vars[name] = teacher_program.global_block(
                )._clone_variable(var, force_persistable=False)
            model = create(teacher_arch)
            train_fetches = model.train(teacher_feed_vars)
            teacher_loss = train_fetches['loss']

    exe.run(teacher_startup_program)
    assert FLAGS.teacher_pretrained, "teacher_pretrained should be set"
    checkpoint.load_params(exe, teacher_program, FLAGS.teacher_pretrained)
    teacher_program = teacher_program.clone(for_test=True)

    target_number = len(model.yolo_head.anchor_masks)

    data_name_map = {
        'image': 'image',
        'gt_bbox': 'gt_bbox',
        'gt_class': 'gt_class',
        'gt_score': 'gt_score'
    }
    for i in range(target_number):
        data_name_map['target{}'.format(i)] = 'target{}'.format(i)

    merge(teacher_program, fluid.default_main_program(), data_name_map, place)

    output_names = [
        [
            'strided_slice_0.tmp_0', 'strided_slice_1.tmp_0',
            'strided_slice_2.tmp_0', 'strided_slice_3.tmp_0',
            'strided_slice_4.tmp_0', 'transpose_0.tmp_0'
        ],
        [
            'strided_slice_5.tmp_0', 'strided_slice_6.tmp_0',
            'strided_slice_7.tmp_0', 'strided_slice_8.tmp_0',
            'strided_slice_9.tmp_0', 'transpose_2.tmp_0'
        ],
        [
            'strided_slice_10.tmp_0', 'strided_slice_11.tmp_0',
            'strided_slice_12.tmp_0', 'strided_slice_13.tmp_0',
            'strided_slice_14.tmp_0', 'transpose_4.tmp_0'
        ],
    ]

    yolo_output_names = []
    for i in range(target_number):
        yolo_output_names.extend(output_names[i])

    assert cfg.use_fine_grained_loss, \
        "Only support use_fine_grained_loss=True, Please set it in config file or '-o use_fine_grained_loss=true'"
    distill_loss = split_distill(yolo_output_names, 1000, target_number)
    loss = distill_loss + loss
    lr_builder = create('LearningRate')
    optim_builder = create('OptimizerBuilder')
    lr = lr_builder()
    opt = optim_builder(lr)
    opt.minimize(loss)

    exe.run(fluid.default_startup_program())
    checkpoint.load_params(exe, fluid.default_main_program(),
                           cfg.pretrain_weights)


    assert FLAGS.pruned_params is not None, \
        "FLAGS.pruned_params is empty!!! Please set it by '--pruned_params' option."
    pruned_params = FLAGS.pruned_params.strip().split(",")
    logger.info("pruned params: {}".format(pruned_params))
    pruned_ratios = [float(n) for n in FLAGS.pruned_ratios.strip().split(",")]
    logger.info("pruned ratios: {}".format(pruned_ratios))
    assert len(pruned_params) == len(pruned_ratios), \
        "The length of pruned params and pruned ratios should be equal."
    assert pruned_ratios > [0] * len(pruned_ratios) and pruned_ratios < [1] * len(pruned_ratios), \
        "The elements of pruned ratios should be in range (0, 1)."

    assert FLAGS.prune_criterion in ['l1_norm', 'geometry_median'], \
            "unsupported prune criterion {}".format(FLAGS.prune_criterion)
    pruner = Pruner(criterion=FLAGS.prune_criterion)
    distill_prog = pruner.prune(fluid.default_main_program(),
                                fluid.global_scope(),
                                params=pruned_params,
                                ratios=pruned_ratios,
                                place=place,
                                only_graph=False)[0]

    base_flops = flops(eval_prog)
    eval_prog = pruner.prune(eval_prog,
                             fluid.global_scope(),
                             params=pruned_params,
                             ratios=pruned_ratios,
                             place=place,
                             only_graph=True)[0]
    pruned_flops = flops(eval_prog)
    logger.info("FLOPs -{}; total FLOPs: {}; pruned FLOPs: {}".format(
        float(base_flops - pruned_flops) / base_flops, base_flops,
        pruned_flops))

    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    # only enable sync_bn in multi GPU devices
    sync_bn = getattr(model.backbone, 'norm_type', None) == 'sync_bn'
    build_strategy.sync_batch_norm = sync_bn and devices_num > 1 \
        and cfg.use_gpu

    exec_strategy = fluid.ExecutionStrategy()
    # iteration number when CompiledProgram tries to drop local execution scopes.
    # Set it to be 1 to save memory usages, so that unused variables in
    # local execution scopes can be deleted after each iteration.
    exec_strategy.num_iteration_per_drop_scope = 1

    parallel_main = fluid.CompiledProgram(distill_prog).with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)
    compiled_eval_prog = fluid.CompiledProgram(eval_prog)

    # parse eval fetches
    extra_keys = []
    if cfg.metric == 'COCO':
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg.metric == 'VOC':
        extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']
    eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog,
                                                     extra_keys)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()
    map_type = cfg.map_type if 'map_type' in cfg else '11point'
    best_box_ap_list = [0.0, 0]  #[map, iter]
    cfg_name = os.path.basename(FLAGS.config).split('.')[0]
    save_dir = os.path.join(cfg.save_dir, cfg_name)

    train_loader.start()
    for step_id in range(start_iter, cfg.max_iters):
        teacher_loss_np, distill_loss_np, loss_np, lr_np = exe.run(
            parallel_main,
            fetch_list=[
                'teacher_' + teacher_loss.name, distill_loss.name, loss.name,
                lr.name
            ])
        if step_id % cfg.log_iter == 0:
            logger.info(
                "step {} lr {:.6f}, loss {:.6f}, distill_loss {:.6f}, teacher_loss {:.6f}"
                .format(step_id, lr_np[0], loss_np[0], distill_loss_np[0],
                        teacher_loss_np[0]))
        if step_id % cfg.snapshot_iter == 0 and step_id != 0 or step_id == cfg.max_iters - 1:
            save_name = str(
                step_id) if step_id != cfg.max_iters - 1 else "model_final"
            checkpoint.save(exe, distill_prog,
                            os.path.join(save_dir, save_name))
            # eval
            results = eval_run(exe, compiled_eval_prog, eval_loader, eval_keys,
                               eval_values, eval_cls, cfg)
            resolution = None
            box_ap_stats = eval_results(results, cfg.metric, cfg.num_classes,
                                        resolution, is_bbox_normalized,
                                        FLAGS.output_eval, map_type,
                                        cfg['EvalReader']['dataset'])

            if box_ap_stats[0] > best_box_ap_list[0]:
                best_box_ap_list[0] = box_ap_stats[0]
                best_box_ap_list[1] = step_id
                checkpoint.save(exe, distill_prog,
                                os.path.join("./", "best_model"))
            logger.info("Best test box ap: {}, in step: {}".format(
                best_box_ap_list[0], best_box_ap_list[1]))
    train_loader.reset()
Esempio n. 22
0
def compress(args):
    train_reader = None
    test_reader = None
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    val_program = fluid.default_main_program().clone(for_test=True)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(args.pretrained_model,
                                               var.name))

        _logger.info("Load pretrained model from {}".format(
            args.pretrained_model))
        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.fluid.io.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.fluid.io.batch(train_reader,
                                         batch_size=args.batch_size,
                                         drop_last=True)

    train_feeder = feeder = fluid.DataFeeder([image, label], place)
    val_feeder = feeder = fluid.DataFeeder([image, label],
                                           place,
                                           program=val_program)

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=train_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))

    def train(epoch, program):

        build_strategy = fluid.BuildStrategy()
        exec_strategy = fluid.ExecutionStrategy()
        train_program = fluid.compiler.CompiledProgram(
            program).with_data_parallel(loss_name=avg_cost.name,
                                        build_strategy=build_strategy,
                                        exec_strategy=exec_strategy)

        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                train_program,
                feed=train_feeder.feed(data),
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))
            batch_id += 1

    test(0, val_program)

    params = get_pruned_params(args, fluid.default_main_program())
    _logger.info("FLOPs before pruning: {}".format(
        flops(fluid.default_main_program())))
    pruner = Pruner(args.criterion)
    pruned_val_program, _, _ = pruner.prune(val_program,
                                            fluid.global_scope(),
                                            params=params,
                                            ratios=[args.pruned_ratio] *
                                            len(params),
                                            place=place,
                                            only_graph=True)

    pruned_program, _, _ = pruner.prune(fluid.default_main_program(),
                                        fluid.global_scope(),
                                        params=params,
                                        ratios=[args.pruned_ratio] *
                                        len(params),
                                        place=place)
    _logger.info("FLOPs after pruning: {}".format(flops(pruned_program)))
    for i in range(args.num_epochs):
        train(i, pruned_program)
        if i % args.test_period == 0:
            test(i, pruned_val_program)
            save_model(exe, pruned_val_program,
                       os.path.join(args.model_path, str(i)))
        if args.save_inference:
            infer_model_path = os.path.join(args.model_path, "infer_models",
                                            str(i))
            fluid.io.save_inference_model(infer_model_path, ["image"], [out],
                                          exe, pruned_val_program)
            _logger.info(
                "Saved inference model into [{}]".format(infer_model_path))
Esempio n. 23
0
def get_prune_model(model_file, param_file, ratio, save_path):
    """
    Using the structured pruning algorithm to compress the network. 
    This interface is only used to evaluate the latency of the compressed network, and does not consider the loss of accuracy.
    Args:
        model_file(str), param_file(str): The inference model to be pruned.
        ratio(float): The ratio to prune the model.
        save_path(str): The save path of pruned model.
    """

    assert os.path.exists(model_file), f'{model_file} does not exist.'
    assert os.path.exists(
        param_file) or param_file is None, f'{param_file} does not exist.'
    paddle.enable_static()

    SKIP = ['image', 'feed', 'pool2d_0.tmp_0']

    folder = os.path.dirname(model_file)
    model_name = model_file.split('/')[-1]
    if param_file is None:
        param_name = None
    else:
        param_name = param_file.split('/')[-1]

    main_prog = static.Program()
    startup_prog = static.Program()
    place = paddle.CPUPlace()
    exe = paddle.static.Executor()
    scope = static.global_scope()
    exe.run(startup_prog)

    [inference_program, feed_target_names, fetch_targets] = (
        fluid.io.load_inference_model(
            folder, exe, model_filename=model_name, params_filename=param_name))

    prune_params = []
    graph = GraphWrapper(inference_program)
    for op in graph.ops():
        for inp in op.all_inputs():
            name = inp.name()
            if inp.name() in SKIP: continue
            if 'tmp' in inp.name(): continue
            cond_conv = len(inp._var.shape) == 4 and 'conv' in name
            # only prune conv
            if cond_conv:
                prune_params.append(name)

    # drop last conv
    prune_params.pop()
    ratios = [ratio] * len(prune_params)

    pruner = Pruner()
    main_program, _, _ = pruner.prune(
        inference_program,
        scope,
        params=prune_params,
        ratios=ratios,
        place=place,
        lazy=False,
        only_graph=False,
        param_backup=None,
        param_shape_backup=None)

    fluid.io.save_inference_model(
        save_path,
        feeded_var_names=feed_target_names,
        target_vars=fetch_targets,
        executor=exe,
        main_program=main_program,
        model_filename=model_name,
        params_filename=param_name)
Esempio n. 24
0
def main():
    config = program.load_config(FLAGS.config)
    program.merge_config(FLAGS.opt)
    logger.info(config)

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    program.check_gpu(use_gpu)

    alg = config['Global']['algorithm']
    assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE']
    if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE']:
        config['Global']['char_ops'] = CharacterOps(config['Global'])

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    startup_program = fluid.Program()
    train_program = fluid.Program()
    train_build_outputs = program.build(config,
                                        train_program,
                                        startup_program,
                                        mode='train')
    train_loader = train_build_outputs[0]
    train_fetch_name_list = train_build_outputs[1]
    train_fetch_varname_list = train_build_outputs[2]
    train_opt_loss_name = train_build_outputs[3]

    eval_program = fluid.Program()
    eval_build_outputs = program.build(config,
                                       eval_program,
                                       startup_program,
                                       mode='eval')
    eval_fetch_name_list = eval_build_outputs[1]
    eval_fetch_varname_list = eval_build_outputs[2]
    eval_program = eval_program.clone(for_test=True)

    train_reader = reader_main(config=config, mode="train")
    train_loader.set_sample_list_generator(train_reader, places=place)

    eval_reader = reader_main(config=config, mode="eval")

    exe = fluid.Executor(place)
    exe.run(startup_program)

    # compile program for multi-devices
    init_model(config, train_program, exe)

    sen = load_sensitivities("sensitivities_0.data")
    for i in skip_list:
        if i in sen.keys():
            sen.pop(i)
    back_bone_list = ['conv' + str(x) for x in range(1, 5)]
    for i in back_bone_list:
        for key in list(sen.keys()):
            if i + '_' in key:
                sen.pop(key)
    ratios = get_ratios_by_loss(sen, 0.03)
    logger.info("FLOPs before pruning: {}".format(flops(eval_program)))
    pruner = Pruner(criterion='geometry_median')
    print("ratios: {}".format(ratios))
    pruned_val_program, _, _ = pruner.prune(eval_program,
                                            fluid.global_scope(),
                                            params=ratios.keys(),
                                            ratios=ratios.values(),
                                            place=place,
                                            only_graph=True)

    pruned_program, _, _ = pruner.prune(train_program,
                                        fluid.global_scope(),
                                        params=ratios.keys(),
                                        ratios=ratios.values(),
                                        place=place)
    logger.info("FLOPs after pruning: {}".format(flops(pruned_val_program)))
    train_compile_program = program.create_multi_devices_program(
        pruned_program, train_opt_loss_name)


    train_info_dict = {'compile_program':train_compile_program,\
        'train_program':pruned_program,\
        'reader':train_loader,\
        'fetch_name_list':train_fetch_name_list,\
        'fetch_varname_list':train_fetch_varname_list}

    eval_info_dict = {'program':pruned_val_program,\
        'reader':eval_reader,\
        'fetch_name_list':eval_fetch_name_list,\
        'fetch_varname_list':eval_fetch_varname_list}

    if alg in ['EAST', 'DB']:
        program.train_eval_det_run(config,
                                   exe,
                                   train_info_dict,
                                   eval_info_dict,
                                   is_slim="prune")
    else:
        program.train_eval_rec_run(config, exe, train_info_dict,
                                   eval_info_dict)