Example 1
import os

import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.static as static
from paddleslim.core import GraphWrapper


def get_sparse_model(model_file, param_file, ratio, save_path):
    """
    Using the unstructured sparse algorithm to compress the network. 
    This interface is only used to evaluate the latency of the compressed network, and does not consider the loss of accuracy.
    Args:
        model_file(str), param_file(str): The inference model to be pruned.
        ratio(float): The ratio to prune the model.
        save_path(str): The save path of pruned model.
    """
    assert os.path.exists(model_file), f'{model_file} does not exist.'
    assert param_file is None or os.path.exists(
        param_file), f'{param_file} does not exist.'
    paddle.enable_static()

    SKIP = ['image', 'feed', 'pool2d_0.tmp_0']

    folder = os.path.dirname(model_file)
    model_name = model_file.split('/')[-1]
    if param_file is None:
        param_name = None
    else:
        param_name = param_file.split('/')[-1]

    main_prog = static.Program()
    startup_prog = static.Program()
    exe = paddle.static.Executor(paddle.CPUPlace())
    exe.run(startup_prog)

    [inference_program, feed_target_names, fetch_targets] = (
        fluid.io.load_inference_model(
            folder, exe, model_filename=model_name, params_filename=param_name))
    thresholds = {}

    graph = GraphWrapper(inference_program)
    for op in graph.ops():
        for inp in op.all_inputs():
            name = inp.name()
            if inp.name() in SKIP: continue
            if 'tmp' in inp.name(): continue
            # 1x1_conv
            cond_conv = (len(inp._var.shape) == 4 and inp._var.shape[2] == 1 and
                         inp._var.shape[3] == 1)
            cond_fc = False

            if cond_fc or cond_conv:
                array = np.array(paddle.static.global_scope().find_var(name)
                                 .get_tensor())
                flatten = np.abs(array.flatten())
                index = min(len(flatten) - 1, int(ratio * len(flatten)))
                # indices of weights sorted by ascending magnitude; the first
                # `index` of them (the smallest) are the pruning targets
                ind = np.unravel_index(
                    np.argsort(flatten, axis=None), flatten.shape)
                thresholds[name] = ind[0][:index]

    for op in graph.ops():
        for inp in op.all_inputs():
            name = inp.name()
            if name in SKIP: continue
            if 'tmp' in inp.name(): continue

            cond_conv = (len(inp._var.shape) == 4 and inp._var.shape[2] == 1 and
                         inp._var.shape[3] == 1)
            cond_fc = False

            # only support 1x1_conv now
            if not (cond_conv or cond_fc): continue
            array = np.array(paddle.static.global_scope().find_var(name)
                             .get_tensor())
            if thresholds.get(name) is not None:
                np.put(array, thresholds.get(name), 0)
            assert (abs(1 - np.count_nonzero(array) / array.size - ratio) < 1e-2
                    ), 'The model sparsity is abnormal.'
            paddle.static.global_scope().find_var(name).get_tensor().set(
                array, paddle.CPUPlace())

    fluid.io.save_inference_model(
        save_path,
        feeded_var_names=feed_target_names,
        target_vars=fetch_targets,
        executor=exe,
        main_program=inference_program,
        model_filename=model_name,
        params_filename=param_name)
    print("The pruned model is saved in: ", save_path)
Example 2
import numpy as np
import paddle
from paddleslim.core import GraphWrapper


def merge(teacher_program,
          student_program,
          data_name_map,
          place,
          scope=None,
          name_prefix='teacher_'):
    """Merge teacher program into student program and add a uniform prefix to the
    names of all vars in teacher program

    Args:
        teacher_program(Program): The input teacher model paddle program 
        student_program(Program): The input student model paddle program
        data_name_map(dict): Mapping from teacher input interface names to
                             student input interface names, where the key of
                             the dict is the input name of teacher_program,
                             and the value is the input name of student_program.
        place(CPUPlace()|CUDAPlace(N)): The device on which Paddle runs.
        scope(Scope): This parameter indicates the variable scope used by
                      the program. If not specified, the default global scope
                      will be used. Default: None
        name_prefix(str): Name prefix added for all vars of the teacher program.
                          Default: 'teacher_'

    Returns:
        None
    """
    if scope is None:
        scope = paddle.static.global_scope()
    teacher_program = teacher_program.clone(for_test=True)
    for teacher_var in teacher_program.list_vars():
        skip_rename = False
        if teacher_var.name != 'fetch' and teacher_var.name != 'feed':
            if teacher_var.name in data_name_map.keys():
                new_name = data_name_map[teacher_var.name]
                if new_name == teacher_var.name:
                    skip_rename = True
            else:
                new_name = name_prefix + teacher_var.name
            if not skip_rename:
                # scope var rename
                old_var = scope.var(teacher_var.name).get_tensor()
                renamed_var = scope.var(new_name).get_tensor()
                renamed_var.set(np.array(old_var), place)

                # program var rename
                renamed_var = teacher_program.global_block()._rename_var(
                    teacher_var.name, new_name)

    for teacher_var in teacher_program.list_vars():
        if teacher_var.name != 'fetch' and teacher_var.name != 'feed':
            # student program add var
            new_var = student_program.global_block()._clone_variable(
                teacher_var, force_persistable=False)
            new_var.stop_gradient = True

    for block in teacher_program.blocks:
        for op in block.ops:
            if op.type != 'feed' and op.type != 'fetch':
                inputs = {}
                outputs = {}
                attrs = {}
                for input_name in op.input_names:
                    inputs[input_name] = [
                        block.var(in_var_name)
                        for in_var_name in op.input(input_name)
                    ]

                for output_name in op.output_names:
                    outputs[output_name] = [
                        block.var(out_var_name)
                        for out_var_name in op.output(output_name)
                    ]
                for attr_name in op.attr_names:
                    attrs[attr_name] = op.attr(attr_name)
                student_program.global_block().append_op(type=op.type,
                                                         inputs=inputs,
                                                         outputs=outputs,
                                                         attrs=attrs)

    student_graph = GraphWrapper(student_program)
    for op in student_graph.ops():
        belongsto_teacher = False
        for inp in op.all_inputs():
            if 'teacher' in inp.name():
                belongsto_teacher = True
                break
        if belongsto_teacher:
            op._op._set_attr("skip_quant", True)
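A hedged sketch of how merge is typically used for distillation, assuming teacher_program and student_program were both built (or loaded) in static-graph mode and expose an input named 'image' (a hypothetical name):

place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda() \
    else paddle.CPUPlace()
# teacher input name -> student input name
data_name_map = {'image': 'image'}
merge(teacher_program, student_program, data_name_map, place)
# After merging, every teacher variable is visible inside student_program under
# the 'teacher_' prefix, so a distillation loss can be built between a teacher
# activation and the corresponding student activation.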
Example 3
import logging
import pickle
import time

import numpy as np
import paddle.fluid as fluid
from paddleslim.core import GraphWrapper
from paddleslim.prune import Pruner, load_sensitivities
# `seconds_to_hms` is assumed to be a small helper, defined elsewhere, that
# formats a number of seconds as "hh:mm:ss".


def sensitivity(program,
                place,
                param_names,
                eval_func,
                sensitivities_file=None,
                pruned_ratios=None):
    """
    Compute the sensitivity of each parameter in `param_names` by pruning it
    at several ratios and measuring the relative drop of `eval_func(program)`.

    Args:
        program(Program): The eval program to analyze.
        place(CPUPlace|CUDAPlace): The device on which evaluation runs.
        param_names(list[str]): Names of the parameters to prune.
        eval_func(callable): Takes a program and returns a scalar metric.
        sensitivities_file(str): Path used to load and checkpoint results.
                                 Default: None
        pruned_ratios(list[float]): Ratios to try. Default: 0.1~0.9, step 0.1.

    Returns:
        dict: {param_name: {ratio: relative metric loss}}
    """
    scope = fluid.global_scope()
    graph = GraphWrapper(program)
    sensitivities = load_sensitivities(sensitivities_file)

    if pruned_ratios is None:
        pruned_ratios = np.arange(0.1, 1, step=0.1)

    total_evaluate_iters = 0
    for name in param_names:
        if name not in sensitivities:
            sensitivities[name] = {}
            total_evaluate_iters += len(list(pruned_ratios))
        else:
            total_evaluate_iters += (len(list(pruned_ratios)) -
                                     len(sensitivities[name]))
    eta = '-'
    start_time = time.time()
    baseline = eval_func(graph.program)
    cost = time.time() - start_time
    eta = cost * (total_evaluate_iters - 1)
    current_iter = 1
    for name in sensitivities:
        for ratio in pruned_ratios:
            if ratio in sensitivities[name]:
                logging.debug('{}, {} has already been computed.'.format(name, ratio))
                continue

            progress = float(current_iter) / total_evaluate_iters
            progress = "%.2f%%" % (progress * 100)
            logging.info(
                "Total evaluate iters={}, current={}, progress={}, eta={}".format(
                    total_evaluate_iters, current_iter, progress,
                    seconds_to_hms(
                        int(cost * (total_evaluate_iters - current_iter)))))
            current_iter += 1

            pruner = Pruner()
            logging.info("sensitive - param: {}; ratios: {}".format(
                name, ratio))
            pruned_program, param_backup, _ = pruner.prune(
                program=graph.program,
                scope=scope,
                params=[name],
                ratios=[ratio],
                place=place,
                lazy=True,
                only_graph=False,
                param_backup=True)
            pruned_metric = eval_func(pruned_program)
            loss = (baseline - pruned_metric) / baseline
            logging.info("pruned param: {}; {}; loss={}".format(
                name, ratio, loss))

            sensitivities[name][ratio] = loss

            if sensitivities_file is not None:
                with open(sensitivities_file, 'wb') as f:
                    pickle.dump(sensitivities, f)

            for param_name in param_backup.keys():
                param_t = scope.find_var(param_name).get_tensor()
                param_t.set(param_backup[param_name], place)
    return sensitivities
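A minimal sketch, assuming eval_program is a test-mode Program whose parameters are already loaded into the global scope, and eval_acc is a user-defined function that runs the program over a validation set and returns a scalar metric; the parameter names are hypothetical:

sens = sensitivity(
    eval_program,
    fluid.CPUPlace(),
    param_names=['conv1_weights', 'conv2_weights'],  # hypothetical names
    eval_func=eval_acc,
    sensitivities_file='./sensitivities.pkl',
    pruned_ratios=[0.1, 0.2, 0.3])
# sens maps each parameter name to {ratio: relative drop of the metric}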
Example 4
import os

import paddle
import paddle.fluid as fluid
import paddle.static as static
from paddleslim.core import GraphWrapper
from paddleslim.prune import Pruner


def get_prune_model(model_file, param_file, ratio, save_path):
    """
    Using the structured pruning algorithm to compress the network. 
    This interface is only used to evaluate the latency of the compressed network, and does not consider the loss of accuracy.
    Args:
        model_file(str), param_file(str): The inference model to be pruned.
        ratio(float): The ratio to prune the model.
        save_path(str): The save path of pruned model.
    """

    assert os.path.exists(model_file), f'{model_file} does not exist.'
    assert param_file is None or os.path.exists(
        param_file), f'{param_file} does not exist.'
    paddle.enable_static()

    SKIP = ['image', 'feed', 'pool2d_0.tmp_0']

    folder = os.path.dirname(model_file)
    model_name = model_file.split('/')[-1]
    if param_file is None:
        param_name = None
    else:
        param_name = param_file.split('/')[-1]

    main_prog = static.Program()
    startup_prog = static.Program()
    place = paddle.CPUPlace()
    exe = paddle.static.Executor(place)
    scope = static.global_scope()
    exe.run(startup_prog)

    [inference_program, feed_target_names, fetch_targets
     ] = (fluid.io.load_inference_model(folder,
                                        exe,
                                        model_filename=model_name,
                                        params_filename=param_name))

    prune_params = []
    graph = GraphWrapper(inference_program)
    for op in graph.ops():
        for inp in op.all_inputs():
            name = inp.name()
            if inp.name() in SKIP: continue
            if 'tmp' in inp.name(): continue
            cond_conv = len(inp._var.shape) == 4 and 'conv' in name
            # only prune conv
            if cond_conv:
                prune_params.append(name)

    # drop last conv
    prune_params.pop()
    ratios = [ratio] * len(prune_params)

    pruner = Pruner()
    main_program, _, _ = pruner.prune(inference_program,
                                      scope,
                                      params=prune_params,
                                      ratios=ratios,
                                      place=place,
                                      lazy=False,
                                      only_graph=False,
                                      param_backup=None,
                                      param_shape_backup=None)

    fluid.io.save_inference_model(save_path,
                                  feeded_var_names=feed_target_names,
                                  target_vars=fetch_targets,
                                  executor=exe,
                                  main_program=main_program,
                                  model_filename=model_name,
                                  params_filename=param_name)
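A minimal usage sketch, again with hypothetical paths; it removes roughly 30% of the channels from every selected conv layer (the last conv is skipped by the function) and saves the smaller model:

get_prune_model(
    model_file='./inference/model.pdmodel',    # hypothetical path
    param_file='./inference/model.pdiparams',  # hypothetical path
    ratio=0.3,
    save_path='./pruned_model')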