import os
import time
import pickle
import logging

import numpy as np
import paddle
import paddle.static as static
from paddle import fluid

# GraphWrapper, Pruner and load_sensitivities come from PaddleSlim;
# seconds_to_hms (a HH:MM:SS formatting helper used for the ETA log)
# is assumed to be defined elsewhere in this module.
from paddleslim.core import GraphWrapper
from paddleslim.prune import Pruner, load_sensitivities


def get_sparse_model(model_file, param_file, ratio, save_path):
    """
    Use the unstructured sparsity algorithm to compress the network.
    This interface is only used to evaluate the latency of the compressed
    network and does not consider the loss of accuracy.

    Args:
        model_file(str), param_file(str): The inference model to be pruned.
        ratio(float): The ratio to prune the model.
        save_path(str): The save path of the pruned model.
    """
    assert os.path.exists(model_file), f'{model_file} does not exist.'
    assert param_file is None or os.path.exists(
        param_file), f'{param_file} does not exist.'
    paddle.enable_static()

    SKIP = ['image', 'feed', 'pool2d_0.tmp_0']

    folder = os.path.dirname(model_file)
    model_name = os.path.basename(model_file)
    param_name = None if param_file is None else os.path.basename(param_file)

    main_prog = static.Program()
    startup_prog = static.Program()
    exe = paddle.static.Executor(paddle.CPUPlace())
    exe.run(startup_prog)

    [inference_program, feed_target_names, fetch_targets] = (
        fluid.io.load_inference_model(
            folder,
            exe,
            model_filename=model_name,
            params_filename=param_name))

    # First pass: for every 1x1-conv weight, collect the flat indices of the
    # `ratio` fraction of elements with the smallest absolute value.
    thresholds = {}
    graph = GraphWrapper(inference_program)
    for op in graph.ops():
        for inp in op.all_inputs():
            name = inp.name()
            if name in SKIP:
                continue
            if 'tmp' in name:
                continue
            # 1x1_conv
            cond_conv = (len(inp._var.shape) == 4 and inp._var.shape[2] == 1
                         and inp._var.shape[3] == 1)
            cond_fc = False
            if cond_fc or cond_conv:
                array = np.array(paddle.static.global_scope().find_var(name)
                                 .get_tensor())
                flatten = np.abs(array.flatten())
                index = min(len(flatten) - 1, int(ratio * len(flatten)))
                ind = np.unravel_index(
                    np.argsort(
                        flatten, axis=None), flatten.shape)
                thresholds[name] = ind[0][:index]

    # Second pass: zero out the collected positions and write the sparse
    # tensors back into the global scope.
    for op in graph.ops():
        for inp in op.all_inputs():
            name = inp.name()
            if name in SKIP:
                continue
            if 'tmp' in name:
                continue
            cond_conv = (len(inp._var.shape) == 4 and inp._var.shape[2] == 1
                         and inp._var.shape[3] == 1)
            cond_fc = False
            # only support 1x1_conv now
            if not (cond_conv or cond_fc):
                continue
            array = np.array(paddle.static.global_scope().find_var(name)
                             .get_tensor())
            if thresholds.get(name) is not None:
                np.put(array, thresholds.get(name), 0)
            # the achieved sparsity should match the requested ratio
            assert (abs(1 - np.count_nonzero(array) / array.size - ratio) <
                    1e-2), 'The model sparsity is abnormal.'
            paddle.static.global_scope().find_var(name).get_tensor().set(
                array, paddle.CPUPlace())

    fluid.io.save_inference_model(
        save_path,
        feeded_var_names=feed_target_names,
        target_vars=fetch_targets,
        executor=exe,
        main_program=inference_program,
        model_filename=model_name,
        params_filename=param_name)
    print("The pruned model is saved in: ", save_path)
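
# A minimal usage sketch for get_sparse_model (not part of the original
# code): the paths below are hypothetical placeholders for an exported
# Paddle inference model saved as separate model/params files. Because the
# graph structure is untouched, the result is only meaningful for latency
# benchmarking, not for accuracy evaluation.
def _example_get_sparse_model():
    get_sparse_model(
        model_file='./infer_model/model.pdmodel',    # hypothetical path
        param_file='./infer_model/model.pdiparams',  # hypothetical path
        ratio=0.75,  # zero out ~75% of each 1x1-conv weight tensor
        save_path='./infer_model_sparse')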
def merge(teacher_program,
          student_program,
          data_name_map,
          place,
          scope=None,
          name_prefix='teacher_'):
    """Merge the teacher program into the student program and add a uniform
    prefix to the names of all vars in the teacher program.

    Args:
        teacher_program(Program): The input teacher model paddle program.
        student_program(Program): The input student model paddle program.
        data_name_map(dict): Mapping between the teacher and student input
                             interfaces, where the key is the input name of
                             teacher_program and the value is the input name
                             of student_program.
        place(CPUPlace()|CUDAPlace(N)): The device that paddle runs on.
        scope(Scope): The variable scope used by the programs. If not
                      specified, the default global scope is used.
                      Default: None.
        name_prefix(str): Name prefix added for all vars of the teacher
                          program. Default: 'teacher_'.

    Returns:
        None
    """
    if scope is None:
        scope = paddle.static.global_scope()
    teacher_program = teacher_program.clone(for_test=True)

    # Rename every teacher var (except feed/fetch) both in the scope and in
    # the program, unless it is mapped to a student input of the same name.
    for teacher_var in teacher_program.list_vars():
        skip_rename = False
        if teacher_var.name != 'fetch' and teacher_var.name != 'feed':
            if teacher_var.name in data_name_map.keys():
                new_name = data_name_map[teacher_var.name]
                if new_name == teacher_var.name:
                    skip_rename = True
            else:
                new_name = name_prefix + teacher_var.name
            if not skip_rename:
                # scope var rename
                old_var = scope.var(teacher_var.name).get_tensor()
                renamed_var = scope.var(new_name).get_tensor()
                renamed_var.set(np.array(old_var), place)

                # program var rename
                renamed_var = teacher_program.global_block()._rename_var(
                    teacher_var.name, new_name)

    # Copy the (renamed) teacher vars into the student program.
    for teacher_var in teacher_program.list_vars():
        if teacher_var.name != 'fetch' and teacher_var.name != 'feed':
            # student program add var
            new_var = student_program.global_block()._clone_variable(
                teacher_var, force_persistable=False)
            new_var.stop_gradient = True

    # Copy the teacher ops (except feed/fetch) into the student program.
    for block in teacher_program.blocks:
        for op in block.ops:
            if op.type != 'feed' and op.type != 'fetch':
                inputs = {}
                outputs = {}
                attrs = {}
                for input_name in op.input_names:
                    inputs[input_name] = [
                        block.var(in_var_name)
                        for in_var_name in op.input(input_name)
                    ]
                for output_name in op.output_names:
                    outputs[output_name] = [
                        block.var(out_var_name)
                        for out_var_name in op.output(output_name)
                    ]
                for attr_name in op.attr_names:
                    attrs[attr_name] = op.attr(attr_name)
                student_program.global_block().append_op(
                    type=op.type, inputs=inputs, outputs=outputs, attrs=attrs)

    # Ops that consume teacher vars are marked so they are skipped during
    # quantization of the merged program.
    student_graph = GraphWrapper(student_program)
    for op in student_graph.ops():
        belongsto_teacher = False
        for inp in op.all_inputs():
            if 'teacher' in inp.name():
                belongsto_teacher = True
                break
        if belongsto_teacher:
            op._op._set_attr("skip_quant", True)
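
# A minimal distillation sketch for merge (not part of the original code):
# it assumes both programs take an input variable named 'image' and that
# their parameters are already loaded into the global scope.
def _example_merge(teacher_program, student_program):
    place = paddle.CPUPlace()
    # map the teacher's input name to the student's input name
    data_name_map = {'image': 'image'}
    merge(teacher_program, student_program, data_name_map, place)
    # After merging, every teacher var/op lives inside student_program with
    # the 'teacher_' prefix, so a distillation loss can be defined between
    # teacher and student tensors of the merged program.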
def sensitivity(program,
                place,
                param_names,
                eval_func,
                sensitivities_file=None,
                pruned_ratios=None):
    """Compute the sensitivity of each parameter in `param_names` by pruning
    it at every ratio in `pruned_ratios` and measuring the relative drop of
    the metric returned by `eval_func`.
    """
    scope = fluid.global_scope()
    graph = GraphWrapper(program)
    sensitivities = load_sensitivities(sensitivities_file)

    if pruned_ratios is None:
        pruned_ratios = np.arange(0.1, 1, step=0.1)

    # Count the (param, ratio) pairs that still have to be evaluated.
    total_evaluate_iters = 0
    for name in param_names:
        if name not in sensitivities:
            sensitivities[name] = {}
            total_evaluate_iters += len(list(pruned_ratios))
        else:
            total_evaluate_iters += (
                len(list(pruned_ratios)) - len(sensitivities[name]))

    # Evaluate the unpruned baseline; its cost is also used for the ETA log.
    start_time = time.time()
    baseline = eval_func(graph.program)
    cost = time.time() - start_time

    current_iter = 1
    for name in sensitivities:
        for ratio in pruned_ratios:
            if ratio in sensitivities[name]:
                logging.debug('{}, {} has been computed.'.format(name, ratio))
                continue

            progress = float(current_iter) / total_evaluate_iters
            progress = "%.2f%%" % (progress * 100)
            logging.info(
                "Total evaluate iters={}, current={}, progress={}, eta={}".
                format(total_evaluate_iters, current_iter, progress,
                       seconds_to_hms(
                           int(cost * (total_evaluate_iters - current_iter)))))
            current_iter += 1

            pruner = Pruner()
            logging.info("sensitive - param: {}; ratios: {}".format(name,
                                                                    ratio))
            # Prune the single parameter lazily (weights are zeroed, not
            # removed) so it can be restored from `param_backup` afterwards.
            pruned_program, param_backup, _ = pruner.prune(
                program=graph.program,
                scope=scope,
                params=[name],
                ratios=[ratio],
                place=place,
                lazy=True,
                only_graph=False,
                param_backup=True)

            pruned_metric = eval_func(pruned_program)
            loss = (baseline - pruned_metric) / baseline
            logging.info("pruned param: {}; {}; loss={}".format(name, ratio,
                                                                loss))
            sensitivities[name][ratio] = loss
            if sensitivities_file is not None:
                with open(sensitivities_file, 'wb') as f:
                    pickle.dump(sensitivities, f)

            # Restore the pruned parameter before the next trial.
            for param_name in param_backup.keys():
                param_t = scope.find_var(param_name).get_tensor()
                param_t.set(param_backup[param_name], place)
    return sensitivities
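
# A minimal sensitivity-analysis sketch (not part of the original code):
# `eval_acc` is a hypothetical callable that evaluates a program on a
# validation set and returns a scalar metric such as top-1 accuracy; the
# parameter filter below is model-specific and purely illustrative.
def _example_sensitivity(eval_program, eval_acc):
    place = paddle.CPUPlace()
    conv_weights = [
        p.name for p in eval_program.global_block().all_parameters()
        if 'conv' in p.name  # model-specific naming assumption
    ]
    sens = sensitivity(
        eval_program,
        place,
        conv_weights,
        eval_acc,
        sensitivities_file='./sensitivities.pkl',  # hypothetical path
        pruned_ratios=[0.1, 0.3, 0.5])
    # sens maps each parameter name to {pruned_ratio: relative metric loss}
    return sens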
def get_prune_model(model_file, param_file, ratio, save_path):
    """
    Use the structured pruning algorithm to compress the network.
    This interface is only used to evaluate the latency of the compressed
    network and does not consider the loss of accuracy.

    Args:
        model_file(str), param_file(str): The inference model to be pruned.
        ratio(float): The ratio to prune the model.
        save_path(str): The save path of the pruned model.
    """
    assert os.path.exists(model_file), f'{model_file} does not exist.'
    assert param_file is None or os.path.exists(
        param_file), f'{param_file} does not exist.'
    paddle.enable_static()

    SKIP = ['image', 'feed', 'pool2d_0.tmp_0']

    folder = os.path.dirname(model_file)
    model_name = os.path.basename(model_file)
    param_name = None if param_file is None else os.path.basename(param_file)

    main_prog = static.Program()
    startup_prog = static.Program()
    place = paddle.CPUPlace()
    exe = paddle.static.Executor(place)
    scope = static.global_scope()
    exe.run(startup_prog)

    [inference_program, feed_target_names, fetch_targets] = (
        fluid.io.load_inference_model(
            folder,
            exe,
            model_filename=model_name,
            params_filename=param_name))

    # Collect the conv weights to prune.
    prune_params = []
    graph = GraphWrapper(inference_program)
    for op in graph.ops():
        for inp in op.all_inputs():
            name = inp.name()
            if name in SKIP:
                continue
            if 'tmp' in name:
                continue
            cond_conv = len(inp._var.shape) == 4 and 'conv' in name
            # only prune conv
            if cond_conv:
                prune_params.append(name)

    # drop last conv
    prune_params.pop()
    ratios = [ratio] * len(prune_params)

    pruner = Pruner()
    main_program, _, _ = pruner.prune(
        inference_program,
        scope,
        params=prune_params,
        ratios=ratios,
        place=place,
        lazy=False,
        only_graph=False,
        param_backup=None,
        param_shape_backup=None)

    fluid.io.save_inference_model(
        save_path,
        feeded_var_names=feed_target_names,
        target_vars=fetch_targets,
        executor=exe,
        main_program=main_program,
        model_filename=model_name,
        params_filename=param_name)
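
# A minimal usage sketch for get_prune_model (not part of the original
# code): the paths are hypothetical placeholders for an exported Paddle
# inference model. Structured (channel) pruning at ratio 0.3 removes
# roughly 30% of the filters of every pruned conv layer, so the saved
# graph is genuinely smaller than the input model.
def _example_get_prune_model():
    get_prune_model(
        model_file='./infer_model/model.pdmodel',    # hypothetical path
        param_file='./infer_model/model.pdiparams',  # hypothetical path
        ratio=0.3,
        save_path='./infer_model_pruned')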