def convert_acnet_weights(train_weights, deploy_weights, eps):
    """Fold each ACNet training-time triplet (square / ver / hor conv, each
    followed by its own BN) into a single deploy-time conv weight and bias.

    Reads `train_weights` (hdf5), writes the fused dict to `deploy_weights`.
    `eps` is the BN epsilon folded into the running-variance term.
    """
    train_dict = read_hdf5(train_weights)
    print(train_dict.keys())
    deploy_dict = {}
    square_names = [n for n in train_dict.keys() if SQUARE_KERNEL_KEYWORD in n]
    for sq_name in square_names:

        def fetch(suffix):
            # Sibling tensor of this layer: swap the square-kernel keyword
            # for the requested variable suffix.
            return train_dict[sq_name.replace(SQUARE_KERNEL_KEYWORD, suffix)]

        def bn_stats(bn_prefix):
            # (mean, std, gamma, beta) of one branch's BatchNorm, with eps
            # already folded into std.
            mean = fetch(bn_prefix + '.running_mean')
            std = np.sqrt(fetch(bn_prefix + '.running_var') + eps)
            return mean, std, fetch(bn_prefix + '.weight'), fetch(bn_prefix + '.bias')

        sq_mean, sq_std, sq_gamma, sq_beta = bn_stats('square_bn')
        ver_mean, ver_std, ver_gamma, ver_beta = bn_stats('ver_bn')
        hor_mean, hor_std, hor_gamma, hor_beta = bn_stats('hor_bn')

        # Same left-to-right arithmetic order as the training-time formula.
        fused_bias = sq_beta + ver_beta + hor_beta \
            - sq_mean * sq_gamma / sq_std \
            - ver_mean * ver_gamma / ver_std \
            - hor_mean * hor_gamma / hor_std

        fused_kernel = _fuse_kernel(train_dict[sq_name], sq_gamma, sq_std)
        _add_to_square_kernel(fused_kernel, _fuse_kernel(fetch('ver_conv.weight'), ver_gamma, ver_std))
        _add_to_square_kernel(fused_kernel, _fuse_kernel(fetch('hor_conv.weight'), hor_gamma, hor_std))

        deploy_dict[sq_name.replace(SQUARE_KERNEL_KEYWORD, 'fused_conv.weight')] = fused_kernel
        deploy_dict[sq_name.replace(SQUARE_KERNEL_KEYWORD, 'fused_conv.bias')] = fused_bias

    # Carry over everything that is not part of a fused branch untouched.
    for k, v in train_dict.items():
        if 'hor_' not in k and 'ver_' not in k and 'square_' not in k:
            deploy_dict[k] = v
    save_hdf5(deploy_dict, deploy_weights)
def mask_out_weights(initialized_weights, masked_weights, mask_dict):
    """Multiply selected tensors of an hdf5 weight file by their masks.

    Tensors whose name appears in `mask_dict` are element-wise multiplied by
    the corresponding mask (and reported); all other tensors pass through
    unchanged. The result is written to `masked_weights`.
    """
    source = read_hdf5(initialized_weights)
    result = OrderedDict()
    for key, tensor in source.items():
        if key in mask_dict:
            result[key] = tensor * mask_dict[key]
            print('mask', key)
        else:
            result[key] = tensor
    save_hdf5(result, masked_weights)
def load_by_order(self, path):
    """Load model weights from hdf5, matching by exact name first and falling
    back to positional order keys (kernel0, gamma0, beta0, mu0, sigma0, ...)
    for entries the file does not name directly.

    Parameters use kernel/gamma/beta fallbacks; buffers use mu/sigma.
    Entries with neither a name match nor a fallback rule are skipped.
    """
    hdf5_dict = read_hdf5(path)
    counters = {'kernel': 0, 'gamma': 0, 'beta': 0, 'mu': 0, 'sigma': 0}
    assigned = 0

    def next_order_key(name, rules):
        # First rule whose substring occurs in `name` yields the next
        # positional key; its counter advances even if the later dict
        # lookup misses (matches the original behavior).
        for substring, prefix in rules:
            if substring in name:
                key = '{}{}'.format(prefix, counters[prefix])
                counters[prefix] += 1
                return key
        return None

    param_rules = (('conv.weight', 'kernel'), ('bn.weight', 'gamma'), ('bn.bias', 'beta'))
    buffer_rules = (('bn.running_mean', 'mu'), ('bn.running_var', 'sigma'))
    for rules, named_entries in ((param_rules, self.state.model.named_parameters()),
                                 (buffer_rules, self.state.model.named_buffers())):
        for name, tensor in named_entries:
            if name in hdf5_dict:
                value = hdf5_dict[name]
            else:
                order_key = next_order_key(name, rules)
                value = None if order_key is None else hdf5_dict[order_key]
            if value is not None:
                self.set_value(tensor, value)
                assigned += 1

    msg = 'Assigned {} params '.format(assigned)
    if path is not None:
        msg += ' from hdf5: {}'.format(path)
    self.echo(msg)
def load_hdf5(self, path):
    """Load model parameters and buffers by exact name from an hdf5 file.

    Entries present in the model but missing from the file are reported and
    left untouched. Prints a summary with the number of assigned tensors.
    """
    hdf5_dict = read_hdf5(path)
    assigned_params = 0
    for k, v in self.state.model.named_parameters():
        if k in hdf5_dict:
            self.set_value(v, hdf5_dict[k])
            # Bug fix: parameters were not counted, so the summary reported
            # only buffer assignments.
            assigned_params += 1
        else:
            # Bug fix: the placeholder was never filled in — the literal
            # string 'param {} not found in hdf5' was printed.
            print('param {} not found in hdf5'.format(k))
    for k, v in self.state.model.named_buffers():
        if k in hdf5_dict:
            self.set_value(v, hdf5_dict[k])
            assigned_params += 1
        else:
            print('buffer {} not found in hdf5'.format(k))
    print('Assigned {} params from hdf5: {}'.format(assigned_params, path))
def get_mask_by_magnitude(weights_path, nonzero_ratio):
    """Build binary keep-masks for the 2-D (FC) and 4-D (conv) tensors of an
    hdf5 weight file, keeping the globally largest-magnitude `nonzero_ratio`
    fraction of those weights.

    Returns an OrderedDict mapping tensor name -> boolean mask
    (True = keep, i.e. |w| >= global magnitude threshold).
    """
    hdf5_dict = read_hdf5(weights_path)
    to_concat = [np.abs(value.ravel())
                 for value in hdf5_dict.values() if value.ndim in (2, 4)]
    all_abs_weights = np.concatenate(to_concat)
    num_zero = int(len(all_abs_weights) * (1 - nonzero_ratio))
    # Robustness fix: nonzero_ratio == 0 used to index one past the end
    # (IndexError); clamp so the threshold degrades to the max magnitude.
    num_zero = min(num_zero, len(all_abs_weights) - 1)
    # Perf fix: np.sort stays in C; sorted() materialized a Python list of
    # scalar objects for potentially millions of weights.
    abs_thresh = np.sort(all_abs_weights)[num_zero]
    mask_dict = OrderedDict()
    for name, value in hdf5_dict.items():
        if value.ndim in (2, 4):
            mask_dict[name] = np.abs(value) >= abs_thresh
    return mask_dict
# Inspection script: print per-tensor statistics of an hdf5 weight file
# given on the command line, and tally parameter counts by tensor rank.
from utils.misc import read_hdf5
import sys
import numpy as np

di = read_hdf5(sys.argv[1])
num_kernel_params = 0
conv_kernel_cnt = 0
matrix_param_cnt = 0
vec_param_cnt = 0
# NOTE(review): the counters below are initialized but never updated in this
# loop — presumably used (or intended for use) further down the file; confirm.
bias_cnt = 0
beta_cnt = 0
gamma_cnt = 0
mu_cnt = 0
var_cnt = 0
for name, array in di.items():
    # Stats are printed only for 2-D (FC) and 4-D (conv) tensors; the
    # array[:4, :4] preview below requires at least two dimensions.
    if array.ndim in [2, 4]:
        num_kernel_params += array.size
        print(name, array.shape, np.mean(array), np.std(array),
              ' positive {}, negative {}, zeros {}'.format(
                  np.sum(array > 0), np.sum(array < 0), np.sum(array == 0)))
        if 'res' in name:
            # Preview the top-left corner of residual-block tensors.
            print(array[:4, :4])
        elif 'diag' in name:
            print(array)
    # Tally total element counts by rank (matrix / vector / conv kernel).
    if array.ndim == 2:
        matrix_param_cnt += array.size
    elif array.ndim == 1:
        vec_param_cnt += array.size
    elif array.ndim == 4:
        conv_kernel_cnt += array.size
def convert_rfnet_weights(train_weights, deploy_weights, eps):
    """Fold each RFNet training-time triplet (main / left / right conv, each
    with its own BN) into one deploy-time conv weight and bias per layer.

    Left and right branches are first merged with their 'alter' kernels via
    _merge_kernel. Reads `train_weights` (hdf5), writes `deploy_weights`.
    `eps` is the BN epsilon folded into the running-variance term.
    """
    train_dict = read_hdf5(train_weights)
    print(train_dict.keys())
    deploy_dict = {}
    main_names = [n for n in train_dict.keys() if SQUARE_KERNEL_KEYWORD in n]
    for main_name in main_names:

        def fetch(suffix):
            # Sibling tensor of this layer, addressed by variable suffix.
            return train_dict[main_name.replace(SQUARE_KERNEL_KEYWORD, suffix)]

        def bn_stats(bn_prefix):
            # (mean, std, gamma, beta) of one branch's BatchNorm, eps folded.
            mean = fetch(bn_prefix + '.running_mean')
            std = np.sqrt(fetch(bn_prefix + '.running_var') + eps)
            return mean, std, fetch(bn_prefix + '.weight'), fetch(bn_prefix + '.bias')

        # NOTE: unlike left/right, the main branch is NOT merged with a
        # 'main_alter' kernel — that step is disabled in the original code.
        main_kernel = train_dict[main_name]
        main_mean, main_std, main_gamma, main_beta = bn_stats('main_bn')

        left_kernel = _merge_kernel(fetch('left_conv.weight'), fetch('left_alter.weight'))
        left_mean, left_std, left_gamma, left_beta = bn_stats('left_bn')

        right_kernel = _merge_kernel(fetch('right_conv.weight'), fetch('right_alter.weight'))
        right_mean, right_std, right_gamma, right_beta = bn_stats('right_bn')

        # Same left-to-right arithmetic order as the training-time formula.
        fused_bias = main_beta + left_beta + right_beta \
            - main_mean * main_gamma / main_std \
            - left_mean * left_gamma / left_std \
            - right_mean * right_gamma / right_std

        fused_kernel = _fuse_kernel(main_kernel, main_gamma, main_std)
        _add_to_main_kernel(fused_kernel, _fuse_kernel(left_kernel, left_gamma, left_std))
        _add_to_main_kernel(fused_kernel, _fuse_kernel(right_kernel, right_gamma, right_std))

        deploy_dict[main_name.replace(SQUARE_KERNEL_KEYWORD, 'fused_conv.weight')] = fused_kernel
        deploy_dict[main_name.replace(SQUARE_KERNEL_KEYWORD, 'fused_conv.bias')] = fused_bias

    # Carry over everything that is not part of a fused branch untouched.
    for k, v in train_dict.items():
        if 'right_' not in k and 'left_' not in k and 'main_' not in k:
            deploy_dict[k] = v
    save_hdf5(deploy_dict, deploy_weights)
def convert_ksnet_weights(train_weights, deploy_weights, eps):
    """Fold each KSNet training-time group (square conv plus the masked
    ver / hor / lx / rx side convs, each with its own BN) into a single
    deploy-time conv weight and bias per layer.

    Side-branch kernels are element-wise multiplied by their stored binary
    masks before fusion. Reads `train_weights` (hdf5), writes
    `deploy_weights`; `eps` is the BN epsilon folded into the variance.
    """
    train_dict = read_hdf5(train_weights)
    print(train_dict.keys())
    deploy_dict = {}
    square_names = [n for n in train_dict.keys() if SQUARE_KERNEL_KEYWORD in n]
    for sq_name in square_names:

        def fetch(suffix):
            # Sibling tensor of this layer, addressed by variable suffix.
            return train_dict[sq_name.replace(SQUARE_KERNEL_KEYWORD, suffix)]

        def bn_stats(bn_prefix):
            # (mean, std, gamma, beta) of one branch's BatchNorm, eps folded.
            mean = fetch(bn_prefix + '.running_mean')
            std = np.sqrt(fetch(bn_prefix + '.running_var') + eps)
            return mean, std, fetch(bn_prefix + '.weight'), fetch(bn_prefix + '.bias')

        def branch(prefix):
            # Side branches store a learnable kernel plus a fixed binary mask.
            kernel = fetch(prefix + '_conv.weight') * fetch(prefix + '_conv.mask')
            mean, std, gamma, beta = bn_stats(prefix + '_bn')
            return kernel, mean, std, gamma, beta

        sq_kernel = train_dict[sq_name]
        sq_mean, sq_std, sq_gamma, sq_beta = bn_stats('square_bn')
        ver_kernel, ver_mean, ver_std, ver_gamma, ver_beta = branch('ver')
        hor_kernel, hor_mean, hor_std, hor_gamma, hor_beta = branch('hor')
        lx_kernel, lx_mean, lx_std, lx_gamma, lx_beta = branch('lx')
        rx_kernel, rx_mean, rx_std, rx_gamma, rx_beta = branch('rx')

        # Grouping of the subtractions mirrors the original accumulation
        # exactly, so floating-point results are bit-identical.
        fused_bias = sq_beta + ver_beta + hor_beta + lx_beta + rx_beta
        fused_bias = fused_bias - sq_mean * sq_gamma / sq_std
        fused_bias = fused_bias - (ver_mean * ver_gamma / ver_std + hor_mean * hor_gamma / hor_std)
        fused_bias = fused_bias - (lx_mean * lx_gamma / lx_std + rx_mean * rx_gamma / rx_std)

        fused_kernel = _fuse_kernel(sq_kernel, sq_gamma, sq_std)
        for kern, gamma, std in ((ver_kernel, ver_gamma, ver_std),
                                 (hor_kernel, hor_gamma, hor_std),
                                 (lx_kernel, lx_gamma, lx_std),
                                 (rx_kernel, rx_gamma, rx_std)):
            _add_to_square_kernel(fused_kernel, _fuse_kernel(kern, gamma, std))

        deploy_dict[sq_name.replace(SQUARE_KERNEL_KEYWORD, 'fused_conv.weight')] = fused_kernel
        deploy_dict[sq_name.replace(SQUARE_KERNEL_KEYWORD, 'fused_conv.bias')] = fused_bias

    # Carry over everything that is not part of a fused branch untouched.
    branch_tags = ('hor_', 'ver_', 'square_', 'rx_', 'lx_')
    for k, v in train_dict.items():
        if all(tag not in k for tag in branch_tags):
            deploy_dict[k] = v
    save_hdf5(deploy_dict, deploy_weights)
def load_part(self, part_key, path):
    """Load weights from the hdf5 file at `path`, forwarding `part_key` as
    the ignore keyword to load_from_weights_dict."""
    weights_dict = read_hdf5(path)
    self.load_from_weights_dict(weights_dict, load_weights_keyword=None,
                                path=path, ignore_keyword=part_key)
def load_hdf5(self, path, load_weights_keyword=None):
    """Read the hdf5 file at `path` and delegate assignment of its tensors
    to load_from_weights_dict."""
    weights_dict = read_hdf5(path)
    self.load_from_weights_dict(weights_dict, load_weights_keyword, path=path)