def step(self, action):
    # Pseudo-prune and collect the corresponding statistics. The real pruning
    # only happens after all the pseudo pruning is done.
    if self.visited[self.cur_ind]:
        action = self.strategy_dict[self.prunable_idx[self.cur_ind]][0]
        preserve_idx = self.index_buffer[self.cur_ind]
    else:
        action = self._action_wall(action)  # percentage to preserve
        preserve_idx = None

    # Prune and update the action. After this call, the layer's weights have
    # been pruned: the weights of the removed channels are zeroed out.
    # Returns:
    #   action: the preservation ratio actually applied to this layer
    #   d_prime: the number of channels remaining after pruning
    #   preserve_idx: the indices of the preserved channels
    action, d_prime, preserve_idx = self.prune_kernel(
        self.prunable_idx[self.cur_ind], action, preserve_idx)

    # This block never runs here, because self.shared_idx is always empty.
    if not self.visited[self.cur_ind]:
        for group in self.shared_idx:
            if self.cur_ind in group:  # set the shared ones
                for g_idx in group:
                    self.strategy_dict[self.prunable_idx[g_idx]][0] = action
                    self.strategy_dict[self.prunable_idx[g_idx - 1]][1] = action
                    self.visited[g_idx] = True
                    self.index_buffer[g_idx] = preserve_idx.copy()

    # To print this for every layer, you could change the condition to `if True:`.
    if self.export_model:  # export checkpoint
        print('# Pruning {}: ratio: {}, d_prime: {}'.format(
            self.cur_ind, action, d_prime))

    # Record this layer's preservation ratio and number of preserved channels.
    self.strategy.append(action)  # save action to strategy
    self.d_prime_list.append(d_prime)

    self.strategy_dict[self.prunable_idx[self.cur_ind]][0] = action
    if self.cur_ind > 0:
        self.strategy_dict[self.prunable_idx[self.cur_ind - 1]][1] = action

    # all the actions are made
    if self._is_final_layer():
        assert len(self.strategy) == len(self.prunable_idx)
        current_flops = self._cur_flops()
        acc_t1 = time.time()
        acc = self._validate(self.val_loader, self.model)
        acc_t2 = time.time()
        self.val_time = acc_t2 - acc_t1
        compress_ratio = current_flops * 1. / self.org_flops
        info_set = {
            'compress_ratio': compress_ratio,
            'accuracy': acc,
            'strategy': self.strategy.copy()
        }
        reward = self.reward(self, acc, current_flops)

        if reward > self.best_reward:
            self.best_reward = reward
            self.best_strategy = self.strategy.copy()
            self.best_d_prime_list = self.d_prime_list.copy()
            prGreen('New best reward: {:.4f}, acc: {:.4f}, compress: {:.4f}'.format(
                self.best_reward, acc, compress_ratio))
            prGreen('New best policy: {}'.format(self.best_strategy))
            prGreen('New best d primes: {}'.format(self.best_d_prime_list))

        obs = self.layer_embedding[self.cur_ind, :].copy()  # actually the same as the last state
        done = True
        if self.export_model:  # export state dict
            torch.save(self.model.state_dict(), self.export_path)
            return None, None, None, None
        return obs, reward, done, info_set

    info_set = None
    reward = 0
    done = False
    self.visited[self.cur_ind] = True  # set to visited
    self.cur_ind += 1  # the index of the next layer

    # build the next state (in-place modify)
    self.layer_embedding[self.cur_ind][-3] = self._cur_reduced() * 1. / self.org_flops  # reduced
    self.layer_embedding[self.cur_ind][-2] = sum(self.flops_list[self.cur_ind + 1:]) * 1. / self.org_flops  # rest
    self.layer_embedding[self.cur_ind][-1] = self.strategy[-1]  # last action
    obs = self.layer_embedding[self.cur_ind, :].copy()

    return obs, reward, done, info_set
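# For context, a minimal sketch of how an RL agent would drive this environment,
# one step() call per prunable layer. The names `env`, `agent`,
# `agent.select_action`, and `agent.observe` are hypothetical stand-ins for the
# DDPG training loop that lives elsewhere; this only illustrates the step()
# contract above.
def rollout_episode(env, agent):
    obs = env.reset()
    done = False
    episode_reward = 0.
    while not done:
        action = agent.select_action(obs)  # preservation ratio for the current layer
        next_obs, reward, done, info = env.step(action)
        agent.observe(obs, action, reward, next_obs, done)  # store the transition
        obs = next_obs
        episode_reward += reward  # reward is nonzero only at the final layer
    return episode_reward, info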
def step(self, action):
    # Pseudo-prune and collect the corresponding statistics. The real pruning
    # only happens after all the pseudo pruning is done.
    if self.visited[self.cur_ind]:
        action = self.strategy_dict[self.prunable_idx[self.cur_ind]][0]
        preserve_idx = self.index_buffer[self.cur_ind]
    else:
        # percentage to preserve; bounds the action using the FLOPs already
        # reduced and the FLOPs still reducible, matching the pseudocode in the paper
        action = self._action_wall(action)
        preserve_idx = None

    # Prune and update the action. This runs the whole per-layer pruning
    # procedure: it computes the mask and zeroes out the pruned weights,
    # selecting channels directly by weight magnitude.
    action, d_prime, preserve_idx = self.prune_kernel(self.prunable_idx[self.cur_ind], action, preserve_idx)

    if not self.visited[self.cur_ind]:
        for group in self.shared_idx:  # for MobileNet V1, self.shared_idx is empty
            if self.cur_ind in group:  # set the shared ones
                for g_idx in group:
                    self.strategy_dict[self.prunable_idx[g_idx]][0] = action
                    self.strategy_dict[self.prunable_idx[g_idx - 1]][1] = action
                    self.visited[g_idx] = True
                    self.index_buffer[g_idx] = preserve_idx.copy()

    if self.export_model:  # export checkpoint
        print('# Pruning {}: ratio: {}, d_prime: {}'.format(self.cur_ind, action, d_prime))

    self.strategy.append(action)  # save action to strategy
    self.d_prime_list.append(d_prime)

    self.strategy_dict[self.prunable_idx[self.cur_ind]][0] = action
    if self.cur_ind > 0:
        # the channel indices must line up between adjacent layers: this layer's
        # output ratio is also the next layer's input ratio
        self.strategy_dict[self.prunable_idx[self.cur_ind - 1]][1] = action

    # all the actions are made
    if self._is_final_layer():
        assert len(self.strategy) == len(self.prunable_idx)
        current_flops = self._cur_flops()
        acc_t1 = time.time()
        acc = self._validate(self.val_loader, self.model)
        acc_t2 = time.time()
        self.val_time = acc_t2 - acc_t1
        compress_ratio = current_flops * 1. / self.org_flops
        info_set = {'compress_ratio': compress_ratio, 'accuracy': acc, 'strategy': self.strategy.copy()}
        # overall reward; the function is acc_reward, defined in rewards.py
        reward = self.reward(self, acc, current_flops)

        if reward > self.best_reward:
            self.best_reward = reward
            self.best_strategy = self.strategy.copy()
            self.best_d_prime_list = self.d_prime_list.copy()
            prGreen('New best reward: {:.4f}, acc: {:.4f}, compress: {:.4f}'.format(self.best_reward, acc, compress_ratio))
            prGreen('New best policy: {}'.format(self.best_strategy))
            prGreen('New best d primes: {}'.format(self.best_d_prime_list))

        obs = self.layer_embedding[self.cur_ind, :].copy()  # actually the same as the last state
        done = True
        if self.export_model:  # export state dict
            torch.save(self.model.state_dict(), self.export_path)
            return None, None, None, None
        return obs, reward, done, info_set

    info_set = None
    reward = 0
    done = False
    self.visited[self.cur_ind] = True  # set to visited
    self.cur_ind += 1  # the index of the next layer

    # build the next state (in-place modify)
    self.layer_embedding[self.cur_ind][-3] = self._cur_reduced() * 1. / self.org_flops  # reduced
    self.layer_embedding[self.cur_ind][-2] = sum(self.flops_list[self.cur_ind + 1:]) * 1. / self.org_flops  # rest
    self.layer_embedding[self.cur_ind][-1] = self.strategy[-1]  # last action
    obs = self.layer_embedding[self.cur_ind, :].copy()

    return obs, reward, done, info_set
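# The comment above references acc_reward in rewards.py. A minimal sketch of
# that accuracy-only reward, assuming the AMC-style definition (FLOPs are
# ignored here because _action_wall already enforces the FLOPs budget during
# the episode):
def acc_reward(env, acc, flops):
    # scale validation accuracy (in percent) into a small reward
    return acc * 0.01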
def step(self, action):
    action = self._action_wall(action)  # percentage to preserve
    # virtually conduct the pruning process
    action = self.shrink_action(action, self.cur_ind)
    self.strategy.append(action)
    self.strategy_dict[self.prunable_idx[self.cur_ind]] = action

    if self._is_final_layer():
        assert len(self.strategy) == len(self.prunable_idx)
        current_flops = self._cur_flops()
        acc_t1 = time.time()
        self.model_masked = self.vgg_masked(self.strategy)
        acc = self._validate(self.val_loader, self.model_masked)
        # exponential moving average of the accuracy baseline for the current beta
        self.acc_baseline[self.cur_beta_idx] = self.gama * self.acc_baseline[self.cur_beta_idx] \
            + (1 - self.gama) * acc
        acc_t2 = time.time()
        self.val_time = acc_t2 - acc_t1
        compress_ratio = current_flops * 1. / self.org_flops
        info_set = {
            'compress_ratio': compress_ratio,
            'accuracy': acc,
            'strategy': self.strategy.copy(),
            'd_prime': self.d_prime_list.copy()
        }
        # advantage-style reward: accuracy relative to the moving-average baseline
        reward = (acc - self.acc_baseline[self.cur_beta_idx]) * 0.01
        # reward = acc * 0.01
        # reward = self.reward(self, self.beta, acc, current_flops)

        if reward > self.best_reward[self.cur_beta_idx]:
            self.best_reward[self.cur_beta_idx] = reward
            self.best_strategy[self.cur_beta_idx] = self.strategy.copy()
            self.best_d_prime_list[self.cur_beta_idx] = self.d_prime_list.copy()
            prGreen('best action for beta={}'.format(self.beta))
            prGreen('New best reward: {:.4f}, acc: {:.4f}, compress: {:.4f}'.format(reward, acc, compress_ratio))
            prGreen('New best policy: {}'.format(self.strategy))
            prGreen('New best d primes: {}'.format(self.d_prime_list))

        obs = self.layer_embedding[self.cur_ind, :].copy()  # actually the same as the last state
        done = True
        # if self.export_model:  # export state dict
        #     torch.save(self.model.state_dict(), self.export_path)
        #     return None, None, None, None
        return obs, reward, done, info_set

    info_set = None
    reward = 0
    done = False
    self.cur_ind += 1  # the index of the next layer

    # build the next state (in-place modify)
    self.layer_embedding[self.cur_ind][-4] = self._cur_reduced() * 1. / self.org_flops  # reduced
    self.layer_embedding[self.cur_ind][-3] = self.following_changeable(
        self.prunable_idx[self.cur_ind]) / self.org_flops  # following changeable flops
    self.layer_embedding[self.cur_ind][-2] = self.strategy[-1]  # last action
    obs = self.layer_embedding[self.cur_ind, :].copy()

    return obs, reward, done, info_set
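# The variant above replaces the raw-accuracy reward with an advantage against
# an exponential moving average (EMA) baseline, kept separately for each beta.
# A standalone sketch of that update (hypothetical helper mirroring the lines
# above; the default decay value is an assumption):
def ema_baseline_reward(acc, baseline, gama=0.9):
    # the baseline tracks recent validation accuracy under the same beta
    new_baseline = gama * baseline + (1 - gama) * acc
    # positive reward only when the new accuracy beats the running baseline
    reward = (acc - new_baseline) * 0.01
    return reward, new_baseline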