def _prepare_distill(self):
    if self.distill_config.teacher_model is None:
        _logger.error(
            'If you want to add distillation, please pass an instance of the teacher model.'
        )

    ### The teacher model is passed in as an instance so that the user can set hyper-parameters easily.
    assert isinstance(self.distill_config.teacher_model, Layer)

    # Load the teacher parameters.
    if self.distill_config.teacher_model_path is not None:
        param_state_dict, _ = paddle.load_dygraph(
            self.distill_config.teacher_model_path)
        self.distill_config.teacher_model.set_dict(param_state_dict)

    self.ofa_teacher_model = OFABase(self.distill_config.teacher_model)
    self.ofa_teacher_model.model.eval()

    # Register hooks only if mapping_layers is not None:
    # if mapping_layers is None, only the output of the teacher model is used;
    # if mapping_layers is NOT None, add hooks and compute the distill loss on the mapped layers.
    mapping_layers = getattr(self.distill_config, 'mapping_layers', None)
    if mapping_layers is not None:
        # Under DataParallel, sublayer names gain a '_layers.' prefix, so align the mapping names.
        if isinstance(self.model, DataParallel):
            for idx, name in enumerate(mapping_layers):
                if name[:7] != '_layers':
                    mapping_layers[idx] = '_layers.' + name
        self._mapping_layers = mapping_layers
        self.netAs = []
        for name, sublayer in self.model.named_sublayers():
            if name in self._mapping_layers:
                if self.distill_config.mapping_op is not None:
                    if self.distill_config.mapping_op.lower() == 'conv2d':
                        netA = SuperConv2D(
                            getattr(sublayer, '_num_filters',
                                    sublayer._out_channels),
                            getattr(sublayer, '_num_filters',
                                    sublayer._out_channels), 1)
                    elif self.distill_config.mapping_op.lower() == 'linear':
                        netA = SuperLinear(
                            getattr(sublayer, '_output_dim',
                                    sublayer._out_features),
                            getattr(sublayer, '_output_dim',
                                    sublayer._out_features))
                    else:
                        raise NotImplementedError(
                            "Unsupported op: {}".format(
                                self.distill_config.mapping_op.lower()))
                else:
                    netA = None
                if netA is not None:
                    self.netAs_param.extend(netA.parameters())
                self.netAs.append(netA)
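# --- Illustration (not part of the original file) -----------------------------
# The getattr fallbacks above bridge two attribute-naming conventions: fluid-era
# layers expose `_num_filters` / `_output_dim`, while paddle.nn layers expose
# `_out_channels` / `_out_features`. A minimal sketch with made-up dummy classes:
class FluidStyleConv:
    _num_filters = 64        # fluid-style attribute name

class PaddleNNStyleConv:
    _out_channels = 64       # paddle.nn.Conv2D-style attribute name

for conv in (FluidStyleConv(), PaddleNNStyleConv()):
    out_channels = getattr(conv, '_num_filters',
                           getattr(conv, '_out_channels', None))
    print(type(conv).__name__, out_channels)   # both resolve to 64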
def _prepare_distill(self):
    self.Tacts, self.Sacts = {}, {}

    if self.distill_config.teacher_model is None:
        logging.error(
            'If you want to add distillation, please pass an instance of the teacher model.'
        )

    ### The teacher model is passed in as an instance so that the user can set hyper-parameters easily.
    assert isinstance(self.distill_config.teacher_model,
                      paddle.fluid.dygraph.Layer)

    # Load the teacher parameters.
    if self.distill_config.teacher_model_path is not None:
        param_state_dict, _ = paddle.load_dygraph(
            self.distill_config.teacher_model_path)
        self.distill_config.teacher_model.set_dict(param_state_dict)

    self.ofa_teacher_model = OFABase(self.distill_config.teacher_model)
    self.ofa_teacher_model.model.eval()

    # Register hooks only if mapping_layers is not None:
    # if mapping_layers is None, only the output of the teacher model is used;
    # if mapping_layers is NOT None, add hooks and compute the distill loss on the mapped layers.
    mapping_layers = getattr(self.distill_config, 'mapping_layers', None)
    if mapping_layers is not None:
        self.netAs = []
        for name, sublayer in self.model.named_sublayers():
            if name in mapping_layers:
                # 1x1 adapter that maps the student feature map onto the teacher's channels.
                netA = SuperConv2D(
                    sublayer._num_filters,
                    sublayer._num_filters,
                    filter_size=1)
                self.netAs_param.extend(netA.parameters())
                self.netAs.append(netA)

        def get_activation(mem, name):
            def get_output_hook(layer, input, output):
                mem[name] = output
            return get_output_hook

        def add_hook(net, mem, mapping_layers):
            for idx, (n, m) in enumerate(net.named_sublayers()):
                if n in mapping_layers:
                    m.register_forward_post_hook(get_activation(mem, n))

        # Cache student activations in self.Sacts and teacher activations in self.Tacts.
        add_hook(self.model, self.Sacts, mapping_layers)
        add_hook(self.ofa_teacher_model.model, self.Tacts, mapping_layers)
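# --- Illustration (not part of the original file) -----------------------------
# A self-contained sketch of the forward-post-hook pattern used above, on a tiny
# paddle 2.x model (TinyNet and its layer names are made up for illustration):
# each hook stores the named sublayer's output in a dict keyed by the layer name,
# exactly as Sacts/Tacts are filled for the student and the teacher.
import paddle
import paddle.nn as nn

class TinyNet(nn.Layer):
    def __init__(self):
        super().__init__()
        self.block1 = nn.Linear(8, 8)
        self.block2 = nn.Linear(8, 4)

    def forward(self, x):
        return self.block2(self.block1(x))

def get_activation(mem, name):
    def get_output_hook(layer, input, output):
        mem[name] = output          # cache the activation under the sublayer name
    return get_output_hook

acts = {}
net = TinyNet()
for n, m in net.named_sublayers():
    if n in ['block1']:             # plays the role of mapping_layers
        m.register_forward_post_hook(get_activation(acts, n))

_ = net(paddle.randn([2, 8]))
print(acts['block1'].shape)         # [2, 8], captured by the hook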
def _prepare_distill(self):
    self.Tacts, self.Sacts = {}, {}

    if self.distill_config.teacher_model is None:
        logging.error(
            'If you want to add distillation, please pass an instance of the teacher model.'
        )

    ### The teacher model is passed in as an instance so that the user can set hyper-parameters easily.
    assert isinstance(self.distill_config.teacher_model, Layer)

    # Load the teacher parameters.
    if self.distill_config.teacher_model_path is not None:
        param_state_dict, _ = paddle.load_dygraph(
            self.distill_config.teacher_model_path)
        self.distill_config.teacher_model.set_dict(param_state_dict)

    self.ofa_teacher_model = OFABase(self.distill_config.teacher_model)
    self.ofa_teacher_model.model.eval()

    # Register hooks only if mapping_layers is not None:
    # if mapping_layers is None, only the output of the teacher model is used;
    # if mapping_layers is NOT None, add hooks and compute the distill loss on the mapped layers.
    mapping_layers = getattr(self.distill_config, 'mapping_layers', None)
    if mapping_layers is not None:
        self.netAs = []
        for name, sublayer in self.model.named_sublayers():
            if name in mapping_layers:
                if self.distill_config.mapping_op is not None:
                    if self.distill_config.mapping_op.lower() == 'conv2d':
                        netA = SuperConv2D(
                            getattr(sublayer, '_num_filters',
                                    sublayer._out_channels),
                            getattr(sublayer, '_num_filters',
                                    sublayer._out_channels), 1)
                    elif self.distill_config.mapping_op.lower() == 'linear':
                        netA = SuperLinear(
                            getattr(sublayer, '_output_dim',
                                    sublayer._out_features),
                            getattr(sublayer, '_output_dim',
                                    sublayer._out_features))
                    else:
                        raise NotImplementedError(
                            "Unsupported op: {}".format(
                                self.distill_config.mapping_op.lower()))
                else:
                    netA = None
                if netA is not None:
                    self.netAs_param.extend(netA.parameters())
                self.netAs.append(netA)

        def get_activation(mem, name):
            def get_output_hook(layer, input, output):
                mem[name] = output
            return get_output_hook

        def add_hook(net, mem, mapping_layers):
            for idx, (n, m) in enumerate(net.named_sublayers()):
                if n in mapping_layers:
                    m.register_forward_post_hook(get_activation(mem, n))

        # Cache student activations in self.Sacts and teacher activations in self.Tacts.
        add_hook(self.model, self.Sacts, mapping_layers)
        add_hook(self.ofa_teacher_model.model, self.Tacts, mapping_layers)
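# --- Illustration (not part of the original file) -----------------------------
# A hedged sketch of how the prepared pieces are typically combined after a
# forward pass: each captured student activation is passed through its `netA`
# adapter (when one exists) and compared with the matching teacher activation.
# The use of mse_loss and this helper's name/signature are assumptions for
# illustration; the real implementation may apply a different distillation loss.
import paddle
import paddle.nn.functional as F

def distill_loss_sketch(netAs, mapping_layers, Sacts, Tacts):
    losses = []
    for netA, name in zip(netAs, mapping_layers):
        s_act, t_act = Sacts[name], Tacts[name]
        if netA is not None:
            s_act = netA(s_act)     # adapter maps student features onto the teacher's shape
        losses.append(F.mse_loss(s_act, t_act))
    return paddle.add_n(losses)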