def init_optimizer(self, ):
    """Build the learning-rate schedule and optimizer (train mode only)."""
    # Nothing to build outside of training.
    if self.mode != 'train':
        return
    # One scheduler step per loader iteration.
    steps = len(self.loader)
    self.lr = create('LearningRate')(steps)
    # The optimizer covers both the student model and the distill model.
    self.optimizer = create('OptimizerBuilder')(
        self.lr, [self.model, self.distill_model])
def build_slim_model(cfg, mode='train'):
    """Wrap the configured architecture with a slim (QAT) transform.

    Fills cfg['model'], cfg['slim'] and cfg['slim_type'] and returns cfg.
    Only QAT slimming is supported.
    """
    assert cfg.slim == 'QAT', 'Only QAT is supported now'
    base_model = create(cfg.architecture)
    if mode == 'train':
        load_pretrain_weight(base_model, cfg.pretrain_weights)
    quantizer = create(cfg.slim)
    cfg['slim_type'] = cfg.slim
    # TODO: fix quant export model in framework.
    if mode == 'test' and cfg.slim == 'QAT':
        quantizer.quant_config['activation_preprocess_type'] = None
    cfg['model'] = quantizer(base_model)
    cfg['slim'] = quantizer
    if mode != 'train':
        # eval/test: load the final (possibly quant-aware) weights.
        load_pretrain_weight(cfg['model'], cfg.weights)
    return cfg
def __init__(self, cfg, mode='train'):
    """Trainer setup: dataset/loader, model, distributed info, callbacks.

    Args:
        cfg: global config object (supports attribute and dict-style access).
        mode (str): one of 'train', 'eval' or 'test' (case-insensitive).
    """
    self.cfg = cfg
    assert mode.lower() in ['train', 'eval', 'test'], \
        "mode should be 'train', 'eval' or 'test'"
    self.mode = mode.lower()
    self.optimizer = None  # built later (see init_optimizer) in train mode
    # init distillation config
    self.distill_model = None
    self.distill_loss = None

    # build data loader; dataset key is e.g. 'TrainDataset' / 'EvalDataset'
    self.dataset = cfg['{}Dataset'.format(self.mode.capitalize())]
    if self.mode == 'train':
        self.loader = create('{}Reader'.format(self.mode.capitalize()))(
            self.dataset, cfg.worker_num)
    self.model = create(cfg.architecture)

    #normalize params for deploy
    self.model.load_meanstd(cfg['TestReader']['sample_transforms'])

    # EvalDataset build with BatchSampler to evaluate in single device
    if self.mode == 'eval':
        self._eval_batch_sampler = paddle.io.BatchSampler(
            self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
        self.loader = create('{}Reader'.format(self.mode.capitalize()))(
            self.dataset, cfg.worker_num, self._eval_batch_sampler)
    # TestDataset build after user set images, skip loader creation here

    # distributed run info (world size 1 / rank 0 when not distributed)
    self._nranks = dist.get_world_size()
    self._local_rank = dist.get_rank()

    # shared mutable state read by callbacks during train/eval/predict
    self.status = {}

    self.start_epoch = 0
    self.end_epoch = 0 if 'epoch' not in cfg else cfg.epoch

    # initial default callbacks
    self._init_callbacks()

    # initial default metrics
    self._init_metrics()
    self._reset_metrics()
def build_teacher_model(config):
    """Create the teacher network used for distillation (frozen by default)."""
    teacher = create(config.architecture)
    # Load pretrain weights first, then final weights if given (later wins).
    for key in ('pretrain_weights', 'weights'):
        path = config.get(key, None)
        if path:
            load_pretrain_weight(teacher, path)
            logger.debug("Load weights {} to start training".format(path))
    # Freeze the teacher so only the student receives gradient updates.
    if config.get("freeze_parameters", True):
        for p in teacher.parameters():
            p.trainable = False
    teacher.train()
    return teacher
def predict(self, images, draw_threshold=0.5, output_dir='output',
            save_txt=False):
    """Run inference on `images`, visualize detections and save results.

    Args:
        images (list): image paths to run inference on.
        draw_threshold (float): score threshold for drawing boxes.
        output_dir (str): directory where rendered images are written.
        save_txt (bool): also dump per-image results to a .txt file.
    """
    self.dataset.set_images(images)
    loader = create('TestReader')(self.dataset, 0)

    imid2path = self.dataset.get_imid2path()

    anno_file = self.dataset.get_anno()
    clsid2catid, catid2name = get_categories(self.cfg.metric,
                                             anno_file=anno_file)

    # Run Infer
    self.status['mode'] = 'test'
    self.model.eval()
    results = []
    for step_id, data in enumerate(loader):
        self.status['step_id'] = step_id
        # forward
        outs = self.model(data)
        for key in ['im_shape', 'scale_factor', 'im_id']:
            outs[key] = data[key]
        # move everything tensor-like to numpy for post-processing
        for key, value in outs.items():
            if hasattr(value, 'numpy'):
                outs[key] = value.numpy()
        results.append(outs)

    for outs in results:
        batch_res = get_infer_results(outs, clsid2catid)
        bbox_num = outs['bbox_num']

        # `bbox_num[i]` boxes belong to image i; slice the flat result list.
        start = 0
        for i, im_id in enumerate(outs['im_id']):
            image_path = imid2path[int(im_id)]
            image = Image.open(image_path).convert('RGB')
            # apply the EXIF orientation so drawn boxes line up
            image = ImageOps.exif_transpose(image)
            self.status['original_image'] = np.array(image.copy())

            end = start + bbox_num[i]
            bbox_res = batch_res['bbox'][start:end] \
                if 'bbox' in batch_res else None
            keypoint_res = batch_res['keypoint'][start:end] \
                if 'keypoint' in batch_res else None
            image = visualize_results(image, bbox_res, keypoint_res,
                                      int(im_id), catid2name, draw_threshold)
            self.status['result_image'] = np.array(image.copy())
            if self._compose_callback:
                self._compose_callback.on_step_end(self.status)
            # save image with detection
            save_name = self._get_save_image_name(output_dir, image_path)
            logger.info(
                "Detection bbox results save in {}".format(save_name))
            image.save(save_name, quality=95)
            if save_txt:
                save_path = os.path.splitext(save_name)[0] + '.txt'
                # Fix: the old code rebound the outer `results` list to a
                # dict here, shadowing the inference output mid-iteration;
                # use a dedicated dict instead.
                txt_results = {}
                txt_results["im_id"] = im_id
                if bbox_res:
                    txt_results["bbox_res"] = bbox_res
                if keypoint_res:
                    txt_results["keypoint_res"] = keypoint_res
                save_result(save_path, txt_results, catid2name,
                            draw_threshold)
            start = end
def train(self, validate=False):
    """Full training loop: epochs x steps, optional distillation and eval.

    Args:
        validate (bool): when True, evaluate on rank 0 every
            `snapshot_epoch` epochs and on the final epoch.
    """
    assert self.mode == 'train', "Model not in 'train' mode"
    Init_mark = False  # ensures eval metrics are (re)initialized only once

    model = self.model
    # wrap for multi-GPU; single-process runs use the raw model directly
    if self._nranks > 1:
        model = paddle.DataParallel(self.model,
                                    find_unused_parameters=self.cfg.get(
                                        "find_unused_parameters", False))

    self.status.update({
        'epoch_id': self.start_epoch,
        'step_id': 0,
        'steps_per_epoch': len(self.loader)
    })

    # smoothed timing / loss stats consumed by logging callbacks
    self.status['batch_time'] = stats.SmoothedValue(self.cfg.log_iter,
                                                    fmt='{avg:.4f}')
    self.status['data_time'] = stats.SmoothedValue(self.cfg.log_iter,
                                                   fmt='{avg:.4f}')
    # NOTE(review): 'training_staus' (sic) — presumably this misspelled key
    # is read elsewhere by callbacks; confirm before renaming.
    self.status['training_staus'] = stats.TrainingStats(self.cfg.log_iter)

    self._compose_callback.on_train_begin(self.status)

    for epoch_id in range(self.start_epoch, self.cfg.epoch):
        self.status['mode'] = 'train'
        self.status['epoch_id'] = epoch_id
        self._compose_callback.on_epoch_begin(self.status)
        self.loader.dataset.set_epoch(epoch_id)
        model.train()
        iter_tic = time.time()
        for step_id, data in enumerate(self.loader):
            self.status['data_time'].update(time.time() - iter_tic)
            self.status['step_id'] = step_id
            self._compose_callback.on_step_begin(self.status)
            data['epoch_id'] = epoch_id

            # model forward
            outputs = model(data)
            if self.distill_model is not None:
                # distillation: total loss = student + teacher + distill
                teacher_outputs = self.distill_model(data)
                distill_loss = self.distill_loss(outputs, teacher_outputs,
                                                 data)
                loss = outputs['loss'] + teacher_outputs[
                    "loss"] + distill_loss
            else:
                loss = outputs['loss']

            # model backward
            loss.backward()
            self.optimizer.step()
            # read the LR used for this step before advancing the schedule
            curr_lr = self.optimizer.get_lr()
            self.lr.step()
            self.optimizer.clear_grad()
            self.status['learning_rate'] = curr_lr

            # only rank 0 (or single-process runs) accumulates loss stats
            if self._nranks < 2 or self._local_rank == 0:
                loss_dict = {"loss": outputs['loss']}
                if self.distill_model is not None:
                    loss_dict.update({
                        "loss_student": outputs['loss'],
                        "loss_teacher": teacher_outputs["loss"],
                        "loss_distill": distill_loss,
                        "loss": loss
                    })
                self.status['training_staus'].update(loss_dict)

            self.status['batch_time'].update(time.time() - iter_tic)
            self._compose_callback.on_step_end(self.status)
            iter_tic = time.time()

        self._compose_callback.on_epoch_end(self.status)

        # in-training evaluation: rank 0 only, at snapshot epochs or the
        # last epoch of the run
        if validate and self._local_rank == 0 \
                and ((epoch_id + 1) % self.cfg.snapshot_epoch == 0 \
                     or epoch_id == self.end_epoch - 1):
            print("begin to eval...")
            # lazily build the eval loader the first time it is needed
            if not hasattr(self, '_eval_loader'):
                # build evaluation dataset and loader
                self._eval_dataset = self.cfg.EvalDataset
                self._eval_batch_sampler = \
                    paddle.io.BatchSampler(
                        self._eval_dataset,
                        batch_size=self.cfg.EvalReader['batch_size'])
                self._eval_loader = create('EvalReader')(
                    self._eval_dataset,
                    self.cfg.worker_num,
                    batch_sampler=self._eval_batch_sampler)
            # if validation in training is enabled, metrics should be re-init
            # Init_mark makes sure this code will only execute once
            if validate and Init_mark == False:
                Init_mark = True
                self._init_metrics(validate=validate)
                self._reset_metrics()
            with paddle.no_grad():
                self.status['save_best_model'] = True
                self._eval_with_loader(self._eval_loader)

    self._compose_callback.on_train_end(self.status)