# Imports shared by the snippets below. `logger`, `Timer` and `RunState`
# are PaddleHub-internal helpers and are assumed to come from the
# surrounding package; only the standard imports are spelled out here.
import os
from collections import defaultdict
from typing import Callable, List

import numpy as np
import paddle


def evaluate_process(self, loader: paddle.io.DataLoader) -> dict:
    self.model.eval()
    avg_loss = num_samples = 0
    sum_metrics = defaultdict(int)
    avg_metrics = defaultdict(int)

    for batch_idx, batch in enumerate(loader):
        result = self.validation_step(batch, batch_idx)
        loss = result.get('loss', None)
        metrics = result.get('metrics', {})
        bs = batch[0].shape[0]
        num_samples += bs

        if loss:
            avg_loss += loss.numpy()[0] * bs

        for metric, value in metrics.items():
            sum_metrics[metric] += value.numpy()[0] * bs

    # print avg metrics and loss
    print_msg = '[Evaluation result]'
    if loss:
        avg_loss /= num_samples
        print_msg += ' avg_loss={:.4f}'.format(avg_loss)

    for metric, value in sum_metrics.items():
        avg_metrics[metric] = value / num_samples
        print_msg += ' avg_{}={:.4f}'.format(metric, avg_metrics[metric])

    logger.eval(print_msg)

    if loss:
        return {'loss': avg_loss, 'metrics': avg_metrics}
    return {'metrics': avg_metrics}

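# The dict returned by `validation_step` drives the averaging above. A
# minimal sketch of a compatible implementation, returning tensor-valued
# metrics to match the `value.numpy()[0]` accumulation; `self.model` and
# the `acc` metric are hypothetical placeholders, not part of this module:
def validation_step(self, batch, batch_idx):
    images, labels = batch
    logits = self.model(images)
    loss = paddle.nn.functional.cross_entropy(logits, labels)
    # paddle.metric.accuracy expects labels of shape [N, 1]
    acc = paddle.metric.accuracy(logits, labels.unsqueeze(-1))
    return {'loss': loss, 'metrics': {'acc': acc}}
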
def evaluate(self, eval_dataset: paddle.io.Dataset, batch_size: int = 1, num_workers: int = 0):
    '''
    Run evaluation and return metrics.

    Args:
        eval_dataset(paddle.io.Dataset) : The validation dataset.
        batch_size(int) : Batch size per step, default is 1.
        num_workers(int) : Number of subprocesses used to load data, default is 0.
    '''
    batch_sampler = paddle.io.DistributedBatchSampler(
        eval_dataset, batch_size=batch_size, shuffle=False, drop_last=False)
    loader = paddle.io.DataLoader(
        eval_dataset, batch_sampler=batch_sampler, num_workers=num_workers, return_list=True)

    self.model.eval()
    avg_loss = num_samples = 0
    sum_metrics = defaultdict(int)
    avg_metrics = defaultdict(int)

    with logger.processing('Evaluation on validation dataset'):
        for batch_idx, batch in enumerate(loader):
            result = self.validation_step(batch, batch_idx)
            loss = result.get('loss', None)
            metrics = result.get('metrics', {})
            bs = batch[0].shape[0]
            num_samples += bs

            if loss:
                avg_loss += loss.numpy()[0] * bs

            for metric, value in metrics.items():
                sum_metrics[metric] += value.numpy()[0] * bs

    # print avg metrics and loss
    print_msg = '[Evaluation result]'
    if loss:
        avg_loss /= num_samples
        print_msg += ' avg_loss={:.4f}'.format(avg_loss)

    for metric, value in sum_metrics.items():
        avg_metrics[metric] = value / num_samples
        print_msg += ' avg_{}={:.4f}'.format(metric, avg_metrics[metric])

    logger.eval(print_msg)

    if loss:
        return {'loss': avg_loss, 'metrics': avg_metrics}
    return {'metrics': avg_metrics}

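# A minimal usage sketch for the method above; `trainer` (an instance of
# the surrounding class) and `MyValDataset` are hypothetical stand-ins:
eval_ds = MyValDataset(mode='val')
result = trainer.evaluate(eval_ds, batch_size=32, num_workers=2)
print(result['metrics'])
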
def _default_eval_end_event(self, run_states: List[RunState]):
    '''
    PaddleHub's default handler for eval_end_event; it completes
    visualization and metrics calculation.

    Args:
        run_states (object): the results of the eval phase
    '''
    eval_scores, eval_loss, run_speed = self._calculate_metrics(run_states)
    if 'train' in self._envs:
        self.vdl_writer.add_scalar(
            tag='Loss_{}'.format(self.phase), value=eval_loss, step=self._envs['train'].current_step)

    log_scores = ''
    for metric in eval_scores:
        if 'train' in self._envs:
            self.vdl_writer.add_scalar(
                tag='{}_{}'.format(metric, self.phase),
                value=eval_scores[metric],
                step=self._envs['train'].current_step)
        log_scores += '{}={:.5f} '.format(metric, eval_scores[metric])

    logger.eval('[{} dataset evaluation result] loss={:.5f} {}[step/sec: {:.2f}]'.format(
        self.phase, eval_loss, log_scores, run_speed))

    eval_scores_items = eval_scores.items()
    if len(eval_scores_items):
        # The first metric is chosen as the main evaluation metric
        main_metric, main_value = list(eval_scores_items)[0]
    else:
        logger.warning('No metric has been implemented, loss will be used to evaluate.')
        # The larger, the better
        main_metric, main_value = 'negative loss', -eval_loss

    if self.phase in ['dev', 'val'] and main_value > self.best_score:
        self.best_score = main_value
        model_saved_dir = os.path.join(self.config.checkpoint_dir, 'best_model')
        logger.eval('best model saved to {} [best {}={:.5f}]'.format(model_saved_dir, main_metric, main_value))
        self.save_inference_model(dirname=model_saved_dir)

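# The handler above is the default hook; a task subclass can swap in its
# own. A minimal sketch, assuming `BaseTask` is the PaddleHub 1.x task
# base class and that `_calculate_metrics` keeps the signature used above
# (both are assumptions, not shown in this snippet):
class QuietEvalTask(BaseTask):
    def _default_eval_end_event(self, run_states):
        # log only the loss and throughput, skip per-metric visualization
        eval_scores, eval_loss, run_speed = self._calculate_metrics(run_states)
        logger.eval('eval loss={:.5f} [step/sec: {:.2f}]'.format(eval_loss, run_speed))
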
def evaluate(self,
             eval_dataset: paddle.io.Dataset,
             batch_size: int = 1,
             num_workers: int = 0,
             collate_fn: Callable = None):
    '''
    Run evaluation and return metrics.

    Args:
        eval_dataset(paddle.io.Dataset) : The validation dataset.
        batch_size(int) : Batch size per step, default is 1.
        num_workers(int) : Number of subprocesses used to load data, default is 0.
        collate_fn(callable): Function to generate mini-batch data by merging the sample list.
            If None, each field of the samples is stacked along axis 0 (same as
            `np.stack(..., axis=0)`). Default is None.
    '''
    if self.local_rank == 0:
        batch_sampler = paddle.io.BatchSampler(eval_dataset, batch_size=batch_size, shuffle=False, drop_last=False)
        loader = paddle.io.DataLoader(
            eval_dataset,
            batch_sampler=batch_sampler,
            num_workers=num_workers,
            return_list=True,
            collate_fn=collate_fn)

        self.model.eval()
        avg_loss = num_samples = 0
        sum_metrics = defaultdict(int)
        avg_metrics = defaultdict(int)

        with logger.processing('Evaluation on validation dataset'):
            for batch_idx, batch in enumerate(loader):
                result = self.validation_step(batch, batch_idx)
                loss = result.get('loss', None)
                metrics = result.get('metrics', {})
                bs = batch[0].shape[0]
                num_samples += bs

                if loss:
                    avg_loss += loss.numpy()[0] * bs

                for metric, value in metrics.items():
                    sum_metrics[metric] += value * bs

        # print avg metrics and loss
        print_msg = '[Evaluation result]'
        if loss:
            avg_loss /= num_samples
            print_msg += ' avg_loss={:.4f}'.format(avg_loss)

        for metric, value in sum_metrics.items():
            avg_metrics[metric] = value / num_samples
            print_msg += ' avg_{}={:.4f}'.format(metric, avg_metrics[metric])

        logger.eval(print_msg)

        if loss:
            return {'loss': avg_loss, 'metrics': avg_metrics}
        return {'metrics': avg_metrics}

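# `collate_fn` lets callers replace the default np.stack batching. A
# minimal sketch that pads variable-length 1-D token arrays to the
# longest sequence in the batch; the (tokens, label) sample layout and
# the `trainer`/`eval_ds` names are assumptions carried over from the
# earlier sketch:
def pad_collate(samples):
    max_len = max(len(tokens) for tokens, _ in samples)
    padded = np.stack(
        [np.pad(tokens, (0, max_len - len(tokens))) for tokens, _ in samples])
    labels = np.stack([label for _, label in samples])
    return padded, labels

result = trainer.evaluate(eval_ds, batch_size=16, collate_fn=pad_collate)
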
def train(self,
          train_dataset: paddle.io.Dataset,
          epochs: int = 1,
          batch_size: int = 1,
          num_workers: int = 0,
          eval_dataset: paddle.io.Dataset = None,
          log_interval: int = 10,
          save_interval: int = 10,
          collate_fn: Callable = None):
    '''
    Train a model with a specific config.

    Args:
        train_dataset(paddle.io.Dataset) : Dataset to train the model.
        epochs(int) : Number of training loops, default is 1.
        batch_size(int) : Batch size per step, default is 1.
        num_workers(int) : Number of subprocesses used to load data, default is 0.
        eval_dataset(paddle.io.Dataset) : The validation dataset, default is None. If set, the Trainer
            will execute the evaluate function every `save_interval` epochs.
        log_interval(int) : Log the training information every `log_interval` steps.
        save_interval(int) : Save the checkpoint every `save_interval` epochs.
        collate_fn(callable): Function to generate mini-batch data by merging the sample list.
            If None, each field of the samples is stacked along axis 0 (same as
            `np.stack(..., axis=0)`). Default is None.
    '''
    batch_sampler = paddle.io.DistributedBatchSampler(
        train_dataset, batch_size=batch_size, shuffle=True, drop_last=False)
    loader = paddle.io.DataLoader(
        train_dataset,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        return_list=True,
        use_buffer_reader=True,
        collate_fn=collate_fn)

    steps_per_epoch = len(batch_sampler)
    timer = Timer(steps_per_epoch * epochs)
    timer.start()

    for i in range(epochs):
        self.current_epoch += 1
        avg_loss = 0
        avg_metrics = defaultdict(int)
        self.model.train()

        for batch_idx, batch in enumerate(loader):
            loss, metrics = self.training_step(batch, batch_idx)
            self.optimizer_step(self.current_epoch, batch_idx, self.optimizer, loss)
            self.optimizer_zero_grad(self.current_epoch, batch_idx, self.optimizer)

            # calculate metrics and loss
            avg_loss += loss.numpy()[0]
            for metric, value in metrics.items():
                if isinstance(value, paddle.Tensor):
                    value = value.numpy()
                avg_metrics[metric] += value

            timer.count()

            if (batch_idx + 1) % log_interval == 0 and self.local_rank == 0:
                lr = self.optimizer.get_lr()
                avg_loss /= log_interval
                if self.use_vdl:
                    self.log_writer.add_scalar(tag='TRAIN/loss', step=timer.current_step, value=avg_loss)

                print_msg = 'Epoch={}/{}, Step={}/{}'.format(
                    self.current_epoch, epochs, batch_idx + 1, steps_per_epoch)
                print_msg += ' loss={:.4f}'.format(avg_loss)
                for metric, value in avg_metrics.items():
                    value /= log_interval
                    if self.use_vdl:
                        self.log_writer.add_scalar(
                            tag='TRAIN/{}'.format(metric), step=timer.current_step, value=value)
                    if isinstance(value, np.ndarray):
                        value = value.item()
                    print_msg += ' {}={:.4f}'.format(metric, value)

                print_msg += ' lr={:.6f} step/sec={:.2f} | ETA {}'.format(lr, timer.timing, timer.eta)
                logger.train(print_msg)

                avg_loss = 0
                avg_metrics = defaultdict(int)

            if self.current_epoch % save_interval == 0 and batch_idx + 1 == steps_per_epoch and self.local_rank == 0:
                if eval_dataset:
                    result = self.evaluate(eval_dataset, batch_size, num_workers, collate_fn=collate_fn)
                    eval_loss = result.get('loss', None)
                    eval_metrics = result.get('metrics', {})
                    if self.use_vdl:
                        if eval_loss:
                            self.log_writer.add_scalar(tag='EVAL/loss', step=timer.current_step, value=eval_loss)
                        for metric, value in eval_metrics.items():
                            self.log_writer.add_scalar(
                                tag='EVAL/{}'.format(metric), step=timer.current_step, value=value)

                    if not self.best_metrics or self.compare_metrics(self.best_metrics, eval_metrics):
                        self.best_metrics = eval_metrics
                        best_model_path = os.path.join(self.checkpoint_dir, 'best_model')
                        self.save_model(best_model_path)
                        self._save_metrics()

                        metric_msg = [
                            '{}={:.4f}'.format(metric, value) for metric, value in self.best_metrics.items()
                        ]
                        metric_msg = ' '.join(metric_msg)
                        logger.eval('Saving best model to {} [best {}]'.format(best_model_path, metric_msg))

                self._save_checkpoint()

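# A minimal usage sketch of the training loop above, following the
# documented signature; `trainer`, `MyTrainDataset` and `MyValDataset`
# are hypothetical stand-ins:
train_ds = MyTrainDataset(mode='train')
trainer.train(
    train_ds,
    epochs=10,
    batch_size=32,
    eval_dataset=MyValDataset(mode='val'),
    log_interval=10,
    save_interval=1)
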
def train(self,
          train_dataset: paddle.io.Dataset,
          epochs: int = 1,
          batch_size: int = 1,
          num_workers: int = 0,
          eval_dataset: paddle.io.Dataset = None,
          log_interval: int = 10,
          save_interval: int = 10):
    '''
    Train a model with a specific config.

    Args:
        train_dataset(paddle.io.Dataset) : Dataset to train the model.
        epochs(int) : Number of training loops, default is 1.
        batch_size(int) : Batch size per step, default is 1.
        num_workers(int) : Number of subprocesses used to load data, default is 0.
        eval_dataset(paddle.io.Dataset) : The validation dataset, default is None. If set, the Trainer
            will execute the evaluate function every `save_interval` epochs.
        log_interval(int) : Log the training information every `log_interval` steps.
        save_interval(int) : Save the checkpoint every `save_interval` epochs.
    '''
    batch_sampler, loader = self.init_train(train_dataset, batch_size, num_workers)
    steps_per_epoch = len(batch_sampler)
    timer = Timer(steps_per_epoch * epochs)
    timer.start()

    for i in range(epochs):
        # pass the current epoch index so shuffling differs across epochs
        loader.dataset.set_epoch(i)
        self.current_epoch += 1
        self.train_one_epoch(loader, timer, self.current_epoch, epochs, log_interval, steps_per_epoch)

        # TODO: why does paddlehub run save/eval inside the batch loop?
        if self.current_epoch % save_interval == 0 and self.local_rank == 0:
            if eval_dataset:
                result = self.evaluate(eval_dataset, batch_size, num_workers)
                eval_loss = result.get('loss', None)
                eval_metrics = result.get('metrics', {})
                if self.use_vdl:
                    if eval_loss:
                        self.log_writer.add_scalar(tag='EVAL/loss', step=timer.current_step, value=eval_loss)
                    for metric, value in eval_metrics.items():
                        self.log_writer.add_scalar(
                            tag='EVAL/{}'.format(metric), step=timer.current_step, value=value)

                if not self.best_metrics or self.compare_metrics(self.best_metrics, eval_metrics):
                    self.best_metrics = eval_metrics
                    best_model_path = os.path.join(self.checkpoint_dir, 'best_model')
                    self.save_model(best_model_path)
                    self._save_metrics()

                    metric_msg = [
                        '{}={:.4f}'.format(metric, value) for metric, value in self.best_metrics.items()
                    ]
                    metric_msg = ' '.join(metric_msg)
                    logger.eval('Saving best model to {} [best {}]'.format(best_model_path, metric_msg))

            self._save_checkpoint()

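# Best-model selection in both train loops hinges on `compare_metrics`,
# which is not shown in this snippet. A minimal sketch under the same
# "first metric is the main one, larger is better" convention used by
# _default_eval_end_event above (the method body is an assumption):
def compare_metrics(self, old_metrics: dict, new_metrics: dict) -> bool:
    # returns True when the new metrics should replace the stored best
    main_metric = list(new_metrics.keys())[0]
    return new_metrics[main_metric] > old_metrics.get(main_metric, float('-inf'))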