class TopKAccuracy(object):
    r"""Top K accuracy of an output.

    Counts a prediction as correct if the target value is in the top ``k``
    predictions, incorrect otherwise, and returns the number of correct
    instances relative to total instances, as a percentage (0.0 to 100.0).

    Args:
        topk (int, optional): The number of top predictions to consider.
            Default: ``1``
    """

    def __init__(self, topk=1):
        self.topk = topk
        self.reset()

    def __call__(self, output, target):
        """Computes the precision@k for the specified value of k

        Args:
            output (:obj:`torch.Tensor`): Predictions of a model
            target (:obj:`torch.Tensor`): Target labels

        Example:
            >>> m = nn.Softmax(dim=1)
            >>> input = torch.randn(10, 50)
            >>> preds = m(input)
            >>> targets = torch.randint(0, 50, (10,))
            >>> topk = TopKAccuracy(5)
            >>> precision = topk(preds, targets)

        Returns:
            float
        """
        batch_size = target.size(0)

        _, pred = output.topk(self.topk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        correct_k = correct[:self.topk].view(-1).float().sum(0, keepdim=True)
        return correct_k.mul_(100.0 / batch_size)

    def reset(self):
        """Reset metric tracking stats"""
        self.top = AverageMeter()

    def update(self, prec, size):
        """Add new measurement to running stats"""
        self.top.update(prec, size)

    def average(self):
        """Average stats."""
        return global_average(self.top.sum, self.top.count)

    @property
    def name(self):
        """str: Name of this metric."""
        return "Prec@{}".format(self.topk)
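# Usage sketch (illustrative, not part of the original source): computes top-5
# precision for a dummy batch with the TopKAccuracy metric above. The tensor
# shapes and values are assumptions made for demonstration; `average()` is not
# called here because it relies on `global_average`, which expects an
# initialized distributed backend.
def _example_topk_accuracy():
    import torch

    scores = torch.randn(8, 10)           # 8 samples, 10 classes
    labels = torch.randint(0, 10, (8,))   # one class index per sample

    top5 = TopKAccuracy(topk=5)
    prec5 = top5(scores, labels)          # tensor with a value in [0.0, 100.0]
    top5.update(prec5, labels.size(0))    # accumulate for later averaging
    return prec5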
def validation_round(
    session,
    validation_set_init_op,
    loss_op,
    metrics,
    batch_size,
    num_batches_per_epoch_for_validation,
    tracker,
):
    """
    Handles one full iteration of validation on the whole validation set.

    Args:
        session (obj): The tensorflow session
        validation_set_init_op (obj): The validation set initialisation tf operation
        loss_op (obj): The tensorflow loss operation
        metrics (list): List of metrics to track
        batch_size (int): The batch size
        num_batches_per_epoch_for_validation (int): Maximum number of batches to
            validate for per epoch, default: `None` (all batches)
        tracker (`obj`:mlbench_core.utils.Tracker): Tracker object to use
    """
    session.run(validation_set_init_op)
    tracker.validation()

    loss_meter = AverageMeter()
    metrics_meter = [AverageMeter() for _ in metrics]

    for i_batch in range(num_batches_per_epoch_for_validation):
        out = session.run({
            "metrics": [m.metric_op for m in metrics],
            "loss": loss_op,
        })

        # Update tracker
        loss_meter.update(out["loss"], n=batch_size)
        for meter, o in zip(metrics_meter, out["metrics"]):
            meter.update(o, n=batch_size)

        logging.debug("{}/{} Validation loss={:10.3e} | metrics: [{}]".format(
            tracker.current_epoch,
            i_batch,
            loss_meter.avg,
            ",".join([format(m.avg, "10.3e") for m in metrics_meter]),
        ))

    tracker.record_loss(loss_meter.avg, log_to_api=True)

    if tracker.rank == 0:
        tracker.record_stat("global_loss", loss_meter.avg, log_to_api=True)

    for i, metric, meter in zip(range(len(metrics)), metrics, metrics_meter):
        tracker.record_metric(metric, meter.avg, log_to_api=True)

        if tracker.rank == 0:
            tracker.record_stat(
                "global_{}".format(metric.name), meter.avg, log_to_api=True
            )
def train_one_epoch(self, tracker):
    """Train a model for an epoch and use tracker to log stats."""
    logging.info("Initialize training dataset.")
    self.sess.run(self.train_set_init_op)

    loss_meter = AverageMeter()
    metrics_meter = [AverageMeter() for _ in self.metrics]

    for i_batch in range(self.num_batches_per_epoch_for_train):
        tracker.batch_stats = [("start", time.time())]

        out = self.sess.run({
            "metrics": [m['value'] for m in self.metrics],
            "loss": self.loss,
            "train_op": self.train_op,
        })

        tracker.batch_stats.append(('end', time.time()))

        # Update tracker
        loss_meter.update(out["loss"], n=self.batch_size)
        for meter, o in zip(metrics_meter, out['metrics']):
            meter.update(o, n=self.batch_size)

        # Print logging information.
        logging.debug(
            "E{}:B{}/{} loss={:10.3e} | metrics: [{}] | best epoch {} ({:10.3e})"
            .format(
                tracker.current_epoch,
                i_batch,
                self.num_batches_per_epoch_for_train,
                loss_meter.avg,
                ",".join([format(m.avg, "10.3e") for m in metrics_meter]),
                tracker.best_epoch,
                tracker.best_epoch_value,
            )
        )

    # Record training loss and metrics.
    tracker.cumu_time_train.append(
        tracker.batch_stats[-1][1] - tracker.batch_stats[0][1]
    )
    LogMetrics.log(
        self.run_id,
        self.rank,
        tracker.current_epoch,
        'train_loss',
        loss_meter.avg,
        tracker=tracker,
        time=sum(tracker.cumu_time_train),
    )

    for metric, meter in zip(self.metrics, metrics_meter):
        LogMetrics.log(
            self.run_id,
            self.rank,
            tracker.current_epoch,
            'train_' + metric['name'],
            meter.avg,
            tracker=tracker,
            time=sum(tracker.cumu_time_train),
        )

    logging.info("Finish training for one epoch.")
def _validate(
    dataloader,
    model,
    loss_function,
    metrics,
    dtype,
    transform_target_type=None,
    use_cuda=False,
    max_batch_per_epoch=None,
):
    """Evaluate the model on the test dataset.

    Args:
        dataloader (:obj:`torch.utils.data.DataLoader`): The validation set
        model (`obj`:torch.nn.Module): The model to train
        loss_function (`obj`:torch.nn.Module): The loss function
        metrics (list): List of metrics to track
        dtype (str): The datatype to use, one of `fp32` or `fp64`
        transform_target_type (str): Datatype to convert data to, default: `None`
        use_cuda (bool): Whether to use GPU for training, default: `False`
        max_batch_per_epoch (int): Maximum number of batches to validate per
            epoch, default: `None` (all batches)
    """
    # Initialize the accumulators for loss and metrics
    losses = AverageMeter()
    for metric in metrics:
        metric.reset()

    # Each worker computes its own losses and metrics
    with torch.no_grad():
        data_iter = iterate_dataloader(
            dataloader, dtype, max_batch_per_epoch, use_cuda, transform_target_type
        )

        for data, target in data_iter:
            # Inference
            output = model(data)

            # Compute loss
            loss = loss_function(output, target)

            # Update loss
            losses.update(loss.item(), data.size(0))

            # Update metrics
            for metric in metrics:
                metric_value = metric(loss, output, target)
                metric.update(metric_value, data.size(0))

    # Aggregate metrics and loss for all workers
    metrics_averages = {metric: metric.average().item() for metric in metrics}
    loss_average = global_average(losses.sum, losses.count).item()
    return metrics_averages, loss_average
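# Minimal single-process sketch (an assumption for illustration, not
# mlbench_core's actual implementation) of the AverageMeter bookkeeping used
# above: `update(val, n)` accumulates a value weighted by the batch size, so
# `avg` is the sample-weighted mean rather than a mean over batches. In the
# distributed setting, `global_average(sum, count)` would additionally
# all-reduce the sums and counts across workers before dividing.
class _AverageMeterSketch(object):
    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.sum += val * n      # weight the batch value by its size
        self.count += n
        self.avg = self.sum / self.count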
def valid_one_epoch(self, tracker):
    """Validate the model for one epoch and use tracker to log stats."""
    self.sess.run(self.validation_set_init_op)

    loss_meter = AverageMeter()
    metrics_meter = [AverageMeter() for _ in self.metrics]

    for i_batch in range(self.num_batches_per_epoch_for_validation):
        out = self.sess.run({
            "metrics": [m['value'] for m in self.metrics],
            "loss": self.loss,
        })

        # Update tracker
        loss_meter.update(out["loss"], n=self.batch_size)
        for meter, o in zip(metrics_meter, out['metrics']):
            meter.update(o, n=self.batch_size)

        logging.debug(
            "{}/{} Validation loss={:10.3e} | metrics: [{}]".format(
                tracker.current_epoch,
                i_batch,
                loss_meter.avg,
                ",".join([format(m.avg, "10.3e") for m in metrics_meter]),
            )
        )

    LogMetrics.log(
        self.run_id,
        self.rank,
        tracker.current_epoch,
        'val_loss',
        loss_meter.avg,
        tracker=tracker,
        time=sum(tracker.cumu_time_train),
    )

    for i, metric, meter in zip(range(len(self.metrics)), self.metrics, metrics_meter):
        metric_name = 'val_' + metric['name']

        # Here we implicitly assume that a larger metric value means better
        # results; only the first metric determines the best epoch.
        if ((i == 0)
                and (len([r for r in tracker.records if r['name'] == metric_name]) == 0
                     or meter.avg > max([
                         float(r.value)
                         for r in tracker.records if r['name'] == metric_name
                     ]))):
            tracker.best_epoch = tracker.current_epoch
            tracker.best_epoch_value = meter.avg

        LogMetrics.log(
            self.run_id,
            self.rank,
            tracker.current_epoch,
            metric_name,
            meter.avg,
            tracker=tracker,
            time=sum(tracker.cumu_time_train),
        )
def validation_round(
    val_loader,
    metrics,
    model,
    loss_func,
    iter_size,
    translator,
    tracker=None,
    use_cuda=False,
):
    """Performs one round of validation.

    Args:
        val_loader (:obj:`torch.utils.data.DataLoader`): The validation set
        metrics (list): List of metrics to track
        model (:obj:`torch.nn.Module`): The model to evaluate
        loss_func (:obj:`torch.nn.Module`): The loss function
        iter_size (int): Iteration size passed to `compute_loss`
        translator (obj): Translator used to generate translations for the metrics
        tracker (:obj:`mlbench_core.utils.Tracker` | None): Tracker object to use
        use_cuda (bool): Use GPU acceleration. Default: `False`

    Returns:
        (dict, float): The metric averages over all workers, and the loss average
    """
    # Set tracker and model in eval mode
    model.eval()
    if tracker:
        tracker.validation()
        tracker.validation_start()

    losses = AverageMeter()

    # Reset metrics
    for metric in metrics:
        metric.reset()

    with torch.no_grad():
        for (data, target) in val_loader:
            data, target = prepare_batch(data, target, use_cuda=use_cuda)
            output = compute_model_output(model, data, target)

            # Compute loss
            loss, loss_per_token = compute_loss(data, target, output, loss_func, iter_size)

            # Update loss
            losses.update(loss_per_token, 1)

            # Update metrics
            translated, targets = translator.translate(data, target)
            for metric in metrics:
                metric_value = metric(translated, targets)
                size = data[0].shape[1]
                metric.update(metric_value, size)

    metrics_averages = {metric: metric.average().item() for metric in metrics}
    loss_average = global_average(losses.sum, losses.count).item()

    if tracker:
        tracker.validation_end()
    return metrics_averages, loss_average
class MLBenchMetric(object):
    """Base class for metrics that tracks a running average of measurements."""

    def __init__(self):
        self.average_meter = AverageMeter()

    @abstractmethod
    def __call__(self, loss, output, target):
        pass

    def reset(self):
        """Reset the running average."""
        self.average_meter = AverageMeter()

    def update(self, perc, size):
        """Add a new measurement to the running average."""
        self.average_meter.update(perc, size)

    def average(self):
        """Average the accumulated measurements over all workers."""
        return global_average(self.average_meter.sum, self.average_meter.count)
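# Hypothetical subclass sketch: shows how a concrete metric could plug into the
# MLBenchMetric base class above. `MeanAbsoluteError` is an invented example,
# not part of the original code; it only illustrates that `__call__` computes
# the per-batch value while the inherited `update`/`average` handle aggregation.
class MeanAbsoluteError(MLBenchMetric):
    def __call__(self, loss, output, target):
        # Per-batch metric value; the loss argument is ignored for this metric.
        return (output - target).abs().mean()

    @property
    def name(self):
        # Name used when logging this metric (mirrors TopKAccuracy.name above).
        return "MAE"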
def validation_round(loader, metrics, criterion, translator, tracker, use_cuda=False):
    """Performs one round of validation for the Transformer model

    Args:
        loader (:obj:`torch.utils.data.DataLoader`): Data loader
        metrics (list): List of metrics for evaluation
        criterion (:obj:`torch.nn.Module`): Loss function
        translator (:obj:`mlbench_core.models.pytorch.transformer.SequenceGenerator`):
            Translator module
        tracker (:obj:`mlbench_core.utils.Tracker`): Current Tracker
        use_cuda (bool): Use GPU acceleration. Default: `False`.

    Returns:
        (dict of :obj:`mlbench_core.evaluation.pytorch.MLBenchMetric`: float, float):
            The metrics averages over all workers, and the loss average.
    """
    model = translator.model
    model.eval()

    tracker.validation()
    tracker.validation_start()

    losses = AverageMeter()
    for metric in metrics:
        metric.reset()

    with torch.no_grad():
        for batch in loader:
            batch = prepare_batch(batch, use_cuda=use_cuda)
            output = model(**batch["net_input"])

            loss, sample_size = compute_loss(batch, output, criterion)
            losses.update(loss.item() / sample_size, 1)

            translated, targets = translator.translate_batch(batch)
            for metric in metrics:
                metric_value = metric(translated, targets)
                size = batch["target"].size(0)  # Number of translated sentences
                metric.update(metric_value, size)

    metric_averages = {metric: metric.average().item() for metric in metrics}
    loss_average = global_average(losses.sum, losses.count)

    tracker.validation_end()
    return metric_averages, loss_average
def validate(self, dataloader):
    r"""Validate the quality of the model in terms of loss and metrics.

    Args:
        dataloader (:obj:`torch.utils.data.DataLoader`): The validation set
    """
    # Turn on evaluation mode for the model
    self.model.eval()

    # Initialize the accumulators for loss and metrics
    losses = AverageMeter()
    for metric in self.metrics:
        metric.reset()

    # Each worker computes its own losses and metrics
    with torch.no_grad():
        data_iter = iterate_dataloader(
            dataloader,
            self.dtype,
            self.max_batch_per_epoch,
            self.use_cuda,
            self.transform_target_type,
        )

        for data, target in data_iter:
            # Inference
            output = self.model(data)

            # Compute loss
            loss = self.loss_function(output, target)

            # Update loss
            losses.update(loss.item(), data.size(0))

            # Update metrics
            for metric in self.metrics:
                metric_value = metric(output, target)
                metric.update(metric_value, data.size(0))

    # Aggregate metrics and loss for all workers
    metrics_averages = {
        metric.name: metric.average().item() for metric in self.metrics
    }
    loss_average = global_average(losses.sum, losses.count).item()
    return metrics_averages, loss_average
def validation_round(loader, metrics, criterion, translator, tracker=None, use_cuda=False):
    """Performs one round of validation using a translator.

    Args:
        loader (:obj:`torch.utils.data.DataLoader`): Data loader
        metrics (list): List of metrics for evaluation
        criterion (:obj:`torch.nn.Module`): Loss function
        translator (obj): Translator module used to generate translations
        tracker (:obj:`mlbench_core.utils.Tracker` | None): Current Tracker. Default: `None`
        use_cuda (bool): Use GPU acceleration. Default: `False`.

    Returns:
        (dict, float): The metrics averages over all workers, and the loss average.
    """
    model = translator.model
    model.eval()

    if tracker:
        tracker.validation()
        tracker.validation_start()

    losses = AverageMeter()
    for metric in metrics:
        metric.reset()

    with torch.no_grad():
        for batch in loader:
            batch = prepare_batch(batch, use_cuda=use_cuda)
            output = model(**batch["net_input"])

            loss, sample_size = compute_loss(batch, output, criterion)
            losses.update(loss.item() / sample_size, 1)

            translated, targets = translator.translate_batch(batch)
            for metric in metrics:
                metric_value = metric(loss.item(), translated, targets)
                size = batch["target"].size(0)  # Number of translated sentences
                metric.update(metric_value, size)

    metric_averages = {metric: metric.average().item() for metric in metrics}
    loss_average = global_average(losses.sum, losses.count)

    if tracker:
        tracker.validation_end()
    return metric_averages, loss_average
def train_round(
    session,
    train_set_init_op,
    train_op,
    loss_op,
    metrics,
    batch_size,
    num_batches_per_epoch_for_train,
    tracker,
    lr_scheduler_level=None,
    lr_tensor=None,
):
    """
    Performs num_batches_per_epoch_for_train batches of training
    (or the full trainset if not specified).

    Args:
        session (obj): The tensorflow session
        train_set_init_op (obj): The trainset initialisation tf operation
        train_op (obj): The tensorflow training operation
        loss_op (obj): The tensorflow loss operation
        metrics (list): List of metrics to track
        batch_size (int): The batch size
        num_batches_per_epoch_for_train (int): Maximum number of batches to train
            for per epoch, default: `None` (all batches)
        tracker (`obj`:mlbench_core.utils.Tracker): Tracker object to use
        lr_scheduler_level (str): Learning Rate scheduler mode, one of `batch` or `epoch`
        lr_tensor (obj): The learning rate schedule tensorflow operation
    """
    logging.info("Initialize training dataset.")
    session.run(train_set_init_op)
    tracker.train()

    loss_meter = AverageMeter()
    metrics_meter = [AverageMeter() for _ in metrics]

    if lr_scheduler_level == "epoch" and lr_tensor is not None:
        lr = session.run(lr_tensor)
        logging.debug("Epoch {} Learning Rate : {:10.3e}".format(
            tracker.current_epoch, lr))

    for i_batch in range(num_batches_per_epoch_for_train):
        tracker.batch_start()

        if lr_scheduler_level == "batch" and lr_tensor is not None:
            lr = session.run(lr_tensor)
            logging.debug("Epoch {} Learning Rate : {:10.3e}".format(
                tracker.current_epoch, lr))

        out = session.run({
            "metrics": [m.metric_op for m in metrics],
            "loss": loss_op,
            "train_op": train_op,
        })

        tracker.batch_end()

        # Update tracker
        loss_meter.update(out["loss"], n=batch_size)
        tracker.record_loss(loss_meter.avg, log_to_api=True)

        for metric, meter, o in zip(metrics, metrics_meter, out["metrics"]):
            meter.update(o, n=batch_size)
            tracker.record_metric(metric, meter.avg, log_to_api=True)

        # Print logging information.
        progress = i_batch / num_batches_per_epoch_for_train
        progress += tracker.current_epoch

        status = "Epoch {:5.2f} Batch {:4}: ".format(progress, i_batch)
        logging.info(status + str(tracker))

    # Record training loss and metrics.
    tracker.record_loss(loss_meter.avg, log_to_api=True)

    for metric, meter in zip(metrics, metrics_meter):
        tracker.record_metric(metric, meter.avg, log_to_api=True)

    logging.info("Finish training for one epoch.")
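# Minimal TensorFlow 1.x sketch (an illustration, not the benchmark's actual
# graph) of the fetch-dict pattern used in the TensorFlow helpers above:
# `session.run` can take a dict of tensors/ops and returns a dict with the same
# keys, so the loss, the metric values and the training op are all fetched in a
# single step.
def _example_fetch_dict():
    import tensorflow.compat.v1 as tf
    tf.disable_eager_execution()

    x = tf.placeholder(tf.float32, shape=[None, 3])
    w = tf.Variable(tf.zeros([3, 1]))
    y = tf.matmul(x, w)
    loss_op = tf.reduce_mean(tf.square(y))
    train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss_op)

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        # One training step: loss and update op fetched together.
        out = session.run(
            {"loss": loss_op, "train_op": train_op},
            feed_dict={x: [[1.0, 2.0, 3.0]]},
        )
    return out["loss"]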
def validation_round(
    dataloader,
    model,
    loss_function,
    metrics,
    dtype,
    tracker=None,
    transform_target_type=False,
    use_cuda=False,
    max_batches=None,
):
    """Evaluate the model on the test dataset.

    Args:
        dataloader (`obj`:torch.utils.data.DataLoader): The validation set
        model (`obj`:torch.nn.Module): The model to train
        loss_function (`obj`:torch.nn.Module): The loss function
        metrics (list): List of metrics to track
        dtype (str): The datatype to use, one of `fp32` or `fp64`
        tracker (`obj`:mlbench_core.utils.Tracker | None): Tracker object to use.
        transform_target_type (bool): Convert target to `dtype`. Default `False`
        use_cuda (bool): Whether to use GPU for training, default: `False`
        max_batches (int | None): Maximum number of batches to validate on

    Returns:
        (dict, float): Dictionary of average of each metric, and average validation loss
    """
    model.eval()
    if tracker:
        tracker.validation()
        tracker.validation_start()

    # Initialize the accumulators for loss and metrics
    losses = AverageMeter()
    for metric in metrics:
        metric.reset()

    # Each worker computes its own losses and metrics
    with torch.no_grad():
        data_iter = iterate_dataloader(
            dataloader, dtype, max_batches, use_cuda, transform_target_type
        )

        for data, target in data_iter:
            output = model(data)

            # Compute loss
            loss = loss_function(output, target)

            # Update loss
            losses.update(loss.item(), data.size(0))

            # Update metrics
            for metric in metrics:
                metric_value = metric(output, target)
                metric.update(metric_value, data.size(0))

    # Aggregate metrics and loss for all workers
    metrics_averages = {metric: metric.average().item() for metric in metrics}
    loss_average = global_average(losses.sum, losses.count).item()

    if tracker:
        tracker.validation_end()

    return metrics_averages, loss_average
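# Hypothetical callsite sketch for the validation_round above. The dataset,
# model and TopKAccuracy metric are stand-ins chosen for illustration only;
# the function still depends on the module-level helpers it references
# (iterate_dataloader, AverageMeter, global_average) and, for metric.average(),
# on an initialized distributed backend.
def _example_validation_round():
    import torch
    from torch import nn
    from torch.utils.data import DataLoader, TensorDataset

    data = torch.randn(64, 20)
    labels = torch.randint(0, 5, (64,))
    loader = DataLoader(TensorDataset(data, labels), batch_size=16)

    model = nn.Linear(20, 5)
    loss_function = nn.CrossEntropyLoss()
    metrics = [TopKAccuracy(topk=1)]

    return validation_round(
        loader, model, loss_function, metrics, dtype="fp32", use_cuda=False
    )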
def validation_round(
    val_set, model, batch_size, metrics, loss_function, tracker, use_cuda=False
):
    """Performs a validation round

    Args:
        val_set (:obj:): Validation set
        model (:obj:`torch.nn.Module`): Model to evaluate
        batch_size (int): Validation batch size
        metrics (list): List of metrics to compute
        loss_function (:obj:`torch.nn.Module`): Loss function
        tracker (:obj:`mlbench_core.utils.Tracker`): Tracker object
        use_cuda (bool): Use GPU acceleration

    Returns:
        (dict, float): Metric averages and total loss average
    """
    # One epoch of training has finished; switch the tracker to validation.
    tracker.validation()
    tracker.validation_start()

    # Each worker has finished one epoch of training.
    model.eval()

    losses = AverageMeter()
    for metric in metrics:
        metric.reset()

    # Each worker computes its own losses and metrics
    with torch.no_grad():
        hidden = model.init_hidden(batch_size)

        num_batches = val_set.num_batches()
        for batch_idx in range(num_batches):
            data, target = val_set.get_batch(batch_idx, cuda=use_cuda)
            batch_seq_len = data.size(0)

            # Inference
            output, hidden = model(data, hidden)

            # Compute loss
            loss = loss_function(output, target)

            # Update loss
            losses.update(loss.item(), batch_seq_len)

            hidden = repackage_hidden(hidden)

            # Update metrics
            for metric in metrics:
                metric_value = metric(output, target)
                metric.update(metric_value, 1)

    # Aggregate metrics and loss for all workers.
    # Perplexity is reported as exp of the globally averaged loss.
    loss_average = global_average(losses.sum, losses.count)
    metrics_averages = {
        metric: torch.exp(loss_average).item()
        if metric.name == "Perplexity"
        else metric.average().item()
        for metric in metrics
    }

    logger.info(
        "Got loss {}, avg metric={}".format(
            loss_average,
            [m.average().item() for m in metrics if m.name == "Perplexity"][0],
        )
    )

    tracker.validation_end()
    return metrics_averages, loss_average.item()
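# Worked sketch of the perplexity computation above (the loss values and
# sequence lengths are invented): perplexity is exp of the globally averaged
# cross-entropy, not the average of per-batch perplexities, which would be
# biased upward because exp is convex (Jensen's inequality).
def _example_perplexity():
    import torch

    batch_losses = torch.tensor([4.2, 3.9, 4.5])   # per-batch cross-entropy
    batch_sizes = torch.tensor([35.0, 35.0, 20.0])  # tokens per batch

    loss_average = (batch_losses * batch_sizes).sum() / batch_sizes.sum()
    perplexity = torch.exp(loss_average)      # what the code above reports
    naive = torch.exp(batch_losses).mean()    # not equivalent in general
    return perplexity.item(), naive.item()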