def stats(self):
    ''' Return the dataset stats '''
    metric_store = metrics.MetricStore(default_format_str='l')

    examples = metrics.Metric('Examples', metrics.format_int, 'g')
    examples.update(len(self))
    metric_store.add(examples)

    if self.skipped:
        skipped = metrics.Metric('Skipped', metrics.format_percent, 'g')
        skipped.update(self.skipped, self.skipped + len(self))
        metric_store.add(skipped)

    vocab_size = metrics.Metric('Vocab Size', metrics.format_int, 'g')
    vocab_size.update(self.vocab_size)
    metric_store.add(vocab_size)

    input_lengths, target_lengths = zip(
        *[(len(d['input']), len(d['target'])) for d in self.data]
    )

    input_length = metrics.Metric('Input Length', metrics.format_int, 'l(max)')
    input_length.updates(input_lengths)
    metric_store.add(input_length)

    target_length = metrics.Metric('Target Length', metrics.format_int, 'l(max)')
    target_length.updates(target_lengths)
    metric_store.add(target_length)

    return metric_store
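# --- Illustrative sketch (not part of the original code) ---------------------
# metrics.Metric / metrics.MetricStore are project-internal, so the helper below
# re-computes the same numbers with plain Python to make the intent of stats()
# explicit. It assumes each item of `data` is a dict holding token sequences
# under 'input' and 'target'; the function name and return layout are hypothetical.
def _example_dataset_stats(data, skipped=0, vocab_size=0):
    """Return example count, skipped fraction, vocab size, and max sequence lengths."""
    input_lengths = [len(d['input']) for d in data]
    target_lengths = [len(d['target']) for d in data]
    return {
        'examples': len(data),
        'skipped_fraction': skipped / (skipped + len(data)) if skipped else 0.0,
        'vocab_size': vocab_size,
        'max_input_length': max(input_lengths, default=0),
        'max_target_length': max(target_lengths, default=0),
    }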
def reset_metrics(self):
    """ Initialize the metrics """
    self.metric_store = metrics.MetricStore()
    self.metric_store.add(metrics.Metric("ppl", "format_dynamic_float"))
    self.metric_store.add(metrics.Metric("ntok", "format_int", "a"))
    self.metric_store.add(metrics.Metric("nll", "format_float"))
    self.metric_store.add(metrics.Metric("oom", "format_int", "t"))
def __init__(self, config, model, dataloader, device):
    self.model = model
    self.config = config
    self.device = device
    self.stopped_early = False
    self.dataloader = dataloader
    self.validation_dataloader = dataloader
    self.last_checkpoint_time = time.time()

    if 'cuda' in device.type:
        self.model = nn.DataParallel(model.cuda())

    self.optimizer = optim.Adam(model.parameters(), config.base_lr,
                                betas=(0.9, 0.98), eps=1e-9)

    if config.lr_scheduler == 'warmup':
        self.lr_scheduler = LambdaLR(
            self.optimizer,
            WarmupLRSchedule(config.warmup_steps)
        )
    elif config.lr_scheduler == 'linear':
        self.lr_scheduler = LambdaLR(
            self.optimizer,
            LinearLRSchedule(config.base_lr, config.final_lr, config.max_steps)
        )
    elif config.lr_scheduler == 'exponential':
        self.lr_scheduler = ExponentialLR(self.optimizer, config.lr_decay)
    else:
        raise ValueError('Unknown learning rate scheduler!')

    # Initialize the metrics
    metrics_path = os.path.join(self.config.checkpoint_directory, 'train_metrics.pt')
    self.metric_store = metrics.MetricStore(metrics_path)
    self.metric_store.add(metrics.Metric('oom', metrics.format_int, 't'))
    self.metric_store.add(metrics.Metric('nll', metrics.format_float, max_history=1000))
    self.metric_store.add(metrics.Metric('lr', metrics.format_scientific, 'g', max_history=1))
    self.metric_store.add(metrics.Metric('num_tok', metrics.format_int, 'a', max_history=1000))

    if self.config.early_stopping:
        self.metric_store.add(metrics.Metric('vnll', metrics.format_float, 'g'))

    self.modules = {
        'model': model,
        'optimizer': self.optimizer,
        'lr_scheduler': self.lr_scheduler
    }
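# --- Illustrative sketch (not part of the original code) ---------------------
# The 'warmup' branch above hands a WarmupLRSchedule callable to LambdaLR, but
# that class is not shown in this section. The standalone example below assumes
# it implements the usual Transformer-style schedule (linear warmup followed by
# inverse square-root decay) expressed as a multiplicative factor on base_lr;
# every name here is hypothetical and only the public torch.optim API is used.
def _example_warmup_factor(step, warmup_steps=4000):
    """Multiplicative LR factor: ramp up linearly, then decay as 1/sqrt(step)."""
    step = max(step, 1)  # LambdaLR evaluates step 0 at construction; avoid dividing by zero
    return min(step / warmup_steps, (warmup_steps / step) ** 0.5)


def _example_warmup_demo():
    """Drive an Adam optimizer with the factor above via LambdaLR."""
    import torch.nn as nn
    import torch.optim as optim
    from torch.optim.lr_scheduler import LambdaLR

    model = nn.Linear(8, 8)  # stand-in model
    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.98), eps=1e-9)
    scheduler = LambdaLR(optimizer, _example_warmup_factor)
    for _ in range(5):
        optimizer.step()   # normally preceded by a forward/backward pass
        scheduler.step()
        print(scheduler.get_last_lr())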
def _initialize_metrics(self):
    """ Initialize the metrics """
    self.metric_store = metrics.MetricStore()
    self.metric_store.add(
        metrics.Metric("lr", "format_scientific", "g", max_history=1))
    self.metric_store.add(
        metrics.Metric("ppl", "format_dynamic_float", max_history=1000))
    self.metric_store.add(
        metrics.Metric("ntok", "format_int", "a", max_history=1000))
    self.metric_store.add(metrics.Metric("oom", "format_int", "t"))
    self.metric_store.add(
        metrics.Metric("nll", "format_float", max_history=1000))

    self.experiment = initialize_experiment(
        self.args, ("data", "model", "optim"), self.args.experiment_name)
def __init__(self, config, model, dataloader, device,
             valid_dataloader=None, clip=0.25):
    self.model = model
    self.config = config
    self.device = device
    self.stopped_early = False
    self.clip = clip
    self.dataloader = dataloader
    self.validation_dataloader = valid_dataloader
    self.last_checkpoint_time = time.time()

    if 'cuda' in device.type:
        self.model = (nn.DataParallel(model.cuda())
                      if torch.cuda.device_count() == 1
                      else NewDataParallel(config.bsz_gpu0, model.cuda()))

    if self.config.optimizer == "adam":
        self.optimizer = optim.Adam(model.parameters(), config.base_lr,
                                    betas=(config.beta_1, config.beta_2),
                                    eps=1e-08)  # for transformer

        if config.lr_scheduler == 'warmup':
            self.lr_scheduler = LambdaLR(
                self.optimizer,
                WarmupLRSchedule(config.warmup_steps)
            )
        elif config.lr_scheduler == 'linear':
            self.lr_scheduler = LambdaLR(
                self.optimizer,
                LinearLRSchedule(config.base_lr, config.final_lr, config.max_steps)
            )
        elif config.lr_scheduler == "cosine":
            self.lr_scheduler = CosineAnnealingLR(
                self.optimizer, config.max_steps, eta_min=config.final_lr)
        elif config.lr_scheduler == 'cyclic':
            self.lr_scheduler = CyclicLR(
                self.optimizer, cycle_momentum=False, base_lr=1e-7,
                max_lr=config.base_lr, step_size_up=4000, step_size_down=12000)
        elif config.lr_scheduler == 'customize':
            self.lr_scheduler = CosineAnnealingLR(
                self.optimizer, config.max_steps, eta_min=config.final_lr)
        else:
            raise ValueError('Unknown learning rate scheduler!')
    elif self.config.optimizer == "sgd":
        print("using sgd optimizer")
        self.optimizer = optim.SGD(model.parameters(), lr=config.base_lr, momentum=0.99)
        self.lr_scheduler = CosineAnnealingLR(
            self.optimizer, config.max_steps, eta_min=config.final_lr)
    else:
        raise ValueError('Unknown optimizer!')

    # Initialize the metrics
    metrics_path = os.path.join(self.config.checkpoint_directory, 'train_metrics.pt')
    self.metric_store = metrics.MetricStore(metrics_path)
    self.metric_store.add(metrics.Metric('oom', metrics.format_int, 't'))
    self.metric_store.add(metrics.Metric('nll', metrics.format_float, max_history=1000))
    self.metric_store.add(metrics.Metric('ppl', metrics.format_float, max_history=1000))
    self.metric_store.add(metrics.Metric('lr', metrics.format_scientific, 'g', max_history=1))
    self.metric_store.add(metrics.Metric('num_tok', metrics.format_int, 'a', max_history=1000))

    if self.config.early_stopping:
        self.metric_store.add(metrics.Metric('vnll', metrics.format_float, 'g'))

    self.modules = {
        'model': model,
        'optimizer': self.optimizer,
        'lr_scheduler': self.lr_scheduler
    }
    self.step = 0
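# --- Illustrative sketch (not part of the original code) ---------------------
# The 'linear' branch passes LinearLRSchedule(base_lr, final_lr, max_steps) to
# LambdaLR. That class is also not shown here; the function below is a guess at
# the multiplicative factor it would need to return so that the effective
# learning rate moves linearly from base_lr down to final_lr over max_steps.
# The name and defaults are hypothetical; it would be wired up the same way as
# the warmup example above, e.g. LambdaLR(optimizer, functools.partial(
# _example_linear_decay_factor, base_lr=config.base_lr, final_lr=config.final_lr,
# max_steps=config.max_steps)).
def _example_linear_decay_factor(step, base_lr=1e-3, final_lr=1e-5, max_steps=100000):
    """Multiplicative factor: base_lr * factor interpolates linearly to final_lr."""
    progress = min(step, max_steps) / max_steps
    return (base_lr + (final_lr - base_lr) * progress) / base_lr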