def stats(self):
    ''' Return the dataset stats '''
    metric_store = metrics.MetricStore(default_format_str='l')

    examples = metrics.Metric('Examples', metrics.format_int, 'g')
    examples.update(len(self))
    metric_store.add(examples)

    if self.skipped:
        skipped = metrics.Metric('Skipped', metrics.format_percent, 'g')
        skipped.update(self.skipped, self.skipped + len(self))
        metric_store.add(skipped)

    vocab_size = metrics.Metric('Vocab Size', metrics.format_int, 'g')
    vocab_size.update(self.vocab_size)
    metric_store.add(vocab_size)

    input_lengths, target_lengths = zip(
        *[(len(d['input']), len(d['target'])) for d in self.data])

    input_length = metrics.Metric('Input Length', metrics.format_int, 'l(max)')
    input_length.updates(input_lengths)
    metric_store.add(input_length)

    target_length = metrics.Metric('Target Length', metrics.format_int, 'l(max)')
    target_length.updates(target_lengths)
    metric_store.add(target_length)

    return metric_store
def reset_metrics(self):
    """ Initialize the metrics """
    self.metric_store = metrics.MetricStore()
    self.metric_store.add(metrics.Metric("ppl", "format_dynamic_float"))
    self.metric_store.add(metrics.Metric("ntok", "format_int", "a"))
    self.metric_store.add(metrics.Metric("nll", "format_float"))
    self.metric_store.add(metrics.Metric("oom", "format_int", "t"))
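# For orientation: a minimal, hypothetical sketch of the Metric interface
# these snippets lean on, inferred purely from the call sites in this file
# (constructor arguments, update/updates, average); the real `metrics`
# module may differ in signature and behavior.
class MetricSketch:
    """Hypothetical stand-in for metrics.Metric, for illustration only."""

    def __init__(self, name, format_fn=str, format_str='', max_history=None):
        self.name = name
        self.format_fn = format_fn
        self.max_history = max_history
        self.values = []

    def update(self, value, total=None):
        # Record one observation; a (value, total) pair becomes a ratio,
        # as in the 'Skipped' percentage metric above.
        self.values.append(value if total is None else value / total)
        if self.max_history and len(self.values) > self.max_history:
            self.values.pop(0)

    def updates(self, values):
        for value in values:
            self.update(value)

    @property
    def average(self):
        return sum(self.values) / len(self.values) if self.values else 0.0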
def __init__(self, config, model, dataloader, device):
    self.model = model
    self.config = config
    self.device = device
    self.stopped_early = False
    self.dataloader = dataloader
    self.validation_dataloader = dataloader
    self.last_checkpoint_time = time.time()

    if 'cuda' in device.type:
        self.model = nn.DataParallel(model.cuda())

    self.optimizer = optim.Adam(model.parameters(), config.base_lr,
                                betas=(0.9, 0.98), eps=1e-9)

    if config.lr_scheduler == 'warmup':
        self.lr_scheduler = LambdaLR(
            self.optimizer,
            WarmupLRSchedule(config.warmup_steps)
        )
    elif config.lr_scheduler == 'linear':
        self.lr_scheduler = LambdaLR(
            self.optimizer,
            LinearLRSchedule(config.base_lr, config.final_lr, config.max_steps)
        )
    elif config.lr_scheduler == 'exponential':
        self.lr_scheduler = ExponentialLR(self.optimizer, config.lr_decay)
    else:
        raise ValueError('Unknown learning rate scheduler!')

    # Initialize the metrics
    metrics_path = os.path.join(self.config.checkpoint_directory, 'train_metrics.pt')
    self.metric_store = metrics.MetricStore(metrics_path)
    self.metric_store.add(metrics.Metric('oom', metrics.format_int, 't'))
    self.metric_store.add(metrics.Metric('nll', metrics.format_float, max_history=1000))
    self.metric_store.add(metrics.Metric('lr', metrics.format_scientific, 'g', max_history=1))
    self.metric_store.add(metrics.Metric('num_tok', metrics.format_int, 'a', max_history=1000))

    if self.config.early_stopping:
        self.metric_store.add(metrics.Metric('vnll', metrics.format_float, 'g'))

    self.modules = {
        'model': model,
        'optimizer': self.optimizer,
        'lr_scheduler': self.lr_scheduler
    }
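# WarmupLRSchedule is passed to LambdaLR as a multiplicative factor on
# base_lr. A hedged sketch of what an inverse-square-root warmup typically
# computes (the Transformer schedule, normalized here to peak at 1.0 when
# step == warmup_steps); the project's actual implementation may differ.
class WarmupLRScheduleSketch:
    def __init__(self, warmup_steps):
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        step = max(step, 1)
        # Linear ramp up to warmup_steps, then inverse square-root decay
        return min(step ** -0.5,
                   step * self.warmup_steps ** -1.5) * self.warmup_steps ** 0.5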
def evaluate_epoch(self, epoch, experiment, stats_file, verbose=0):
    ''' Evaluate a single epoch '''
    neg_log_likelihood = metrics.Metric('nll', metrics.format_float)

    def get_description():
        mode_name = 'Test' if self.dataset.split == 'test' else 'Validate'
        description = f'{mode_name} #{epoch}'
        if verbose > 0:
            description += f' {neg_log_likelihood}'
        if verbose > 1:
            description += f' [{profile.mem_stat_string(["allocated"])}]'
        return description

    batches = tqdm(
        self.dataloader,
        unit='batch',
        dynamic_ncols=True,
        desc=get_description(),
        file=sys.stdout  # needed to make tqdm_wrap_stdout work
    )

    with tqdm_wrap_stdout():
        for batch in batches:
            # run the data through the model
            batches.set_description_str(get_description())
            nll, length, stats = self.evaluate(batch)
            self.update_stats(stats, self.stats, self.count)
            if length:
                neg_log_likelihood.update(nll / length)

        experiment.log_metric('nll', neg_log_likelihood.average)
        self.save_stats(stats_file)
        return neg_log_likelihood.average
def setUp(self):
    """Initialises a mocker and a Metric object to be tested."""
    self.mock = mock.Mock()
    self.test = metrics.Metric()
    self.config = configurator.Config()
def _initialize_metrics(self):
    """ Initialize the metrics """
    self.metric_store = metrics.MetricStore()
    self.metric_store.add(
        metrics.Metric("lr", "format_scientific", "g", max_history=1))
    self.metric_store.add(
        metrics.Metric("ppl", "format_dynamic_float", max_history=1000))
    self.metric_store.add(
        metrics.Metric("ntok", "format_int", "a", max_history=1000))
    self.metric_store.add(metrics.Metric("oom", "format_int", "t"))
    self.metric_store.add(
        metrics.Metric("nll", "format_float", max_history=1000))

    self.experiment = initialize_experiment(
        self.args, ("data", "model", "optim"), self.args.experiment_name)
def evaluate_epoch(self, epoch, experiment, verbose=0):
    ''' Evaluate a single epoch '''
    neg_log_likelihood = metrics.Metric('nll', metrics.format_float)

    def get_description():
        mode_name = 'Test' if self.config.split == 'test' else 'Validate'
        description = f'{mode_name} #{epoch}'
        if verbose > 0:
            description += f' {neg_log_likelihood}'
        if verbose > 1:
            description += f' [{profile.mem_stat_string(["allocated"])}]'
        return description

    batches = tqdm(
        self.dataloader,
        unit='batch',
        dynamic_ncols=True,
        desc=get_description(),
        file=sys.stdout  # needed to make tqdm_wrap_stdout work
    )

    # Concatenate the full split into one long token stream, then slide a
    # window of batch_length over it, advancing by target_length each step
    batches = [batch for batch in batches]
    batches_ = torch.cat(batches, dim=0)
    batches = []
    total_len = batches_.shape[0]
    seq_len = self.config.batch_length
    tgt_len = self.config.target_length
    lid, rid = 0, seq_len
    while True:
        batches.append((lid, rid))
        lid += tgt_len
        rid += tgt_len
        if rid >= total_len:
            break

    nll_sum, num_tok_sum = 0, 0
    for batch in tqdm(batches):
        lid, rid = batch[0], batch[1]
        batch = batches_[lid:rid]
        if type(batch) is not torch.Tensor:
            batch = [b.squeeze(0) for b in batch]
        else:
            batch = (batch, )
        nll, num_tok = self.evaluate(batch, tgt_len=tgt_len)
        nll_sum += nll
        num_tok_sum += num_tok

    nll = nll_sum / num_tok_sum
    ppl = np.exp(nll)
    bpc = nll / math.log(2)
    print("nll {:5} ppl {:5} bpc: {:5}".format(nll, ppl, bpc))
    return nll_sum / num_tok_sum
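# Sanity check for the closing statistics above: perplexity and
# bits-per-character are both deterministic functions of the average
# per-token negative log-likelihood (the nll value here is made up).
import math
import numpy as np

nll = 1.3862943611198906          # hypothetical average nll per token (= ln 4)
ppl = np.exp(nll)                 # perplexity: e**nll -> 4.0
bpc = nll / math.log(2)           # bits per character: nll / ln 2 -> 2.0
print(f"nll {nll:.4f} ppl {ppl:.4f} bpc {bpc:.4f}")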
def evaluate_epoch(self, epoch, experiment, verbose=0):
    ''' Evaluate a single epoch '''
    neg_log_likelihood = metrics.Metric('nll', metrics.format_float)

    def get_description():
        mode_name = 'Test' if self.config.split == 'test' else 'Validate'
        description = f'{mode_name} #{epoch}'
        if verbose > 0:
            description += f' {neg_log_likelihood}'
        if verbose > 1:
            description += f' [{profile.mem_stat_string(["allocated"])}]'
        return description

    assert self.config.return_rank is True

    data_dir = self.dataloader.dataset.data_dir
    fold = self.dataloader.dataset.split
    with open(os.path.join(data_dir, f'{fold}.txt'), 'r') as f:
        lines = f.readlines()
    max_len = max(len(l.split()) for l in lines)

    batches = []
    tok2id = self.dataloader.dataset.tok2id
    for line in lines:
        l_sp = line.strip().split()
        data = torch.tensor([tok2id[w] if w in tok2id else tok2id['<unk>']
                             for w in l_sp + ['<pad>'] * (max_len - len(l_sp))])
        batches.append((data, len(l_sp)))

    res_dict = {}
    nll_sum, num_tok_sum = 0, 0
    acc_tok_sum = 0
    num_example = 0
    pred = []
    for bi, (batch, b_len) in enumerate(tqdm(batches)):
        batch = batch[:, None]
        nll, num_tok, rank = self.evaluate(batch)
        nll_sum += nll
        num_tok_sum += num_tok
        # Count the example as correct if the target word at the final
        # position has rank 0, i.e. the model's top prediction
        if rank[b_len - 2] == 0:
            acc_tok_sum += 1
            pred.append(1)
        else:
            pred.append(0)
        num_example += 1

    nll = nll_sum / num_tok_sum
    ppl = np.exp(nll)
    print(acc_tok_sum / num_example)
    print(ppl)

    with open(os.path.join(self.config.checkpoint_directory,
                           f'lambada-acc-{fold}.txt'), 'w') as f:
        for p in pred:
            f.write(f'{p}\n')
def __init__(self, connection, machine_id):
    """Initialises the thread and packet info which will be sent to the
    RabbitMQ queue."""
    threading.Thread.__init__(self)
    self.r_handler = reader.Reader()
    self.m_handler = metrics.Metric()
    self.rabbit_connection = connection
    self.packet = {'ID': machine_id}
    send_time = int(self.r_handler.get_c_value()[0])
    time.sleep(send_time)
def stats(self):
    ''' Return the dataset stats '''
    metric_store = super(AnnotatedTextDataset, self).stats
    if self.annotation is TextAnnotation.NONE or self.split == 'train':
        return metric_store

    spans = metrics.Metric('Constituent Spans', metrics.format_float, 'l(max)')
    for datum in self.data:
        _, target_spans = self.segmenters[-1](datum['target_annotation'])
        if target_spans:
            spans.updates(target_spans)
    metric_store.add(spans)

    return metric_store
def evaluate(epoch, test_len, batch_size, x_test, y_test):
    predictions = np.array([], dtype=np.uint8)
    scores = None
    for i in range(0, test_len, batch_size):
        x_batch = x_test[i:i + batch_size]
        y_batch = y_test[i:i + batch_size]
        pre_xo = sess.run(one_hot_x, feed_dict={pre_x: x_batch})
        x_batch = pre_xo.reshape(x_batch.shape[0], max_len, vocab_size, 1)
        y_batch = sess.run(one_hot_y, feed_dict={pre_y: y_batch})
        preds, scr = eval_step(x_batch, y_batch)
        predictions = np.concatenate([predictions, preds])
        if scores is not None:
            scores = np.concatenate([scores, scr])
        else:
            scores = scr

    m = metrics.Metric(y_test, predictions, classes=classes)
    accuracies.append([epoch, m.accuracy_M, m.accuracy_m, m.accuracy])
    return predictions, scores
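# The Metric(y, predictions, classes=...) variant used here exposes
# accuracy_M, accuracy_m, and accuracy. A hedged guess at what the first
# two compute (macro: unweighted mean of per-class accuracy; micro:
# overall accuracy); the actual class may define them differently.
import numpy as np

def macro_micro_accuracy(y_true, y_pred, classes):
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    per_class = [np.mean(y_pred[y_true == c] == c)
                 for c in classes if np.any(y_true == c)]
    macro = float(np.mean(per_class))         # each class weighted equally
    micro = float(np.mean(y_pred == y_true))  # each example weighted equally
    return macro, micro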
def __init__(self, config, model, dataloader, device,
             valid_dataloader=None, clip=0.25):
    self.model = model
    self.config = config
    self.device = device
    self.stopped_early = False
    self.clip = clip
    self.dataloader = dataloader
    self.validation_dataloader = valid_dataloader
    self.last_checkpoint_time = time.time()

    if 'cuda' in device.type:
        self.model = (nn.DataParallel(model.cuda())
                      if torch.cuda.device_count() == 1
                      else NewDataParallel(config.bsz_gpu0, model.cuda()))

    if self.config.optimizer == "adam":
        self.optimizer = optim.Adam(model.parameters(), config.base_lr,
                                    betas=(config.beta_1, config.beta_2),
                                    eps=1e-08)  # for transformer
        if config.lr_scheduler == 'warmup':
            self.lr_scheduler = LambdaLR(
                self.optimizer,
                WarmupLRSchedule(config.warmup_steps)
            )
        elif config.lr_scheduler == 'linear':
            self.lr_scheduler = LambdaLR(
                self.optimizer,
                LinearLRSchedule(config.base_lr, config.final_lr, config.max_steps)
            )
        elif config.lr_scheduler == "cosine":
            self.lr_scheduler = CosineAnnealingLR(
                self.optimizer, config.max_steps, eta_min=config.final_lr)
        elif config.lr_scheduler == 'cyclic':
            self.lr_scheduler = CyclicLR(
                self.optimizer, cycle_momentum=False, base_lr=1e-7,
                max_lr=config.base_lr, step_size_up=4000, step_size_down=12000)
        elif config.lr_scheduler == 'customize':
            self.lr_scheduler = CosineAnnealingLR(
                self.optimizer, config.max_steps, eta_min=config.final_lr)
        else:
            raise ValueError('Unknown learning rate scheduler!')
    elif self.config.optimizer == "sgd":
        print("using sgd optimizer")
        self.optimizer = optim.SGD(model.parameters(), lr=config.base_lr,
                                   momentum=0.99)
        self.lr_scheduler = CosineAnnealingLR(
            self.optimizer, config.max_steps, eta_min=config.final_lr)
    else:
        raise ValueError('Unknown optimizer!')

    # Initialize the metrics
    metrics_path = os.path.join(self.config.checkpoint_directory, 'train_metrics.pt')
    self.metric_store = metrics.MetricStore(metrics_path)
    self.metric_store.add(metrics.Metric('oom', metrics.format_int, 't'))
    self.metric_store.add(metrics.Metric('nll', metrics.format_float, max_history=1000))
    self.metric_store.add(metrics.Metric('ppl', metrics.format_float, max_history=1000))
    self.metric_store.add(metrics.Metric('lr', metrics.format_scientific, 'g', max_history=1))
    self.metric_store.add(metrics.Metric('num_tok', metrics.format_int, 'a', max_history=1000))

    if self.config.early_stopping:
        self.metric_store.add(metrics.Metric('vnll', metrics.format_float, 'g'))

    self.modules = {
        'model': model,
        'optimizer': self.optimizer,
        'lr_scheduler': self.lr_scheduler
    }
    self.step = 0
def __call__(self):
    """ Run the training! """
    # Must be called first
    self.try_init_amp()

    model = self.modules["model"]
    optimizer = self.modules["optimizer"]
    scheduler = self.modules["scheduler"]

    if self.args.optim.use_gradient_checkpointing:
        model.enable_gradient_checkpointing()

    model = nn.DataParallel(model)
    dataloader = get_dataloader(
        self.args.data,
        self.dataset,
        num_devices=len(model.device_ids),
        shuffle=True,
    )

    def get_description():
        return f"Train {self.metric_store}"

    max_steps = self.args.optim.max_steps
    accumulation_steps = self.args.optim.gradient_accumulation_steps
    progress = tqdm(
        unit="step",
        initial=self.step,
        dynamic_ncols=True,
        desc=get_description(),
        total=max_steps,
        file=sys.stdout,  # needed to make tqdm_wrap_stdout work
    )

    with ExitStack() as stack:
        # pylint:disable=no-member
        stack.enter_context(tqdm_wrap_stdout())
        stack.enter_context(chunked_scattering())
        stack.enter_context(self.experiment.train())
        # pylint:enable=no-member

        if self.args.optim.early_stopping:
            # If using early stopping, must evaluate regularly to determine
            # if training should stop early, so setup an Evaluator
            eval_args = copy.deepcopy(self.args)
            eval_args.data.batch_size = self.args.optim.eval_batch_size
            evaluator = Evaluator(eval_args)
            evaluator.model = model
            evaluator.load_dataset("validation")
            evaluator.initialize_experiment(experiment=self.experiment)

            # Make sure we are tracking validation nll
            self.metric_store.add(metrics.Metric("vnll", "format_float", "g(m)"))

            # And store a local variable for easy access
            vnll_metric = self.metric_store["vnll"]

        loss = 0
        num_tokens = 0
        for step, batch in enumerate(cycle(dataloader), 1):
            try:
                step_loss = self.compute_gradients_and_loss(batch, model, optimizer)
                run_optimizer = (step % accumulation_steps) == 0

                if run_optimizer:
                    # Run an optimization step
                    optimizer.step()
                    scheduler.step()  # Update learning rate schedule
                    model.zero_grad()

                # Update loss and num tokens after running an optimization
                # step, in case it results in an out of memory error
                loss += step_loss
                num_tokens += batch["num_tokens"]

                if run_optimizer:
                    # Since we ran the optimizer, increment current step
                    self.step += 1
                    self.experiment.set_step(self.step)
                    progress.update()

                    # update our metrics as well
                    self.update_metrics(
                        loss / accumulation_steps,
                        num_tokens,
                        scheduler.get_lr()[0],
                    )
                    num_tokens = 0
                    loss = 0

                    # and finally check if we should save
                    if (self.args.save_steps > 0
                            and self.step % self.args.save_steps == 0):
                        # First save the current checkpoint
                        self.save()

                        # Then if we are implementing early stopping, see
                        # if we achieved a new best
                        if self.args.optim.early_stopping:
                            evaluator.reset_metrics()
                            with ExitStack() as eval_stack:
                                # pylint:disable=no-member
                                eval_stack.enter_context(tqdm_unwrap_stdout())
                                eval_stack.enter_context(release_cuda_memory(
                                    collect_tensors(optimizer.state)))
                                # pylint:enable=no-member

                                vnll = evaluator()
                                vnll_metric.update(vnll)

                                # Save the updated metrics
                                self.save_metrics()

                                if vnll == vnll_metric.min:
                                    self.on_new_best()

                            # Try to combat OOM errors caused by doing evaluation
                            # in the same loop with training. This manifests in out
                            # of memory errors after the first or second evaluation
                            # run.
                            refresh_cuda_memory()

                        if not self.prune_checkpoints():
                            logging.info("Stopping early")
                            break

                    if self.step >= max_steps:
                        logging.info("Finished training")
                        break
            except RuntimeError as rte:
                if "out of memory" in str(rte):
                    self.metric_store["oom"].update(1)
                    logging.warning(str(rte))
                else:
                    progress.close()
                    raise rte

            progress.set_description_str(get_description())

        progress.close()
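# The try/except loop above interleaves gradient accumulation with OOM
# recovery. Stripped of the bookkeeping, the accumulation pattern is just
# the following (the model, data, and sizes below are placeholders):
import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(8, 1)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
accumulation_steps = 4

for step in range(1, 17):
    x, y = torch.randn(2, 8), torch.randn(2, 1)
    # Scale each micro-batch loss so the summed gradient matches one
    # large batch of accumulation_steps * 2 examples
    loss = nn.functional.mse_loss(model(x), y) / accumulation_steps
    loss.backward()  # gradients accumulate in .grad across micro-batches
    if step % accumulation_steps == 0:
        optimizer.step()
        model.zero_grad()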
        db.x_train, db.y_train, db.x_test, db.y_test,
        db.vocab_size, db.max_len, db.classes, db.num_classes,
        options.architecture, options.activation_functions,
        options.widths, options.strides, options.dilations,
        options.feature_maps,
        get_optimizer(options.optimizer, options.learning_rate),
        options.l2, options.train_batch_size, options.test_batch_size,
        options.epochs, options.dropout)
    report_out += training_out

    model_data = {'y': db.y_test, 'scores': best_result[-1], 'classes': db.classes}
    pickle.dump(model_data,
                open('model_data_' + options.root.split('/')[-1] + '.p', 'wb'))

    m = metrics.Metric(labels, best_result[1], classes=db.classes,
                       filename_prefix=options.prefix)
    classification_report = m.get_report()
    if options.verbose:
        print(classification_report)
    report_out += classification_report

    if options.save_graph:
        print("Saving graph")
        m.save_confusion_matrix(title=options.graph_title)

    if options.save_graph:
        print("Saving graph")
        m.save_learning_curve(accuracies,
                              'Learning Curve - ' + str(options.root.split('/')[-1]),
                              acc_type=0)