def run_train_epoch_context(self, train_dataset_with_metadata,
                            train_global_state: TrainGlobalState,
                            populate_after=True, verbose=True):
    train_dataloader = self.get_train_dataloader(
        train_dataset_with_metadata=train_dataset_with_metadata,
        do_override_labels=True,
        verbose=verbose,
    )
    for batch, batch_metadata in maybe_tqdm(train_dataloader, desc="Training", verbose=verbose):
        self.run_train_step(
            batch=batch,
            batch_metadata=batch_metadata,
            train_global_state=train_global_state,
        )
        yield batch, train_global_state
    if populate_after:
        self.populate_llp_state(
            train_dataloader=train_dataloader,
            verbose=verbose,
        )
        self.log_writer.write_entry("populate_logs", combine_dicts([
            populate_logs(llp_state=self.llp_state, llp_params=self.llp_params),
            {"epoch": train_global_state.epoch},
        ]))

def run_val(self, val_examples, verbose=True):
    # Validation is only run in the non-distributed setting (local_rank == -1).
    if self.rparams.local_rank != -1:
        return
    self.model.eval()
    val_dataloader = self.get_eval_dataloader(val_examples)
    total_eval_loss = 0
    nb_eval_steps, nb_eval_examples = 0, 0
    all_logits = []
    for step, (batch, batch_metadata) in enumerate(
            maybe_tqdm(val_dataloader, desc="Evaluating (Val)", verbose=verbose)):
        batch = batch.to(self.device)
        with torch.no_grad():
            logits = self.model.forward_batch(batch).logits
            tmp_eval_loss = self.loss_criterion(logits, batch.label_ids)
        logits = logits.detach().cpu().numpy()
        total_eval_loss += tmp_eval_loss.mean().item()
        nb_eval_examples += len(batch)
        nb_eval_steps += 1
        all_logits.append(logits)
    eval_loss = total_eval_loss / nb_eval_steps
    all_logits = np.concatenate(all_logits, axis=0)
    return {
        "logits": all_logits,
        "loss": eval_loss,
        "metrics": evaluate.compute_task_metrics(self.task, all_logits, val_examples),
    }

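# Usage sketch (illustrative, not from this file): run_val returns None in the
# distributed case, so callers should guard before reading the result dict.
# `runner` below is a hypothetical instance of the class that owns run_val.
#
#     val_results = runner.run_val(val_examples)
#     if val_results is not None:
#         print(val_results["loss"], val_results["metrics"])
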
def run_test(self, test_examples, verbose=True):
    test_dataloader = self.get_dataloader(
        examples=test_examples,
        batch_size=self.rparams.eval_batch_size,
        shuffle=False,
        verbose=verbose,
    )
    self.model.eval()
    all_logits = []
    for step, batch in enumerate(
            maybe_tqdm(test_dataloader, desc="Predictions (Test)", verbose=verbose)):
        batch = batch.to(self.device)
        with torch.no_grad():
            logits = forward_batch_basic(
                model=self.model,
                batch=batch,
                omit_label_ids=True,
            )[0]
        logits = logits.detach().cpu().numpy()
        all_logits.append(logits)
    all_logits = np.concatenate(all_logits, axis=0)
    return all_logits

def populate_llp_state(self, train_dataloader, verbose=True):
    self.model.eval()
    with torch.no_grad():
        for batch, metadata in maybe_tqdm(train_dataloader, desc="Populating big_m", verbose=verbose):
            batch = batch.to(self.device)
            embedding = self.model.forward_batch(batch).embedding
            self.llp_state.big_m_tensor[metadata["example_id"]] = embedding
    self.propagate_labels(verbose=verbose)

@classmethod
def read_glove(cls, path, vocab_size=None, verbose=False):
    embeddings = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in maybe_tqdm(f, total=vocab_size, verbose=verbose, desc="GloVe"):
            if vocab_size is not None and len(embeddings) == vocab_size:
                break
            word, vec = line.split(" ", 1)
            embeddings[word] = np.array(list(map(float, vec.split())))
    return cls(embeddings)

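# Usage sketch (class and file names are illustrative, not taken from this file):
# read_glove is an alternate constructor, so it would typically be invoked on the
# embeddings wrapper class that defines it, returning an instance built from the
# word -> vector dict parsed out of a standard GloVe text file.
#
#     embeddings = WordEmbeddings.read_glove("glove.840B.300d.txt", vocab_size=50000)
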
def run_train_epoch_context(self, train_dataloader,
                            train_global_state: TrainGlobalState, verbose=True):
    for batch, batch_metadata in maybe_tqdm(train_dataloader, desc="Training", verbose=verbose):
        self.run_train_step(
            batch=batch,
            train_global_state=train_global_state,
        )
        yield batch, train_global_state
    train_global_state.step_epoch()

def run_test(self, test_examples, verbose=True):
    test_dataloader = self.get_eval_dataloader(test_examples)
    self.model.eval()
    all_logits = []
    for step, (batch, batch_metadata) in enumerate(
            maybe_tqdm(test_dataloader, desc="Predictions (Test)", verbose=verbose)):
        batch = batch.to(self.device)
        with torch.no_grad():
            logits = self.model.forward_batch(batch).logits
        logits = logits.detach().cpu().numpy()
        all_logits.append(logits)
    all_logits = np.concatenate(all_logits, axis=0)
    return all_logits

def convert_examples_to_dataset(examples, tokenizer, feat_spec, task, verbose=False):
    data_rows = [
        example.tokenize(tokenizer).featurize(tokenizer, feat_spec)
        for example in maybe_tqdm(examples, desc="Tokenizing", verbose=verbose)
    ]
    full_batch = task.Batch.from_data_rows(data_rows)
    dataset_with_metadata = full_batch_to_dataset(full_batch)
    # Attach per-example ids so batch metadata can report which examples are in each batch.
    dataset_with_metadata.metadata["descriptors"].append(
        DataDescriptor("other_metadata", "example_id", None))
    dataset_with_metadata.metadata["other"]["example_id"] = list(range(len(examples)))
    return dataset_with_metadata

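# Usage sketch (argument names are illustrative): the "example_id" entry added
# above is what later lets batch metadata carry per-example ids, e.g. as consumed
# by populate_llp_state via metadata["example_id"].
#
#     dataset_with_metadata = convert_examples_to_dataset(
#         examples=train_examples, tokenizer=tokenizer,
#         feat_spec=feat_spec, task=task, verbose=True,
#     )
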
def run_train_epoch_context(self, train_dataset_with_metadata, uda_task_data,
                            train_global_state: TrainGlobalState,
                            populate_after=True, verbose=True):
    self.model.train()
    sup_dataloader = self.get_sup_dataloader(
        train_dataset_with_metadata=train_dataset_with_metadata,
        do_override_labels=True,
        verbose=verbose,
    )
    unsup_dataloaders = self.get_unsup_dataloaders(
        sup_dataloader=sup_dataloader,
        uda_task_data=uda_task_data,
    )
    dataloader_triplet = self.form_dataloader_triplet(
        sup_dataloader=sup_dataloader,
        unsup_orig_loader=unsup_dataloaders.unsup_orig,
        unsup_aug_loader=unsup_dataloaders.unsup_aug,
    )
    train_iterator = maybe_tqdm(zip(
        dataloader_triplet.sup,
        dataloader_triplet.unsup_orig,
        dataloader_triplet.unsup_aug,
    ), total=len(dataloader_triplet.sup), desc="Training", verbose=verbose)
    for sup_batch_m, unsup_orig_batch_m, unsup_aug_batch_m in train_iterator:
        batch_m_triplet = uda_runner.TrainDataTriplet(
            sup=sup_batch_m.to(self.device),
            unsup_orig=unsup_orig_batch_m.to(self.device),
            unsup_aug=unsup_aug_batch_m.to(self.device),
        )
        self.run_train_step(
            batch_m_triplet=batch_m_triplet,
            train_global_state=train_global_state,
        )
        yield batch_m_triplet, train_global_state
    if populate_after:
        self.populate_llp_state(
            train_dataloader=sup_dataloader,
            verbose=verbose,
        )
        self.log_writer.write_entry("populate_logs", combine_dicts([
            llp_runner.populate_logs(llp_state=self.llp_state, llp_params=self.llp_params),
            {"epoch": train_global_state.epoch},
        ]))

def run_val(val_examples, val_dataloader, model, task, loss_criterion,
            device, local_rank, verbose):
    if local_rank != -1:
        return
    model.eval()
    total_eval_loss = 0
    nb_eval_steps, nb_eval_examples = 0, 0
    all_logits = []
    for step, (batch, batch_metadata) in enumerate(
            maybe_tqdm(val_dataloader, desc="Evaluating (Val)", verbose=verbose)):
        batch = batch.to(device)
        with torch.no_grad():
            logits = forward_batch_delegate(
                model=model,
                batch=batch,
                omit_label_ids=True,
                task_type=task.TASK_TYPE,
            )[0]
            tmp_eval_loss = compute_loss_from_model_output(
                logits=logits,
                loss_criterion=loss_criterion,
                batch=batch,
                task_type=task.TASK_TYPE,
            )
        logits = logits.detach().cpu().numpy()
        total_eval_loss += tmp_eval_loss.mean().item()
        nb_eval_examples += len(batch)
        nb_eval_steps += 1
        all_logits.append(logits)
    eval_loss = total_eval_loss / nb_eval_steps
    all_logits = np.concatenate(all_logits, axis=0)
    return {
        "logits": all_logits,
        "loss": eval_loss,
        "metrics": evaluate.compute_task_metrics(task, all_logits, val_examples),
    }

def run_train_epoch_context(self, dataloader_duplet: TrainDataDuplet,
                            train_global_state: TrainGlobalState, verbose=True):
    train_iterator = maybe_tqdm(zip(
        dataloader_duplet.sup,
        dataloader_duplet.unsup,
    ), desc="Training", verbose=verbose, total=len(dataloader_duplet.sup))
    for sup_batch, unsup_batch in train_iterator:
        batch_duplet = TrainDataDuplet(
            sup=sup_batch,
            unsup=unsup_batch,
        )
        self.run_train_step(
            batch_duplet=batch_duplet,
            train_global_state=train_global_state,
        )
        yield batch_duplet, train_global_state
    train_global_state.step_epoch()

def run_train_epoch_context(self, dataloader_triplet,
                            train_global_state: TrainGlobalState, verbose=True):
    train_iterator = maybe_tqdm(zip(
        dataloader_triplet.sup,
        dataloader_triplet.unsup_orig,
        dataloader_triplet.unsup_aug,
    ), desc="Training", verbose=verbose, total=len(dataloader_triplet.sup))
    for sup_batch, unsup_orig_batch, unsup_aug_batch in train_iterator:
        # batch, batch_metadata hack
        batch_triplet = TrainDataTriplet(
            sup=sup_batch,
            unsup_orig=unsup_orig_batch,
            unsup_aug=unsup_aug_batch,
        )
        self.run_train_step(
            batch_triplet=batch_triplet,
            train_global_state=train_global_state,
        )
        yield batch_triplet, train_global_state
    train_global_state.step_epoch()