Example #1
def run_train_epoch_context(self,
                            train_dataset_with_metadata,
                            train_global_state: TrainGlobalState,
                            populate_after=True,
                            verbose=True):
    train_dataloader = self.get_train_dataloader(
        train_dataset_with_metadata=train_dataset_with_metadata,
        do_override_labels=True,
        verbose=verbose,
    )
    for batch, batch_metadata in maybe_tqdm(train_dataloader,
                                            desc="Training",
                                            verbose=verbose):
        self.run_train_step(
            batch=batch,
            batch_metadata=batch_metadata,
            train_global_state=train_global_state,
        )
        yield batch, train_global_state
    if populate_after:
        self.populate_llp_state(
            train_dataloader=train_dataloader,
            verbose=verbose,
        )
        self.log_writer.write_entry(
            "populate_logs",
            combine_dicts([
                populate_logs(llp_state=self.llp_state,
                              llp_params=self.llp_params),
                {
                    "epoch": train_global_state.epoch,
                },
            ]))
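
All of the examples on this page route their loops through maybe_tqdm, whose definition is not shown here. A minimal sketch of what such a helper presumably looks like, assuming it simply wraps the iterable in a tqdm progress bar when verbose is truthy and returns it unchanged otherwise:

from tqdm import tqdm

def maybe_tqdm(iterable, verbose=True, **kwargs):
    # Hypothetical sketch: show a progress bar only when verbose,
    # forwarding keyword arguments such as desc= and total= to tqdm.
    if verbose:
        return tqdm(iterable, **kwargs)
    return iterable

Under this assumption, every keyword the examples pass (desc, total) is forwarded straight to tqdm, and setting verbose=False simply disables the progress bar without changing iteration behavior.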
Example #2
    def run_val(self, val_examples, verbose=True):
        # skip validation entirely when running distributed (local_rank != -1)
        if self.rparams.local_rank != -1:
            return
        self.model.eval()
        val_dataloader = self.get_eval_dataloader(val_examples)
        total_eval_loss = 0
        nb_eval_steps, nb_eval_examples = 0, 0
        all_logits = []
        for step, (batch, batch_metadata) in enumerate(
                maybe_tqdm(val_dataloader,
                           desc="Evaluating (Val)",
                           verbose=verbose)):
            batch = batch.to(self.device)

            with torch.no_grad():
                logits = self.model.forward_batch(batch).logits
                tmp_eval_loss = self.loss_criterion(logits, batch.label_ids)

            logits = logits.detach().cpu().numpy()
            total_eval_loss += tmp_eval_loss.mean().item()

            nb_eval_examples += len(batch)
            nb_eval_steps += 1
            all_logits.append(logits)
        eval_loss = total_eval_loss / nb_eval_steps
        all_logits = np.concatenate(all_logits, axis=0)

        return {
            "logits": all_logits,
            "loss": eval_loss,
            "metrics": evaluate.compute_task_metrics(
                self.task, all_logits, val_examples),
        }
Example #3
    def run_test(self, test_examples, verbose=True):
        test_dataloader = self.get_dataloader(
            examples=test_examples,
            batch_size=self.rparams.eval_batch_size,
            shuffle=False,
            verbose=verbose,
        )
        self.model.eval()
        all_logits = []
        for step, batch in enumerate(
                maybe_tqdm(test_dataloader,
                           desc="Predictions (Test)",
                           verbose=verbose)):
            batch = batch.to(self.device)
            with torch.no_grad():
                logits = forward_batch_basic(
                    model=self.model,
                    batch=batch,
                    omit_label_ids=True,
                )[0]
            logits = logits.detach().cpu().numpy()
            all_logits.append(logits)

        all_logits = np.concatenate(all_logits, axis=0)
        return all_logits
Example #4
def populate_llp_state(self, train_dataloader, verbose=True):
    self.model.eval()
    with torch.no_grad():
        for batch, metadata in maybe_tqdm(train_dataloader,
                                          desc="Populating big_m",
                                          verbose=verbose):
            batch = batch.to(self.device)
            embedding = self.model.forward_batch(batch).embedding
            self.llp_state.big_m_tensor[metadata["example_id"]] = embedding
    self.propagate_labels(verbose=verbose)
Example #5
def read_glove(cls, path, vocab_size=None, verbose=False):
    embeddings = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in maybe_tqdm(f,
                               total=vocab_size,
                               verbose=verbose,
                               desc="GloVe"):
            if vocab_size is not None and len(embeddings) == vocab_size:
                break
            word, vec = line.split(" ", 1)
            embeddings[word] = np.array(list(map(float, vec.split())))
    return cls(embeddings)
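
read_glove takes cls and ends with return cls(embeddings), so it is presumably exposed as a classmethod acting as an alternate constructor. A hypothetical call, with the class name GloveEmbeddings invented purely for illustration:

# Hypothetical usage; assumes read_glove is decorated with @classmethod
# on an embedding-table class, here called GloveEmbeddings.
glove = GloveEmbeddings.read_glove(
    path="glove.840B.300d.txt",
    vocab_size=100000,
    verbose=True,
)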
Example #6
def run_train_epoch_context(self,
                            train_dataloader,
                            train_global_state: TrainGlobalState,
                            verbose=True):
    for batch, batch_metadata in maybe_tqdm(train_dataloader,
                                            desc="Training",
                                            verbose=verbose):
        self.run_train_step(
            batch=batch,
            train_global_state=train_global_state,
        )
        yield batch, train_global_state
    train_global_state.step_epoch()
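
Because run_train_epoch_context is a generator (it yields after every training step), the caller drives the epoch by iterating over it. A hypothetical driver loop, assuming a runner object exposing the method above and a user-supplied stopping check; should_stop and the default TrainGlobalState() constructor are assumptions, not part of the original code:

train_global_state = TrainGlobalState()  # assumes a default constructor
for batch, tgs in runner.run_train_epoch_context(
        train_dataloader=train_dataloader,
        train_global_state=train_global_state,
        verbose=True):
    # per-step hooks go here: validation, checkpointing, logging
    if should_stop(tgs):  # hypothetical early-stopping criterion
        break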
Example #7
    def run_test(self, test_examples, verbose=True):
        test_dataloader = self.get_eval_dataloader(test_examples)
        self.model.eval()
        all_logits = []
        for step, (batch, batch_metadata) in enumerate(
                maybe_tqdm(test_dataloader,
                           desc="Predictions (Test)",
                           verbose=verbose)):
            batch = batch.to(self.device)
            with torch.no_grad():
                logits = self.model.forward_batch(batch).logits
            logits = logits.detach().cpu().numpy()
            all_logits.append(logits)

        all_logits = np.concatenate(all_logits, axis=0)
        return all_logits
Example #8
def convert_examples_to_dataset(examples,
                                tokenizer,
                                feat_spec,
                                task,
                                verbose=False):
    data_rows = [
        example.tokenize(tokenizer).featurize(tokenizer, feat_spec)
        for example in maybe_tqdm(examples, desc="Tokenizing", verbose=verbose)
    ]
    full_batch = task.Batch.from_data_rows(data_rows)
    dataset_with_metadata = full_batch_to_dataset(full_batch)
    dataset_with_metadata.metadata["descriptors"].append(
        DataDescriptor("other_metadata", "example_id", None))
    dataset_with_metadata.metadata["other"]["example_id"] = list(
        range(len(examples)))
    return dataset_with_metadata
Example #9
def run_train_epoch_context(self, train_dataset_with_metadata, uda_task_data,
                            train_global_state: TrainGlobalState,
                            populate_after=True, verbose=True):
    self.model.train()
    sup_dataloader = self.get_sup_dataloader(
        train_dataset_with_metadata=train_dataset_with_metadata,
        do_override_labels=True, verbose=verbose,
    )
    unsup_dataloaders = self.get_unsup_dataloaders(
        sup_dataloader=sup_dataloader,
        uda_task_data=uda_task_data,
    )
    dataloader_triplet = self.form_dataloader_triplet(
        sup_dataloader=sup_dataloader,
        unsup_orig_loader=unsup_dataloaders.unsup_orig,
        unsup_aug_loader=unsup_dataloaders.unsup_aug,
    )
    train_iterator = enumerate(maybe_tqdm(zip(
        dataloader_triplet.sup,
        dataloader_triplet.unsup_orig,
        dataloader_triplet.unsup_aug
    ), total=len(dataloader_triplet.sup), desc="Training", verbose=verbose))
    # enumerate() yields (step, (sup, unsup_orig, unsup_aug)) pairs
    for step, (sup_batch_m, unsup_orig_batch_m, unsup_aug_batch_m) in train_iterator:
        batch_m_triplet = uda_runner.TrainDataTriplet(
            sup=sup_batch_m.to(self.device),
            unsup_orig=unsup_orig_batch_m.to(self.device),
            unsup_aug=unsup_aug_batch_m.to(self.device),
        )
        self.run_train_step(
            batch_m_triplet=batch_m_triplet,
            train_global_state=train_global_state,
        )
        yield batch_m_triplet, train_global_state
    if populate_after:
        self.populate_llp_state(
            train_dataloader=sup_dataloader,
            verbose=verbose,
        )
        self.log_writer.write_entry("populate_logs", combine_dicts([
            llp_runner.populate_logs(llp_state=self.llp_state, llp_params=self.llp_params),
            {
                "epoch": train_global_state.epoch,
            },
        ]))
Example #10
def run_val(val_examples, val_dataloader, model, task, loss_criterion, device,
            local_rank, verbose):
    # skip validation entirely when running distributed (local_rank != -1)
    if local_rank != -1:
        return
    model.eval()
    total_eval_loss = 0
    nb_eval_steps, nb_eval_examples = 0, 0
    all_logits = []
    for step, (batch, batch_metadata) in enumerate(
            maybe_tqdm(val_dataloader,
                       desc="Evaluating (Val)",
                       verbose=verbose)):
        batch = batch.to(device)

        with torch.no_grad():
            logits = forward_batch_delegate(
                model=model,
                batch=batch,
                omit_label_ids=True,
                task_type=task.TASK_TYPE,
            )[0]
            tmp_eval_loss = compute_loss_from_model_output(
                logits=logits,
                loss_criterion=loss_criterion,
                batch=batch,
                task_type=task.TASK_TYPE,
            )

        logits = logits.detach().cpu().numpy()
        total_eval_loss += tmp_eval_loss.mean().item()

        nb_eval_examples += len(batch)
        nb_eval_steps += 1
        all_logits.append(logits)
    eval_loss = total_eval_loss / nb_eval_steps
    all_logits = np.concatenate(all_logits, axis=0)

    return {
        "logits": all_logits,
        "loss": eval_loss,
        "metrics": evaluate.compute_task_metrics(task, all_logits,
                                                 val_examples),
    }
Example #11
    def run_train_epoch_context(self,
                                dataloader_duplet: TrainDataDuplet,
                                train_global_state: TrainGlobalState, verbose=True):
        train_iterator = maybe_tqdm(zip(
            dataloader_duplet.sup,
            dataloader_duplet.unsup,
        ), desc="Training", verbose=verbose, total=len(dataloader_duplet.sup))

        for sup_batch, unsup_batch in train_iterator:
            batch_duplet = TrainDataDuplet(
                sup=sup_batch,
                unsup=unsup_batch,
            )
            self.run_train_step(
                batch_duplet=batch_duplet,
                train_global_state=train_global_state,
            )
            yield batch_duplet, train_global_state
        train_global_state.step_epoch()
Example #12
def run_train_epoch_context(self,
                            dataloader_triplet,
                            train_global_state: TrainGlobalState,
                            verbose=True):
    train_iterator = maybe_tqdm(zip(dataloader_triplet.sup,
                                    dataloader_triplet.unsup_orig,
                                    dataloader_triplet.unsup_aug),
                                desc="Training",
                                verbose=verbose,
                                total=len(dataloader_triplet.sup))
    for sup_batch, unsup_orig_batch, unsup_aug_batch in train_iterator:
        batch_triplet = TrainDataTriplet(
            # batch, batch_metadata hack
            sup=sup_batch,
            unsup_orig=unsup_orig_batch,
            unsup_aug=unsup_aug_batch,
        )
        self.run_train_step(
            batch_triplet=batch_triplet,
            train_global_state=train_global_state,
        )
        yield batch_triplet, train_global_state
    train_global_state.step_epoch()
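
Examples #9, #11, and #12 hand batches around in TrainDataDuplet / TrainDataTriplet containers whose definitions are not included on this page. A plausible minimal sketch, assuming they are plain named-tuple-style records with exactly the fields the examples access:

from typing import Any, NamedTuple

class TrainDataDuplet(NamedTuple):
    # supervised batch plus one unsupervised batch
    sup: Any
    unsup: Any

class TrainDataTriplet(NamedTuple):
    # supervised batch plus original and augmented unsupervised batches
    sup: Any
    unsup_orig: Any
    unsup_aug: Any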