Example #1
 def handle(self) -> None:
     # Load archive
     archive = load_archive(Path(self.argument("archive")))
     vocab = archive.model.vocab
     if self.option("log-model-info"):
         # Log model config
         logger.info("Config: {}".format(
             json.dumps(archive.config.as_flat_dict(),
                        indent=2,
                        ensure_ascii=False)))
         # Log model metrics
         log_metrics("Trained model", archive.metrics)
     # Parse options
     num_samples = int(self.option("num-samples"))
     items = self.parse_items()
     # Prepare data for the model
     dataset_reader_params = archive.config.get("dataset_reader")
     dataset_reader_params["sample_masking"] = False
     dataset_reader = DatasetReader.from_params(**dataset_reader_params)
     collate_batch = CollateBatch.by_name(dataset_reader_params.get("type"))
     input_dict = collate_batch(
         Batch([
             vocab.encode(dataset_reader.item_to_instance(item))
             for item in items
         ])).as_dict()
     # Set posterior samples
     archive.model.set_samples(samples=1)
     # Run it
     output_dict = archive.model.interpolate(input_dict["src_tokens"],
                                             samples=num_samples,
                                             random=self.option("random"))
     # Make it readable
     samples = archive.model.make_output_human_readable(output_dict)
     print(samples)
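
The same pipeline can also be driven outside the CLI wrapper. A minimal sketch, reusing only the calls shown in the command above; `archive_path`, `items`, and `num_samples` are hypothetical stand-ins for the parsed CLI argument and options:
def interpolate_from_archive(archive_path: str, items, num_samples: int = 1):
    # Load the trained model together with its config and vocabulary.
    archive = load_archive(Path(archive_path))
    vocab = archive.model.vocab
    # Rebuild the dataset reader with sample masking disabled, as in the command.
    dataset_reader_params = archive.config.get("dataset_reader")
    dataset_reader_params["sample_masking"] = False
    dataset_reader = DatasetReader.from_params(**dataset_reader_params)
    collate_batch = CollateBatch.by_name(dataset_reader_params.get("type"))
    # Encode the raw items into a model-ready batch.
    input_dict = collate_batch(
        Batch([vocab.encode(dataset_reader.item_to_instance(item)) for item in items])
    ).as_dict()
    # One posterior sample per input, then run the model's interpolation.
    archive.model.set_samples(samples=1)
    output_dict = archive.model.interpolate(input_dict["src_tokens"], samples=num_samples)
    return archive.model.make_output_human_readable(output_dict)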
Example #2
 def _train_batch(self, batch: CollateBatch) -> Dict[str, Any]:
     batch: Dict[str, torch.Tensor] = batch.to_device(
         device=self._cuda_device, non_blocking=True
     )
     output_dict = self._pytorch_model(**batch).pop("loss_info")
     loss = output_dict.get("batch-loss")
     loss.backward()
     # Gradient Clipping
     if self._grad_norm is not None:
         clip_grad_norm_(self._model.parameters(), self._grad_norm)
     if self._grad_clip is not None:
         clip_grad_value_(self._model.parameters(), self._grad_clip)
     # Update step
     self._perform_one_step()
     metrics = self._model.get_metrics()
     # Add metrics from output dict
     metrics.update(
         {k: v.item() if isinstance(v, torch.Tensor) else v for k, v in output_dict.items()}
     )
     # Add Learning rate
     if self._encoder_scheduler is not None:
         metrics["encoder_lr"] = self._encoder_scheduler.get_current_lr()[0]
         metrics["decoder_lr"] = self._decoder_scheduler.get_current_lr()[0]
     else:
         metrics["lr"] = self._scheduler.get_current_lr()[0]
     return metrics
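
For orientation, a minimal sketch of an epoch loop that could drive `_train_batch`; the `_train_epoch` name, the `train()` call, and returning the last batch's metrics are assumptions, while `train_dataloader` mirrors the `DataIterator` from Example #5:
 def _train_epoch(self, train_dataloader) -> Dict[str, Any]:
     # Keep dropout and other training-only layers active.
     self._pytorch_model.train()
     metrics: Dict[str, Any] = {}
     for batch in train_dataloader:
         # Each call runs forward, backward, clipping and one optimizer step.
         metrics = self._train_batch(batch)
     # Running metric averages live in the model; return the last batch's dict.
     return metrics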
Example #3
 def _validate_batch(self, batch: CollateBatch) -> Dict[str, Any]:
     batch: Dict[str,
                 torch.Tensor] = batch.to_device(device=self._cuda_device,
                                                 non_blocking=True)
     output_dict = self._pytorch_model(**batch).pop("loss_info")
     metrics = self._model.get_metrics()
     # Add metrics from output dict
     metrics.update({
         k: v.item() if isinstance(v, torch.Tensor) else v
         for k, v in output_dict.items()
     })
     # Add Learning rate
     metrics["encoder-lr"] = self._encoder_scheduler.get_current_lr()[0]
     metrics["decoder-lr"] = self._decoder_scheduler.get_current_lr()[0]
     return metrics
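
Note that `_validate_batch` never calls `backward`, so the caller is expected to switch off gradient tracking. A minimal sketch of such a validation loop (the `_validate_epoch` name is an assumption):
 def _validate_epoch(self, validation_dataloader) -> Dict[str, Any]:
     # Disable dropout and gradient tracking during evaluation.
     self._pytorch_model.eval()
     metrics: Dict[str, Any] = {}
     with torch.no_grad():
         for batch in validation_dataloader:
             metrics = self._validate_batch(batch)
     return metrics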
Example #4
 def _train_batch(self, batch: CollateBatch) -> Dict[str, Any]:
     break_aggressive = not self._aggressive
     # Zero the gradients first
     self._encoder_optimizer.zero_grad()
     self._decoder_optimizer.zero_grad()
     # Construct batch
     batch: Dict[str,
                 torch.Tensor] = batch.to_device(device=self._cuda_device,
                                                 non_blocking=True)
     output_dict = self._pytorch_model(**batch).pop("loss_info")
     loss = output_dict.pop("batch-loss")
     loss.backward()
     # Gradient Clipping
     if self._grad_norm is not None:
         clip_grad_norm_(self._model.parameters(), self._grad_norm)
     # Start aggressive training
     if self._aggressive and self._step < self._max_aggressive_iters:
         self._burn_cur_loss += loss.item()
         # Encoder step
         self._encoder_scheduler.step()
         self._encoder_optimizer.step()
         # Every 15 steps, check whether to stop aggressive training
         if self._step % 15 == 0:
             if self._burn_pre_loss - self._burn_cur_loss < 0:
                 break_aggressive = True
             self._burn_pre_loss = self._burn_cur_loss
             self._burn_cur_loss = 0
         self._step += 1
     if break_aggressive:
         # Encoder step if not in aggressive training
         if not self._aggressive:
             self._encoder_scheduler.step()
             self._encoder_optimizer.step()
         self._decoder_scheduler.step()
         self._decoder_optimizer.step()
     metrics = self._model.get_metrics()
     metrics["batch-loss"] = loss.item()
     # Add metrics from output dict
     metrics.update({
         k: v.item() if isinstance(v, torch.Tensor) else v
         for k, v in output_dict.items()
     })
     # Add Learning rate
     metrics["encoder-lr"] = self._encoder_scheduler.get_current_lr()[0]
     metrics["decoder-lr"] = self._decoder_scheduler.get_current_lr()[0]
     return metrics
Example #5
def train_worker(process_rank: int,
                 config: Params,
                 world_size: int = 1):
    is_distributed = world_size > 1
    # Construct Datasets
    # TODO: Move Vocabulary creation before process spawn
    dataset_type = config["dataset_reader"]["type"]
    dataset_reader = DatasetReader.from_params(**config.pop("dataset_reader"))
    train_dataset = dataset_reader.read(config["train_data_path"])
    valid_dataset = dataset_reader.read(config["valid_data_path"])
    # Construct Vocabulary
    vocab_path = os.path.join(config["serialization_dir"], "vocabulary")
    if not os.path.exists(vocab_path):
        logger.debug(f"No Vocabulary found at path: {vocab_path}. "
                     f"Then we would construct it from datasets.")
        vocab = Vocabulary(
            datasets={
                "train": train_dataset,
                "valid": valid_dataset
            },
            namespaces={
                "tokens": Namespace(processing_type="padding_oov"),
                "target": Namespace(processing_type="padding_oov"),
            },
            dependent_namespaces=[["tokens", "target"]],
        )
        # Save only on master
        if process_rank == 0:
            vocab.save(path=vocab_path)
    else:
        logger.debug(f"Found Vocabulary at path: {vocab_path}, loading it.")
        vocab = Vocabulary.from_files(vocab_path)
    train_dataset.encode_with(vocab)
    valid_dataset.encode_with(vocab)
    logger.debug(f"Seeding with seed: {config['seed']}")
    seed_everything(config["seed"])
    # Construct Iterators
    logger.debug("Construct DataIterators.")
    train_dataloader = DataIterator(
        train_dataset,
        collate_fn=CollateBatch.by_name(dataset_type),
        **config["data_loader"],
    )
    valid_dataloader = DataIterator(
        valid_dataset,
        collate_fn=CollateBatch.by_name(dataset_type),
        **config["data_loader"],
    )
    # Construct modules
    logger.debug("Instantiating Modules from config.")
    device = util.int_to_device(config["cuda_devices"][process_rank])
    model_params = config.pop("model")
    # Load model from the archive for finetune
    model_params = VAELmModel.prepare_with_iterator(model_params,
                                                    iterator=train_dataloader)
    if "from_archive" in model_params:
        archive = load_archive(Path(model_params.get("from_archive")))
        model = archive.model.to(device)
    else:
        model = VAELmModel.from_params(vocab=vocab, **model_params).to(device)
    # Instantiate Trainer
    logger.debug("Instantiating Trainer.")
    trainer = Trainer.from_params(
        model=model,
        distributed=device != torch.device("cpu") and world_size != 1,
        cuda_device=device,
        local_rank=process_rank,
        world_size=world_size,
        serialization_dir=config["serialization_dir"],
        **config.pop("trainer"),
    )
    # Wait for all workers to finish setup.
    if is_distributed:
        dist.barrier()
    # Run training
    logger.debug("Run Trainer.")
    result = trainer.train(
        train_dataloader=train_dataloader,
        validation_dataloader=valid_dataloader,
    )
    if config.get("evaluate_on_test", False):
        logger.info("Evaluating on test.")
        test_data_path = config.get("test_data_path")
        if not test_data_path:
            logger.error(
                "You set evaluate_on_test=True but didn't pass test_data_path to evaluate on."
            )
            return
        test_dataset = dataset_reader.read(test_data_path)
        test_dataset.encode_with(vocab)
        test_dataloader = DataIterator(
            test_dataset,
            collate_fn=CollateBatch.by_name(dataset_type),
            shuffle=False,
            **config["data_loader"],
        )
        # Wait for all processes to get ready to start evaluation.
        if is_distributed:
            dist.barrier()
        result = trainer.evaluate(test_dataloader, desc="Testing")
    logger.success("Finished!!!")
    return result
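
For reference, a minimal sketch of how `train_worker` might be launched, since `torch.multiprocessing.spawn` passes the process rank as the first argument to its target; deriving `world_size` from `config["cuda_devices"]` is an assumption based on how the worker indexes that list:
import torch.multiprocessing as mp

def run_training(config: Params) -> None:
    world_size = len(config["cuda_devices"])
    if world_size > 1:
        # One worker per device; each receives its process rank automatically.
        mp.spawn(train_worker, args=(config, world_size), nprocs=world_size)
    else:
        # Single-process training.
        train_worker(0, config, world_size)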
Example #6
 def _train_batch(self, batch: CollateBatch,
                  sampler: Callable) -> Dict[str, Any]:
     aggressive_steps = 1
     burn_pre_loss = 1e4
     burn_cur_loss = 0
     # Aggressive steps only if KL is used in the loss and the step count is less than the threshold
     while (self._aggressive
            and aggressive_steps < self._max_aggressive_iters
            and self._model.is_kl_used):
         sample = sampler()
         sample: Dict[str, torch.Tensor] = sample.to_device(
             device=self._cuda_device, non_blocking=True)
         output_dict = self._pytorch_model(manual_kl_step=True,
                                           **sample).pop("loss_info")
         loss = output_dict.get("batch-loss")
         burn_cur_loss += loss.item()
         loss.backward()
         # Gradient Clipping
         if self._grad_norm is not None:
             clip_grad_norm_(self._model.parameters(), self._grad_norm)
         if self._grad_clip is not None:
             clip_grad_value_(self._model.parameters(), self._grad_clip)
         # Update only encoder
         self._encoder_optimizer.step()
         self._encoder_optimizer.zero_grad()
         # Every 15 steps, check the accumulated loss
         if aggressive_steps % 15 == 0:
             if burn_pre_loss - burn_cur_loss < 0:
                 break
             burn_pre_loss, burn_cur_loss = burn_cur_loss, 0
         aggressive_steps += 1
     # Step on batch
     batch: Dict[str,
                 torch.Tensor] = batch.to_device(device=self._cuda_device,
                                                 non_blocking=True)
     output_dict = self._pytorch_model(manual_kl_step=True,
                                       **batch).pop("loss_info")
     loss = output_dict.get("batch-loss")
     loss.backward()
     # Gradient Clipping
     if self._grad_norm is not None:
         clip_grad_norm_(self._model.parameters(), self._grad_norm)
     if self._grad_clip is not None:
         clip_grad_value_(self._model.parameters(), self._grad_clip)
     # Update the encoder when aggressive training is off or KL is not used
     if not self._aggressive or not self._model.is_kl_used:
         self._encoder_scheduler.step()
         self._encoder_optimizer.step()
         self._encoder_optimizer.zero_grad()
     else:
         # Step the scheduler anyway so the learning rate does not stay very small
         self._encoder_scheduler.step()
     # Update only decoder
     self._decoder_scheduler.step()
     self._decoder_optimizer.step()
     self._decoder_optimizer.zero_grad()
     # Perform manual KL Scheduler step
     self._model.kl_scheduler_step()
     # Get metrics for tqdm
     metrics = self._model.get_metrics()
     metrics["batch-aggressive-steps"] = aggressive_steps
     # Add metrics from output dict
     metrics.update({
         k: v.item() if isinstance(v, torch.Tensor) else v
         for k, v in output_dict.items()
     })
     # Add Learning rate
     metrics["encoder-lr"] = self._encoder_scheduler.get_current_lr()[0]
     metrics["decoder-lr"] = self._decoder_scheduler.get_current_lr()[0]
     return metrics
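
The `sampler` argument above is any callable returning a fresh `CollateBatch` for the aggressive encoder updates. A minimal sketch of how it might be built from the training `DataIterator` of Example #5 (an assumption, since the call site is not shown):
from itertools import cycle

def make_sampler(train_dataloader) -> Callable:
    # Cycle endlessly over the dataloader so aggressive steps never run out of batches.
    batches = cycle(train_dataloader)
    return lambda: next(batches)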