def validation_step(self, batch, batch_idx):
    """Build a CPU-resident EvaluationDataPage for one validation batch.

    Dict batches are first converted to a DiscreteDqnInput.
    """
    if isinstance(batch, dict):
        batch = rlt.DiscreteDqnInput.from_dict(batch)
    # HACK: Move to cpu in order to hold more batches in memory. This is
    # only needed when trainers need in-memory EvaluationDataPages of the
    # full evaluation dataset.
    page = EvaluationDataPage.create_from_training_batch(batch, self)
    return page.cpu()
 def gather_eval_data(self, test_step_outputs):
     """Concatenate per-batch pages into a single EvaluationDataPage.

     Args:
         test_step_outputs: iterable of batches collected during testing.

     Returns:
         The merged (and, when mdp ids are present, sorted/validated)
         EvaluationDataPage, or None if no batches were produced.
     """
     eval_data = None
     for batch in test_step_outputs:
         edp = EvaluationDataPage.create_from_training_batch(batch, self)
         if eval_data is None:
             eval_data = edp
         else:
             eval_data = eval_data.append(edp)
     # Guard against empty test_step_outputs: eval_data stays None and the
     # mdp_id attribute access below would raise AttributeError (matches the
     # `if eval_data and ...` guard used by the sibling gather_eval_data).
     if eval_data is not None and eval_data.mdp_id is not None:
         eval_data = eval_data.sort()
         eval_data = eval_data.compute_values(self.gamma)
         eval_data.validate()
     return eval_data
Example #3
0
 def handle(self, tdp: PreprocessedTrainingBatch) -> None:
     """Fold one training batch into the accumulated CPE evaluation data.

     No-op when the trainer is not computing CPE during training.
     """
     if not self.trainer.calc_cpe_in_training:
         return
     # TODO: Perhaps we can make an RLTrainer param to check if continuous?
     # TODO: Implement CPE for continuous algos
     is_continuous = isinstance(self.trainer, (SACTrainer, TD3Trainer))
     edp = (
         None
         if is_continuous
         else EvaluationDataPage.create_from_training_batch(tdp, self.trainer)
     )
     if self.evaluation_data is None:
         self.evaluation_data = edp
     else:
         self.evaluation_data = self.evaluation_data.append(edp)
 def gather_eval_data(self, validation_step_outputs):
     """Merge per-batch outputs into one EvaluationDataPage.

     Temporarily moves the module to CPU so the concatenated pages fit in
     memory, restoring the original device before returning.
     """
     was_on_gpu = self.on_gpu
     self.cpu()
     eval_data = None
     for batch in validation_step_outputs:
         page = EvaluationDataPage.create_from_training_batch(batch, self)
         eval_data = page if eval_data is None else eval_data.append(page)
     if eval_data and eval_data.mdp_id is not None:
         eval_data = eval_data.sort()
         eval_data = eval_data.compute_values(self.gamma)
         eval_data.validate()
     if was_on_gpu:
         self.cuda()
     return eval_data
Example #5
0
def gather_eval_data(
    trainer: RLTrainer,
    eval_dataset: Dataset,
    batch_preprocessor: BatchPreprocessor,
    use_gpu: bool,
    reader_options: ReaderOptions,
) -> EvaluationDataPage:
    """Read eval_dataset into one EvaluationDataPage, then sort it,
    compute logged values, and validate it.

    Args:
        trainer: discrete-action trainer with calc_cpe_in_training enabled.
        eval_dataset: parquet-backed dataset read via petastorm.
        batch_preprocessor: converts raw tensor dicts to training batches.
        use_gpu: place tensors on "cuda" when True, otherwise "cpu".
        reader_options: supplies the petastorm reader pool type.

    Raises:
        NotImplementedError: for continuous-action trainers (no CPE yet).
        ValueError: if the evaluation dataset yields no batches.
    """
    if isinstance(trainer, (SACTrainer, TD3Trainer)):
        raise NotImplementedError("TODO: Implement CPE for continuous algos")
    assert (trainer.calc_cpe_in_training
            ), "this function should only be called when this is true."

    # first read the eval_dataset as EvaluationDataPages
    device = "cuda" if use_gpu else "cpu"
    eval_data = None
    with make_batch_reader(
            # pyre-fixme[16]: `HiveDataSetClass` has no attribute `parquet_url`.
            eval_dataset.parquet_url,
            num_epochs=1,
            # pyre-fixme[16]: `ReaderOptions` has no attribute
            #  `petastorm_reader_pool_type`.
            reader_pool_type=reader_options.petastorm_reader_pool_type,
    ) as reader:
        for batch in reader:
            assert rlt.isinstance_namedtuple(batch)
            tensor_batch = dict_to_tensor(batch._asdict(), device=device)
            tdp: rlt.PreprocessedTrainingBatch = batch_preprocessor(
                tensor_batch)
            edp = EvaluationDataPage.create_from_training_batch(tdp, trainer)
            if eval_data is None:
                eval_data = edp
            else:
                eval_data = eval_data.append(edp)

    # An empty dataset would otherwise surface as an opaque AttributeError
    # on `eval_data.sort()` below; fail with an explicit message instead.
    if eval_data is None:
        raise ValueError("eval_dataset yielded no batches")

    eval_data = eval_data.sort()
    eval_data = eval_data.compute_values(trainer.gamma)
    eval_data.validate()
    return eval_data
Example #6
0
 def validation_step(self, batch, batch_idx):
     """Return a CPU-resident EvaluationDataPage for one validation batch."""
     # HACK: Move to cpu in order to hold more batches in memory. This is
     # only needed when trainers need in-memory EvaluationDataPages of the
     # full evaluation dataset.
     page = EvaluationDataPage.create_from_training_batch(batch, self)
     return page.cpu()