def validation_step(self, batch, batch_idx):
    """Convert one validation batch into a CPU-resident EvaluationDataPage.

    Dict-shaped batches are first wrapped as a DiscreteDqnInput before the
    page is built.
    """
    if isinstance(batch, dict):
        batch = rlt.DiscreteDqnInput.from_dict(batch)
    # HACK: Move to cpu in order to hold more batches in memory. This is
    # only needed when trainers need in-memory EvaluationDataPages of the
    # full evaluation dataset.
    page = EvaluationDataPage.create_from_training_batch(batch, self)
    return page.cpu()
def gather_eval_data(self, test_step_outputs):
    """Aggregate per-batch test outputs into a single EvaluationDataPage.

    Builds a page from each batch and appends them together; when mdp_id is
    present, the combined page is sorted, discounted values are computed with
    ``self.gamma``, and the page is validated.

    Returns:
        The aggregated EvaluationDataPage, or None when ``test_step_outputs``
        is empty.
    """
    eval_data = None
    for batch in test_step_outputs:
        edp = EvaluationDataPage.create_from_training_batch(batch, self)
        if eval_data is None:
            eval_data = edp
        else:
            eval_data = eval_data.append(edp)
    # Guard against an empty outputs list: eval_data would still be None and
    # the mdp_id access below would raise AttributeError.
    if eval_data is not None and eval_data.mdp_id is not None:
        eval_data = eval_data.sort()
        eval_data = eval_data.compute_values(self.gamma)
        eval_data.validate()
    return eval_data
def handle(self, tdp: PreprocessedTrainingBatch) -> None:
    """Accumulate CPE evaluation data from one preprocessed training batch.

    No-op when the trainer does not compute CPE during training, or when the
    trainer is a continuous-action algorithm (CPE not implemented for those).
    """
    if not self.trainer.calc_cpe_in_training:
        return
    # TODO: Perhaps we can make an RLTrainer param to check if continuous?
    if isinstance(self.trainer, (SACTrainer, TD3Trainer)):
        # TODO: Implement CPE for continuous algos.
        # Return early: the original code set edp = None here, which would
        # then be passed to self.evaluation_data.append(None) below.
        return
    edp = EvaluationDataPage.create_from_training_batch(tdp, self.trainer)
    if self.evaluation_data is None:
        self.evaluation_data = edp
    else:
        self.evaluation_data = self.evaluation_data.append(edp)
def gather_eval_data(self, validation_step_outputs):
    """Combine validation-step outputs into one EvaluationDataPage.

    The module is temporarily moved to CPU so that the accumulated pages fit
    in memory; the original device is restored before returning. When mdp_id
    is present, the combined page is sorted, values are computed with
    ``self.gamma``, and the page is validated.
    """
    was_on_gpu = self.on_gpu
    self.cpu()
    eval_data = None
    for output in validation_step_outputs:
        page = EvaluationDataPage.create_from_training_batch(output, self)
        eval_data = page if eval_data is None else eval_data.append(page)
    if eval_data and eval_data.mdp_id is not None:
        eval_data = eval_data.sort()
        eval_data = eval_data.compute_values(self.gamma)
        eval_data.validate()
    if was_on_gpu:
        self.cuda()
    return eval_data
def gather_eval_data(
    trainer: RLTrainer,
    eval_dataset: Dataset,
    batch_preprocessor: BatchPreprocessor,
    use_gpu: bool,
    reader_options: ReaderOptions,
) -> EvaluationDataPage:
    """Read the eval dataset and build one sorted, validated EvaluationDataPage.

    Streams the dataset with petastorm, preprocesses each batch, appends the
    resulting pages together, then sorts, computes logged values with
    ``trainer.gamma``, and validates the combined page.

    Raises:
        NotImplementedError: for continuous-action trainers (SAC/TD3) —
            CPE is not implemented for those.
        AssertionError: if CPE-in-training is disabled, or the dataset
            yields no batches.
    """
    if isinstance(trainer, (SACTrainer, TD3Trainer)):
        raise NotImplementedError("TODO: Implement CPE for continuous algos")
    assert (
        trainer.calc_cpe_in_training
    ), "this function should only be called when this is true."

    # First read the eval_dataset as EvaluationDataPages.
    device = "cuda" if use_gpu else "cpu"
    eval_data = None
    with make_batch_reader(
        # pyre-fixme[16]: `HiveDataSetClass` has no attribute `parquet_url`.
        eval_dataset.parquet_url,
        num_epochs=1,
        # pyre-fixme[16]: `ReaderOptions` has no attribute
        #  `petastorm_reader_pool_type`.
        reader_pool_type=reader_options.petastorm_reader_pool_type,
    ) as reader:
        for batch in reader:
            assert rlt.isinstance_namedtuple(batch)
            tensor_batch = dict_to_tensor(batch._asdict(), device=device)
            tdp: rlt.PreprocessedTrainingBatch = batch_preprocessor(tensor_batch)
            edp = EvaluationDataPage.create_from_training_batch(tdp, trainer)
            eval_data = edp if eval_data is None else eval_data.append(edp)

    # Fail loudly on an empty dataset instead of AttributeError on None.
    assert eval_data is not None, "eval_dataset yielded no batches"
    eval_data = eval_data.sort()
    eval_data = eval_data.compute_values(trainer.gamma)
    eval_data.validate()
    return eval_data
def validation_step(self, batch, batch_idx):
    """Produce a CPU-resident EvaluationDataPage for this validation batch."""
    # HACK: Move to cpu in order to hold more batches in memory. This is
    # only needed when trainers need in-memory EvaluationDataPages of the
    # full evaluation dataset.
    page = EvaluationDataPage.create_from_training_batch(batch, self)
    return page.cpu()