Example #1
    def evaluate_gridworld(self,
                           environment,
                           evaluator,
                           trainer,
                           use_gpu,
                           one_hot_action=True):
        pre_training_loss = None
        if self.run_pre_training_eval:
            predictor = self.get_predictor(trainer, environment)

            evaluator.evaluate(predictor)
            print("Pre-Training eval: ", evaluator.mc_loss[-1])
            pre_training_loss = evaluator.mc_loss[-1]

        for _ in range(self.num_epochs):
            samples = environment.generate_samples(10240, 1.0, DISCOUNT)

            if trainer.rl_parameters.reward_boost is not None:
                # Reverse any reward boost
                rewards_update = []
                for action, reward in zip(samples.actions, samples.rewards):
                    rewards_update.append(
                        reward -
                        trainer.rl_parameters.reward_boost.get(action, 0.0))
                samples = samples._replace(rewards=rewards_update)

            tdps = environment.preprocess_samples(
                samples,
                self.minibatch_size,
                use_gpu=use_gpu,
                one_hot_action=one_hot_action,
            )

            for tdp in tdps:
                trainer.train(tdp)

        predictor = self.get_predictor(trainer, environment)
        evaluator.evaluate(predictor)
        print("Post-Training eval: ", evaluator.mc_loss[-1])
        if self.check_tolerance:
            self.assertLess(evaluator.mc_loss[-1], self.tolerance_threshold)
        if self.run_pre_training_eval:
            self.assertGreater(pre_training_loss, evaluator.mc_loss[-1])

        if self.test_save_load:
            with tempfile.TemporaryDirectory() as tmpdirname:
                tmp_path = os.path.join(tmpdirname, "model")

                serving_module_bytes = export_module_to_buffer(
                    predictor.model).getvalue()
                logger.info(
                    "Saving TorchScript predictor to {}".format(tmp_path))
                with open(tmp_path, "wb") as output_fp:
                    output_fp.write(serving_module_bytes)
                new_predictor = type(predictor)(torch.jit.load(tmp_path))
                evaluator.evaluate(new_predictor)
                print("Post-ONNX eval: ", evaluator.mc_loss[-1])
                if self.check_tolerance:
                    self.assertLess(evaluator.mc_loss[-1],
                                    self.tolerance_threshold)
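
The reward-boost reversal in the training loop above depends only on the per-action boost mapping and the sampled action/reward lists. A minimal, self-contained sketch of the same subtraction, using a plain namedtuple and dict as hypothetical stand-ins for the environment samples and trainer.rl_parameters.reward_boost:

from collections import namedtuple

# Hypothetical stand-ins for environment samples and trainer.rl_parameters.reward_boost.
Samples = namedtuple("Samples", ["actions", "rewards"])
samples = Samples(actions=["U", "D", "L"], rewards=[1.5, 1.0, 2.0])
reward_boost = {"U": 0.5, "D": 0.0}

# Reverse any reward boost: subtract the per-action boost (defaulting to 0.0) from each reward.
rewards_update = [
    reward - reward_boost.get(action, 0.0)
    for action, reward in zip(samples.actions, samples.rewards)
]
samples = samples._replace(rewards=rewards_update)
print(samples.rewards)  # [1.0, 1.0, 2.0]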
Example #2
    def test_predictor_torch_export(self):
        """Verify that q-values before model export equal q-values after
        model export. Meant to catch issues with export logic."""
        environment = Gridworld()
        samples = Samples(
            mdp_ids=["0"],
            sequence_numbers=[0],
            sequence_number_ordinals=[1],
            states=[{0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0, 5: 1.0, 15: 1.0, 24: 1.0}],
            actions=["D"],
            action_probabilities=[0.5],
            rewards=[0],
            possible_actions=[["R", "D"]],
            next_states=[{5: 1.0}],
            next_actions=["U"],
            terminals=[False],
            possible_next_actions=[["R", "U", "D"]],
        )
        tdps = environment.preprocess_samples(samples, 1)
        assert len(tdps) == 1, "Invalid number of data pages"

        trainer, exporter = self.get_modular_sarsa_trainer_exporter(
            environment, {}, False
        )
        preprocessed_state = rlt.PreprocessedState.from_tensor(tdps[0].states)

        pre_export_q_values = (
            trainer.q_network(preprocessed_state).q_values.detach().numpy()
        )

        preprocessor = Preprocessor(environment.normalization, False)
        cpu_q_network = trainer.q_network.cpu_model()
        cpu_q_network.eval()
        dqn_with_preprocessor = DiscreteDqnWithPreprocessor(cpu_q_network, preprocessor)
        serving_module = DiscreteDqnPredictorWrapper(
            dqn_with_preprocessor, action_names=environment.ACTIONS
        )

        with tempfile.TemporaryDirectory() as tmpdirname:
            buf = export_module_to_buffer(serving_module)
            tmp_path = os.path.join(tmpdirname, "model")
            with open(tmp_path, "wb") as f:
                f.write(buf.getvalue())
                f.close()
                predictor = DiscreteDqnTorchPredictor(torch.jit.load(tmp_path))

        post_export_q_values = predictor.predict([samples.states[0]])

        for i, action in enumerate(environment.ACTIONS):
            self.assertAlmostEqual(
                float(pre_export_q_values[0][i]),
                float(post_export_q_values[0][action]),
                places=4,
            )
Example #3
    def save_torchscript_model(self, module: torch.nn.Module, path: str):
        logger.info("Saving TorchScript predictor to {}".format(path))
        serving_module_bytes = export_module_to_buffer(module).getvalue()
        with open(path, "wb") as output_fp:
            output_fp.write(serving_module_bytes)
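
Since export_module_to_buffer hands back an in-memory buffer, the same round trip can also be checked without touching disk: torch.jit.load accepts a file-like object. A short sketch, using torch.jit.save into an io.BytesIO as an assumed equivalent of export_module_to_buffer:

import io

import torch

module = torch.jit.script(torch.nn.Linear(4, 2))

# Assumed equivalent of export_module_to_buffer: serialize the scripted module into memory.
buffer = io.BytesIO()
torch.jit.save(module, buffer)

# torch.jit.load also accepts a file-like object, so the bytes never need to hit disk.
buffer.seek(0)
reloaded = torch.jit.load(buffer)
assert isinstance(reloaded, torch.jit.ScriptModule)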