Example #1
    def _test_dqn_workflow(self, use_gpu=False, use_all_avail_gpus=False):
        """Run the DQN workflow end to end to ensure it does not crash;
        algorithm correctness is not tested here."""
        with tempfile.TemporaryDirectory() as tmpdirname:
            lockfile = os.path.join(tmpdirname, "multiprocess_lock")
            Path(lockfile).touch()
            params = {
                "training_data_path": os.path.join(
                    curr_dir, "test_data/discrete_action/cartpole_training.json.bz2"
                ),
                "eval_data_path": os.path.join(
                    curr_dir, "test_data/discrete_action/cartpole_eval.json.bz2"
                ),
                "state_norm_data_path": os.path.join(
                    curr_dir, "test_data/discrete_action/cartpole_norm.json"
                ),
                "model_output_path": tmpdirname,
                "use_gpu": use_gpu,
                "use_all_avail_gpus": use_all_avail_gpus,
                # Single-node setup: processes rendezvous through a shared lock file.
                "init_method": "file://" + lockfile,
                "num_nodes": 1,
                "node_index": 0,
                "actions": ["0", "1"],
                "epochs": 1,
                "rl": {},
                "rainbow": {"double_q_learning": False, "dueling_architecture": False},
                "training": {"minibatch_size": 128},
            }
            dqn_workflow.main(params)
            predictor_files = glob.glob(tmpdirname + "/model_*.torchscript")
            assert len(predictor_files) == 1, "Expected exactly one predictor file"
            predictor = DiscreteDqnTorchPredictor(torch.jit.load(predictor_files[0]))
            # The predictor consumes a list of {feature_id: value} dicts and
            # returns one q-value per action.
            test_float_state_features = [{"0": 1.0, "1": 1.0, "2": 1.0, "3": 1.0}]
            q_values = predictor.predict(test_float_state_features)
            assert len(q_values[0].keys()) == 2
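
The "init_method", "num_nodes", and "node_index" parameters above feed PyTorch's distributed rendezvous. Below is a minimal sketch of the same file:// rendezvous using the public torch.distributed API; the gloo backend is an assumption here, not taken from the workflow code.

import os
import tempfile

import torch.distributed as dist

# Minimal sketch: a single-process group coordinated through a shared file,
# mirroring init_method="file://" + lockfile with num_nodes=1, node_index=0.
with tempfile.TemporaryDirectory() as tmpdirname:
    lockfile = os.path.join(tmpdirname, "multiprocess_lock")
    dist.init_process_group(
        backend="gloo",  # assumed backend; the workflow may choose differently
        init_method="file://" + lockfile,
        world_size=1,
        rank=0,
    )
    dist.destroy_process_group()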
Example #2
    def test_predictor_torch_export(self):
        """Verify that q-values before model export equal q-values after
        model export. Meant to catch issues with export logic."""
        environment = Gridworld()
        samples = Samples(
            mdp_ids=["0"],
            sequence_numbers=[0],
            sequence_number_ordinals=[1],
            states=[{0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0, 5: 1.0, 15: 1.0, 24: 1.0}],
            actions=["D"],
            action_probabilities=[0.5],
            rewards=[0],
            possible_actions=[["R", "D"]],
            next_states=[{5: 1.0}],
            next_actions=["U"],
            terminals=[False],
            possible_next_actions=[["R", "U", "D"]],
        )
        tdps = environment.preprocess_samples(samples, 1)
        assert len(tdps) == 1, "Invalid number of data pages"

        trainer, exporter = self.get_modular_sarsa_trainer_exporter(
            environment, {}, False
        )
        # Named to avoid shadowing the built-in `input`.
        state_input = rlt.PreprocessedState.from_tensor(tdps[0].states)

        pre_export_q_values = trainer.q_network(state_input).q_values.detach().numpy()

        preprocessor = Preprocessor(environment.normalization, False)
        cpu_q_network = trainer.q_network.cpu_model()
        cpu_q_network.eval()
        dqn_with_preprocessor = DiscreteDqnWithPreprocessor(cpu_q_network, preprocessor)
        serving_module = DiscreteDqnPredictorWrapper(
            dqn_with_preprocessor, action_names=environment.ACTIONS
        )

        with tempfile.TemporaryDirectory() as tmpdirname:
            buf = export_module_to_buffer(serving_module)
            tmp_path = os.path.join(tmpdirname, "model")
            with open(tmp_path, "wb") as f:
                f.write(buf.getvalue())
            # Load while the temporary directory still exists; the loaded
            # module lives in memory afterwards.
            predictor = DiscreteDqnTorchPredictor(torch.jit.load(tmp_path))

        post_export_q_values = predictor.predict([samples.states[0]])

        for i, action in enumerate(environment.ACTIONS):
            self.assertAlmostEqual(
                float(pre_export_q_values[0][i]),
                float(post_export_q_values[0][action]),
                places=4,
            )
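
The export round-trip this test exercises can be reproduced with plain TorchScript primitives. A minimal self-contained sketch, assuming only the public torch.jit API; TinyQNet is a hypothetical stand-in for the serving module.

import io

import torch

class TinyQNet(torch.nn.Module):
    """Hypothetical stand-in for the serving module."""

    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(4, 2)

    def forward(self, x):
        return self.linear(x)

scripted = torch.jit.script(TinyQNet())
buf = io.BytesIO()
torch.jit.save(scripted, buf)  # same round-trip the test performs on disk
buf.seek(0)
loaded = torch.jit.load(buf)

x = torch.ones(1, 4)
# Export must not change the computed q-values.
assert torch.allclose(scripted(x), loaded(x))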
Example #3
def main(model_path, temperature):
    # Resolve the glob pattern to a concrete TorchScript checkpoint path.
    model_path = glob.glob(model_path)[0]
    predictor = DiscreteDqnTorchPredictor(torch.jit.load(model_path))
    predictor.softmax_temperature = temperature

    env = OpenAIGymEnvironment(gymenv=ENV)

    avg_rewards, avg_discounted_rewards = env.run_ep_n_times(
        AVG_OVER_NUM_EPS, predictor, test=True
    )

    logger.info(
        "Achieved an average reward score of {} over {} evaluations.".format(
            avg_rewards, AVG_OVER_NUM_EPS))
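
The softmax_temperature set above controls how sharply action sampling concentrates on the highest q-value. A minimal sketch of temperature-scaled softmax; the helper name is hypothetical, and the predictor's internal implementation may differ.

import torch

def temperature_softmax(q_values, temperature):
    # Higher temperature flattens the distribution toward uniform;
    # temperature -> 0 approaches greedy argmax.
    return torch.softmax(q_values / temperature, dim=-1)

q = torch.tensor([[1.0, 2.0]])
print(temperature_softmax(q, 1.0))   # ~[0.27, 0.73]
print(temperature_softmax(q, 10.0))  # ~[0.48, 0.52]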
Example #4
    def get_predictor(self, trainer, environment):
        state_preprocessor = Preprocessor(environment.normalization, False)
        q_network = trainer.q_network
        # Wrap the CPU eval-mode network together with its preprocessor so the
        # serving module accepts raw (unnormalized) features.
        dqn_with_preprocessor = DiscreteDqnWithPreprocessor(
            q_network.cpu_model().eval(), state_preprocessor
        )
        serving_module = DiscreteDqnPredictorWrapper(
            dqn_with_preprocessor=dqn_with_preprocessor,
            action_names=environment.ACTIONS,
        )
        predictor = DiscreteDqnTorchPredictor(serving_module)
        return predictor
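
A hypothetical usage of this helper inside the same test class, assuming a trainer and environment from the surrounding fixture; feature keys in the predict call are stringified feature ids, as in Example #1.

predictor = self.get_predictor(trainer, environment)
q_values = predictor.predict([{"0": 1.0, "1": 1.0}])
# One q-value per named action.
assert set(q_values[0].keys()) == set(environment.ACTIONS)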