Esempio n. 1
0
 def _test_parametric_dqn_workflow(self,
                                   use_gpu=False,
                                   use_all_avail_gpus=False):
     """Smoke-test the Parametric DQN workflow end to end.

     Calls parametric_dqn_workflow.main with the test_data/parametric_action
     files, loads the predictor file written to a temporary output
     directory and requests a single prediction from it. Only the absence
     of crashes and the shape of the outputs are checked; algorithm
     correctness is not tested here.

     :param use_gpu forwarded to the workflow as params["use_gpu"]
     :param use_all_avail_gpus forwarded to the workflow as
         params["use_all_avail_gpus"]
     """
     with tempfile.TemporaryDirectory() as tmpdirname:
         # The lock file is what the "file://" init_method below points at.
         lockfile = os.path.join(tmpdirname, "multiprocess_lock")
         Path(lockfile).touch()
         params = {
             "training_data_path":
             os.path.join(
                 curr_dir,
                 "test_data/parametric_action/cartpole_training.json.bz2"),
             "eval_data_path":
             os.path.join(
                 curr_dir,
                 "test_data/parametric_action/cartpole_eval.json.bz2"),
             "state_norm_data_path":
             os.path.join(
                 curr_dir,
                 "test_data/parametric_action/state_features_norm.json"),
             "action_norm_data_path":
             os.path.join(curr_dir,
                          "test_data/parametric_action/action_norm.json"),
             "model_output_path":
             tmpdirname,
             "use_gpu":
             use_gpu,
             "use_all_avail_gpus":
             use_all_avail_gpus,
             "init_method":
             "file://" + lockfile,
             "num_nodes":
             1,
             "node_index":
             0,
             "epochs":
             1,
             "rl": {},
             "rainbow": {},
             "training": {
                 "minibatch_size": 128
             },
         }
         parametric_dqn_workflow.main(params)
         predictor_files = glob.glob(
             os.path.join(tmpdirname, "predictor_*.c2"))
         # This check also fails when NO predictor was written, so report
         # the real count and matches instead of assuming there were two.
         assert len(predictor_files) == 1, (
             "Expected exactly one predictor file, found {}: {}".format(
                 len(predictor_files), predictor_files))
         predictor = ParametricDQNPredictor.load(predictor_files[0],
                                                 "minidb")
         test_float_state_features = [{
             "0": 1.0,
             "1": 1.0,
             "2": 1.0,
             "3": 1.0
         }]
         test_action_features = [{"4": 0.0, "5": 1.0}]
         q_values = predictor.predict(test_float_state_features,
                                      test_action_features)
     # One state/action pair went in, so the first result must hold exactly
     # one entry.
     assert len(q_values[0].keys()) == 1
 def predictor(self) -> ParametricDQNPredictor:
     """Export this object as a ParametricDQNPredictor.

     Hands ``self`` to ParametricDQNPredictor.export together with its
     state normalization parameters, action normalization parameters and
     ``use_gpu`` flag, and returns whatever that call produces.
     """
     state_norm = self.state_normalization_parameters
     action_norm = self.action_normalization_parameters
     exported = ParametricDQNPredictor.export(
         self, state_norm, action_norm, self.use_gpu)
     return exported
Esempio n. 3
0
    def critic_prediction(self, float_state_features, actions):
        """ Critic Prediction - Returns values for each state/action pair.

        Thin wrapper that delegates to ParametricDQNPredictor.predict with
        this object passed as the predictor instance.

        :param float_state_features states as list of feature -> float value dict
        :param actions actions as list of feature -> value dict
        :return the result of ParametricDQNPredictor.predict, unchanged
        """
        pair_values = ParametricDQNPredictor.predict(
            self, float_state_features, actions)
        return pair_values
Esempio n. 4
0
 def predictor(self) -> ParametricDQNPredictor:
     """Export this object as a ParametricDQNPredictor.

     Passes ``self``, the state and action normalization parameters, the
     ``int_features`` entry of ``self._additional_feature_types`` and the
     ``use_gpu`` flag (in that positional order) to
     ParametricDQNPredictor.export and returns the result.
     """
     state_norm = self.state_normalization_parameters
     action_norm = self.action_normalization_parameters
     int_features = self._additional_feature_types.int_features
     exported = ParametricDQNPredictor.export(
         self, state_norm, action_norm, int_features, self.use_gpu)
     return exported
Esempio n. 5
0
 def predictor(self) -> ParametricDQNPredictor:
     """Build a ParametricDQNPredictor from this object.

     The export call receives, positionally: ``self``, the state
     normalization parameters, the action normalization parameters,
     ``self._additional_feature_types.int_features`` and ``self.use_gpu``.
     Its return value is passed straight back to the caller.
     """
     export_args = (
         self,
         self.state_normalization_parameters,
         self.action_normalization_parameters,
         self._additional_feature_types.int_features,
         self.use_gpu,
     )
     return ParametricDQNPredictor.export(*export_args)
Esempio n. 6
0
    def critic_prediction(self, float_state_features, int_state_features, actions):
        """ Critic Prediction - Returns values for each state/action pair.

        Thin wrapper that delegates to ParametricDQNPredictor.predict with
        this object passed as the predictor instance. The int features are
        a required argument, passed between the float features and the
        actions.

        :param float_state_features states as list of feature -> float value dict
        :param int_state_features states as list of feature -> int value dict
        :param actions actions as list of feature -> value dict
        :return the result of ParametricDQNPredictor.predict, unchanged
        """
        pair_values = ParametricDQNPredictor.predict(
            self, float_state_features, int_state_features, actions
        )
        return pair_values
Esempio n. 7
0
 def export_critic(
     cls,
     trainer,
     state_normalization_parameters,
     action_normalization_parameters,
     model_on_gpu=False,
 ):
     """Export the critic of ``trainer`` via ParametricDQNPredictor.export.

     The trainer and both sets of normalization parameters are forwarded
     positionally; ``model_on_gpu`` is forwarded by keyword and
     ``normalize_actions`` is always passed as False. ``cls`` is not used.

     :param trainer object handed to ParametricDQNPredictor.export
     :param state_normalization_parameters forwarded unchanged
     :param action_normalization_parameters forwarded unchanged
     :param model_on_gpu forwarded unchanged (defaults to False)
     :return the result of ParametricDQNPredictor.export
     """
     export_options = {
         "model_on_gpu": model_on_gpu,
         "normalize_actions": False,
     }
     critic = ParametricDQNPredictor.export(
         trainer,
         state_normalization_parameters,
         action_normalization_parameters,
         **export_options
     )
     return critic
Esempio n. 8
0
 def export_critic(
     cls,
     trainer,
     state_normalization_parameters,
     action_normalization_parameters,
     int_features=False,
     model_on_gpu=False,
 ):
     """Export the critic of ``trainer`` via ParametricDQNPredictor.export.

     The trainer and both sets of normalization parameters are forwarded
     positionally; ``int_features`` and ``model_on_gpu`` are forwarded by
     keyword and ``normalize_actions`` is always passed as False. ``cls``
     is not used.

     :param trainer object handed to ParametricDQNPredictor.export
     :param state_normalization_parameters forwarded unchanged
     :param action_normalization_parameters forwarded unchanged
     :param int_features forwarded unchanged (defaults to False)
     :param model_on_gpu forwarded unchanged (defaults to False)
     :return the result of ParametricDQNPredictor.export
     """
     export_options = {
         "int_features": int_features,
         "model_on_gpu": model_on_gpu,
         "normalize_actions": False,
     }
     critic = ParametricDQNPredictor.export(
         trainer,
         state_normalization_parameters,
         action_normalization_parameters,
         **export_options
     )
     return critic