def test_create_net_max_q_discrete_action(self):
     """Check the net spec for a training extractor that includes possible actions.

     Builds a ``TrainingFeatureExtractor`` with ``include_possible_actions=True``
     and ``max_num_actions=2``, then hands it to ``self.check_create_net_spec``
     together with the expected input and output records.
     """
     feature_extractor = TrainingFeatureExtractor(
         state_normalization_parameters=self.get_state_normalization_parameters(),
         include_possible_actions=True,
         max_num_actions=2,
     )

     # Expected inputs: map-schema state features, scalar actions and terminal
     # flag, and list-of-scalar masks for the possible (next) actions.
     input_fields = [
         ("state_features", map_schema()),
         ("next_state_features", map_schema()),
         ("action", schema.Scalar()),
         ("next_action", schema.Scalar()),
         ("not_terminal", schema.Scalar()),
         ("possible_actions_mask", schema.List(schema.Scalar())),
         ("possible_next_actions_mask", schema.List(schema.Scalar())),
     ]
     input_record = schema.Struct(*input_fields)

     # Expected outputs: the same field names, each one a fresh scalar.
     output_names = (
         "state_features",
         "next_state_features",
         "action",
         "next_action",
         "not_terminal",
         "possible_actions_mask",
         "possible_next_actions_mask",
     )
     output_record = schema.Struct(
         *[(field_name, schema.Scalar()) for field_name in output_names]
     )

     self.check_create_net_spec(feature_extractor, input_record, output_record)
 def _test_create_net_sarsa_parametric_action(self, normalize):
     """Check the net spec for a parametric-action extractor without possible actions.

     Args:
         normalize: Passed through unchanged as the extractor's ``normalize``
             argument.
     """
     state_params = self.get_state_normalization_parameters()
     action_params = self.get_action_normalization_parameters()
     feature_extractor = TrainingFeatureExtractor(
         state_normalization_parameters=state_params,
         action_normalization_parameters=action_params,
         include_possible_actions=False,
         normalize=normalize,
         max_num_actions=2,
     )

     # State and action fields use map_schema(); only the terminal flag is scalar.
     map_field_names = (
         "state_features",
         "next_state_features",
         "action",
         "next_action",
     )
     map_fields = [(field_name, map_schema()) for field_name in map_field_names]
     input_record = schema.Struct(*map_fields, ("not_terminal", schema.Scalar()))

     # Every expected output field is a scalar.
     scalar_field_names = map_field_names + ("not_terminal",)
     output_record = schema.Struct(
         *[(field_name, schema.Scalar()) for field_name in scalar_field_names]
     )

     self.check_create_net_spec(feature_extractor, input_record, output_record)
Beispiel #3
0
 def _test_create_net_sarsa_parametric_action(self, normalize):
     """Verify the created net spec for the parametric-action training extractor.

     The extractor is built with both state and action normalization
     parameters, ``include_possible_actions=False`` and ``max_num_actions=2``.

     Args:
         normalize: Forwarded as the extractor's ``normalize`` keyword.
     """
     extractor_kwargs = {
         "state_normalization_parameters": self.get_state_normalization_parameters(),
         "action_normalization_parameters": self.get_action_normalization_parameters(),
         "include_possible_actions": False,
         "normalize": normalize,
         "max_num_actions": 2,
     }
     extractor = TrainingFeatureExtractor(**extractor_kwargs)

     # Inputs: four map_schema() fields followed by the scalar terminal flag.
     inputs = [
         (key, map_schema())
         for key in ("state_features", "next_state_features", "action", "next_action")
     ]
     inputs.append(("not_terminal", schema.Scalar()))
     expected_inputs = schema.Struct(*inputs)

     # Outputs: the same five names, all scalars.
     outputs = []
     for key in (
         "state_features",
         "next_state_features",
         "action",
         "next_action",
         "not_terminal",
     ):
         outputs.append((key, schema.Scalar()))
     expected_outputs = schema.Struct(*outputs)

     self.check_create_net_spec(extractor, expected_inputs, expected_outputs)
Beispiel #4
0
 def test_create_net_max_q_discrete_action(self):
     """Verify the net spec when possible-action masks are part of the records.

     Uses a ``TrainingFeatureExtractor`` configured with
     ``include_possible_actions=True`` and ``max_num_actions=2`` and delegates
     the comparison to ``self.check_create_net_spec``.
     """
     state_params = self.get_state_normalization_parameters()
     extractor = TrainingFeatureExtractor(
         state_normalization_parameters=state_params,
         include_possible_actions=True,
         max_num_actions=2,
     )

     # Short local alias; each call still produces a separate Scalar instance.
     scalar = schema.Scalar

     inputs = schema.Struct(
         ("state_features", map_schema()),
         ("next_state_features", map_schema()),
         ("action", scalar()),
         ("next_action", scalar()),
         ("not_terminal", scalar()),
         ("possible_actions_mask", schema.List(scalar())),
         ("possible_next_actions_mask", schema.List(scalar())),
     )
     outputs = schema.Struct(
         ("state_features", scalar()),
         ("next_state_features", scalar()),
         ("action", scalar()),
         ("next_action", scalar()),
         ("not_terminal", scalar()),
         ("possible_actions_mask", scalar()),
         ("possible_next_actions_mask", scalar()),
     )

     self.check_create_net_spec(extractor, inputs, outputs)
 def test_create_net_sarsa_no_action(self):
     """Check the net spec of a ``PredictorFeatureExtractor`` built from state parameters only.

     The expected input record has a single ``float_features`` field using
     ``map_schema()``; the expected output record has a single scalar ``state``.
     """
     state_params = self.get_state_normalization_parameters()
     predictor_extractor = PredictorFeatureExtractor(
         state_normalization_parameters=state_params
     )

     input_fields = [("float_features", map_schema())]
     input_record = schema.Struct(*input_fields)

     output_fields = [("state", schema.Scalar())]
     output_record = schema.Struct(*output_fields)

     self.check_create_net_spec(predictor_extractor, input_record, output_record)
 def _test_create_net_sarsa_no_action(self, normalize):
     """Check the net spec of a ``PredictorFeatureExtractor`` for a given ``normalize`` setting.

     Args:
         normalize: Passed through unchanged as the extractor's ``normalize``
             argument.
     """
     state_params = self.get_state_normalization_parameters()
     predictor_extractor = PredictorFeatureExtractor(
         state_normalization_parameters=state_params,
         normalize=normalize,
     )

     # One map_schema() input field and one scalar output field.
     float_features_field = ("float_features", map_schema())
     input_record = schema.Struct(float_features_field)
     state_field = ("state", schema.Scalar())
     output_record = schema.Struct(state_field)

     self.check_create_net_spec(predictor_extractor, input_record, output_record)
 def test_create_net_sarsa_discrete_action(self):
     """Check the net spec for a training extractor built with ``max_q_learning=False``.

     Only state normalization parameters are supplied; the expected records
     are compared through ``self.check_create_net_spec``.
     """
     state_params = self.get_state_normalization_parameters()
     feature_extractor = TrainingFeatureExtractor(
         state_normalization_parameters=state_params,
         max_q_learning=False,
     )

     # Inputs: map_schema() state features followed by scalar actions.
     input_fields = [
         (field_name, map_schema())
         for field_name in ("state_features", "next_state_features")
     ]
     input_fields.extend(
         (field_name, schema.Scalar()) for field_name in ("action", "next_action")
     )
     input_record = schema.Struct(*input_fields)

     # Outputs: four scalar fields.
     output_names = ("state", "next_state", "action", "next_action")
     output_record = schema.Struct(
         *[(field_name, schema.Scalar()) for field_name in output_names]
     )

     self.check_create_net_spec(feature_extractor, input_record, output_record)
Beispiel #8
0
 def test_create_net_max_q_parametric_action(self):
     """Check the net spec for a parametric-action extractor with ``max_q_learning=True``.

     The extractor receives both state and action normalization parameters.
     The expected records end with a list-typed ``possible_next_actions`` field.
     """
     state_params = self.get_state_normalization_parameters()
     action_params = self.get_action_normalization_parameters()
     feature_extractor = TrainingFeatureExtractor(
         state_normalization_parameters=state_params,
         action_normalization_parameters=action_params,
         max_q_learning=True,
     )

     # Inputs: three map_schema() fields, then a list of map_schema() entries.
     input_fields = [
         (field_name, map_schema())
         for field_name in ("state_features", "next_state_features", "action")
     ]
     input_fields.append(("possible_next_actions", schema.List(map_schema())))
     input_record = schema.Struct(*input_fields)

     # Outputs: three scalar fields, then a list of scalars.
     output_fields = [
         (field_name, schema.Scalar())
         for field_name in ("state", "next_state", "action")
     ]
     output_fields.append(("possible_next_actions", schema.List(schema.Scalar())))
     output_record = schema.Struct(*output_fields)

     self.check_create_net_spec(feature_extractor, input_record, output_record)