def test_create_net_max_q_discrete_action(self):
    """Check the net spec of a training extractor that includes possible actions.

    The extractor receives only state normalization parameters, with
    ``include_possible_actions=True`` and ``max_num_actions=2``. The expected
    input and output records are passed to ``check_create_net_spec``.
    """
    state_params = self.get_state_normalization_parameters()
    feature_extractor = TrainingFeatureExtractor(
        state_normalization_parameters=state_params,
        include_possible_actions=True,
        max_num_actions=2,
    )

    # Expected input: map-typed state features, scalar actions and
    # not_terminal flag, list-of-scalar action masks.
    input_fields = [
        ("state_features", map_schema()),
        ("next_state_features", map_schema()),
        ("action", schema.Scalar()),
        ("next_action", schema.Scalar()),
        ("not_terminal", schema.Scalar()),
        ("possible_actions_mask", schema.List(schema.Scalar())),
        ("possible_next_actions_mask", schema.List(schema.Scalar())),
    ]
    input_spec = schema.Struct(*input_fields)

    # Expected output: the same field names, each one a fresh scalar.
    output_names = (
        "state_features",
        "next_state_features",
        "action",
        "next_action",
        "not_terminal",
        "possible_actions_mask",
        "possible_next_actions_mask",
    )
    output_spec = schema.Struct(
        *[(field_name, schema.Scalar()) for field_name in output_names]
    )

    self.check_create_net_spec(feature_extractor, input_spec, output_spec)
def _test_create_net_sarsa_parametric_action(self, normalize):
    """Check the net spec of a training extractor with map-typed actions.

    The extractor receives both state and action normalization parameters,
    with ``include_possible_actions=False`` and ``max_num_actions=2``.

    Args:
        normalize: Forwarded unchanged as the extractor's ``normalize`` argument.
    """
    state_params = self.get_state_normalization_parameters()
    action_params = self.get_action_normalization_parameters()
    feature_extractor = TrainingFeatureExtractor(
        state_normalization_parameters=state_params,
        action_normalization_parameters=action_params,
        include_possible_actions=False,
        normalize=normalize,
        max_num_actions=2,
    )

    map_field_names = (
        "state_features",
        "next_state_features",
        "action",
        "next_action",
    )

    # Expected input: four map-typed fields followed by a scalar flag.
    input_fields = [(field_name, map_schema()) for field_name in map_field_names]
    input_fields.append(("not_terminal", schema.Scalar()))
    input_spec = schema.Struct(*input_fields)

    # Expected output: the same five names, each one a fresh scalar.
    output_fields = [
        (field_name, schema.Scalar())
        for field_name in map_field_names + ("not_terminal",)
    ]
    output_spec = schema.Struct(*output_fields)

    self.check_create_net_spec(feature_extractor, input_spec, output_spec)
def _test_create_net_sarsa_parametric_action(self, normalize):
    """Compare the extractor's net spec with hand-written records.

    Builds a ``TrainingFeatureExtractor`` from state and action normalization
    parameters (possible actions excluded, ``max_num_actions=2``) and passes
    it with the expected input and output records to ``check_create_net_spec``.

    Args:
        normalize: Value passed as the extractor's ``normalize`` keyword.
    """
    extractor_kwargs = {
        "state_normalization_parameters": self.get_state_normalization_parameters(),
        "action_normalization_parameters": self.get_action_normalization_parameters(),
        "include_possible_actions": False,
        "normalize": normalize,
        "max_num_actions": 2,
    }
    feature_extractor = TrainingFeatureExtractor(**extractor_kwargs)

    # Expected input: states and actions are maps, the flag is a scalar.
    input_spec = schema.Struct(
        *[
            ("state_features", map_schema()),
            ("next_state_features", map_schema()),
            ("action", map_schema()),
            ("next_action", map_schema()),
            ("not_terminal", schema.Scalar()),
        ]
    )

    # Expected output: every field is a scalar.
    output_spec = schema.Struct(
        *[
            (field_name, schema.Scalar())
            for field_name in (
                "state_features",
                "next_state_features",
                "action",
                "next_action",
                "not_terminal",
            )
        ]
    )

    self.check_create_net_spec(feature_extractor, input_spec, output_spec)
def test_create_net_max_q_discrete_action(self):
    """Compare the extractor's net spec with records that include action masks.

    Builds a ``TrainingFeatureExtractor`` from state normalization parameters
    with ``include_possible_actions=True`` and ``max_num_actions=2``, then
    passes it with the expected records to ``check_create_net_spec``.
    """
    extractor_kwargs = {
        "state_normalization_parameters": self.get_state_normalization_parameters(),
        "include_possible_actions": True,
        "max_num_actions": 2,
    }
    feature_extractor = TrainingFeatureExtractor(**extractor_kwargs)

    state_names = ("state_features", "next_state_features")
    scalar_names = ("action", "next_action", "not_terminal")
    mask_names = ("possible_actions_mask", "possible_next_actions_mask")

    # Expected input: map states, then scalar fields, then list-of-scalar masks.
    input_fields = [(field_name, map_schema()) for field_name in state_names]
    input_fields += [(field_name, schema.Scalar()) for field_name in scalar_names]
    input_fields += [
        (field_name, schema.List(schema.Scalar())) for field_name in mask_names
    ]
    input_spec = schema.Struct(*input_fields)

    # Expected output: the same names in the same order, each a fresh scalar.
    output_spec = schema.Struct(
        *[
            (field_name, schema.Scalar())
            for field_name in state_names + scalar_names + mask_names
        ]
    )

    self.check_create_net_spec(feature_extractor, input_spec, output_spec)
def test_create_net_sarsa_no_action(self):
    """Check the net spec of a predictor extractor built from state parameters only.

    The expected input is a single map-typed ``float_features`` field and the
    expected output a single scalar ``state`` field.
    """
    state_params = self.get_state_normalization_parameters()
    feature_extractor = PredictorFeatureExtractor(
        state_normalization_parameters=state_params
    )

    input_field = ("float_features", map_schema())
    input_spec = schema.Struct(input_field)

    output_field = ("state", schema.Scalar())
    output_spec = schema.Struct(output_field)

    self.check_create_net_spec(feature_extractor, input_spec, output_spec)
def _test_create_net_sarsa_no_action(self, normalize):
    """Check the net spec of a predictor extractor for a given ``normalize`` value.

    The expected input is a single map-typed ``float_features`` field and the
    expected output a single scalar ``state`` field.

    Args:
        normalize: Forwarded unchanged as the extractor's ``normalize`` argument.
    """
    state_params = self.get_state_normalization_parameters()
    feature_extractor = PredictorFeatureExtractor(
        state_normalization_parameters=state_params,
        normalize=normalize,
    )

    input_field = ("float_features", map_schema())
    input_spec = schema.Struct(input_field)

    output_field = ("state", schema.Scalar())
    output_spec = schema.Struct(output_field)

    self.check_create_net_spec(feature_extractor, input_spec, output_spec)
def test_create_net_sarsa_discrete_action(self):
    """Check the net spec of a training extractor built with ``max_q_learning=False``.

    Only state normalization parameters are supplied. The expected input has
    map-typed state features and scalar actions; the expected output is four
    scalar fields.
    """
    state_params = self.get_state_normalization_parameters()
    feature_extractor = TrainingFeatureExtractor(
        state_normalization_parameters=state_params,
        max_q_learning=False,
    )

    # Expected input: map-typed states followed by scalar actions.
    input_fields = [
        ("state_features", map_schema()),
        ("next_state_features", map_schema()),
        ("action", schema.Scalar()),
        ("next_action", schema.Scalar()),
    ]
    input_spec = schema.Struct(*input_fields)

    # Expected output: state fields lose the "_features" suffix; all scalars.
    output_names = ("state", "next_state", "action", "next_action")
    output_spec = schema.Struct(
        *[(field_name, schema.Scalar()) for field_name in output_names]
    )

    self.check_create_net_spec(feature_extractor, input_spec, output_spec)
def test_create_net_max_q_parametric_action(self):
    """Check the net spec of a training extractor built with ``max_q_learning=True``.

    Both state and action normalization parameters are supplied. The expected
    records carry a ``possible_next_actions`` list field alongside the state
    and action fields.
    """
    state_params = self.get_state_normalization_parameters()
    action_params = self.get_action_normalization_parameters()
    feature_extractor = TrainingFeatureExtractor(
        state_normalization_parameters=state_params,
        action_normalization_parameters=action_params,
        max_q_learning=True,
    )

    # Expected input: three map-typed fields, then a list of maps.
    input_fields = [
        (field_name, map_schema())
        for field_name in ("state_features", "next_state_features", "action")
    ]
    input_fields.append(("possible_next_actions", schema.List(map_schema())))
    input_spec = schema.Struct(*input_fields)

    # Expected output: three scalar fields, then a list of scalars.
    output_fields = [
        (field_name, schema.Scalar())
        for field_name in ("state", "next_state", "action")
    ]
    output_fields.append(("possible_next_actions", schema.List(schema.Scalar())))
    output_spec = schema.Struct(*output_fields)

    self.check_create_net_spec(feature_extractor, input_spec, output_spec)