Example #1
    def actor_predictor(self,
                        feature_extractor=None,
                        output_transformer=None,
                        net_container=None) -> ActorPredictor:
        # Export runs on a CPU copy of the actor network
        actor_network = self.actor_network.cpu_model()
        if net_container is not None:
            actor_network = net_container(actor_network)
        predictor = ActorExporter(actor_network, feature_extractor,
                                  output_transformer).export()
        # Put the live actor network back into training mode after the export
        self.actor_network.train()
        return predictor
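For context, a minimal usage sketch; this is hypothetical, assuming actor_predictor is a method of a trainer such as the DDPGTrainer in Example #3, with feature_extractor and output_transformer built as in Example #2:

# Hypothetical usage; trainer, feature_extractor, and output_transformer
# are assumed to be constructed as in Examples #2 and #3 below.
predictor = trainer.actor_predictor(
    feature_extractor=feature_extractor,
    output_transformer=output_transformer,
)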
Example #2
    def get_actor_predictor(self, trainer, environment):
        # Normalize raw environment states into network input features
        feature_extractor = PredictorFeatureExtractor(
            state_normalization_parameters=environment.normalization)

        # Map network outputs back into the environment's action range
        output_transformer = ActorOutputTransformer(
            sort_features_by_normalization(
                environment.normalization_action)[0],
            environment.max_action_range.reshape(-1),
            environment.min_action_range.reshape(-1),
        )

        predictor = ActorExporter(trainer.actor_network, feature_extractor,
                                  output_transformer).export()
        return predictor
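A minimal call sketch; hypothetical, since the enclosing object (called workflow here) is an assumption, with trainer and environment as in Example #3:

# Hypothetical call; "workflow" stands in for whatever object defines
# get_actor_predictor (e.g. a test or workflow class).
predictor = workflow.get_actor_predictor(trainer, environment)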
Example #3
    def _test_ddpg_trainer(self, use_gpu=False, use_all_avail_gpus=False):
        # FIXME: this test is not really working
        self.run_pre_training_eval = False
        self.check_tolerance = False
        environment = GridworldContinuous()

        parameters = self.get_ddpg_parameters()

        state_dim = get_num_output_features(environment.normalization)
        action_dim = get_num_output_features(environment.normalization_action)

        # Build Actor Network
        actor_network = ActorNetModel(
            layers=[state_dim] + parameters.actor_training.layers[1:-1] +
            [action_dim],
            activations=parameters.actor_training.activations,
            fl_init=parameters.shared_training.final_layer_init,
            state_dim=state_dim,
            action_dim=action_dim,
            use_gpu=use_gpu,
            use_all_avail_gpus=use_all_avail_gpus,
        )

        # Build Critic Network
        critic_network = CriticNetModel(
            # Ensure dims match input state and scalar output
            layers=[state_dim] + parameters.critic_training.layers[1:-1] + [1],
            activations=parameters.critic_training.activations,
            fl_init=parameters.shared_training.final_layer_init,
            state_dim=state_dim,
            action_dim=action_dim,
            use_gpu=use_gpu,
            use_all_avail_gpus=use_all_avail_gpus,
        )

        trainer = DDPGTrainer(
            actor_network,
            critic_network,
            parameters,
            environment.normalization,
            environment.normalization_action,
            environment.min_action_range,
            environment.max_action_range,
            use_gpu=use_gpu,
            use_all_avail_gpus=use_all_avail_gpus,
        )

        exporter = ParametricDQNExporter.from_state_action_normalization(
            trainer.critic,
            state_normalization=environment.normalization,
            action_normalization=environment.normalization_action,
        )

        evaluator = GridworldDDPGEvaluator(environment, DISCOUNT)
        self.evaluate_gridworld(environment, evaluator, trainer, exporter,
                                use_gpu)

        # Make sure actor predictor works
        actor = ActorExporter.from_state_action_normalization(
            trainer.actor,
            state_normalization=environment.normalization,
            action_normalization=environment.normalization_action,
        ).export()

        # Make sure all actions are optimal
        error = evaluator.evaluate_actor(actor, thres=0.2)
        print("gridworld optimal action match MAE: {0:.3f}".format(error))
Example #4
def _get_actor_exporter(trainer, state_normalization, action_normalization):
    # Convenience wrapper: build an ActorExporter from the trainer's actor
    # network and the given state/action normalization parameters
    return ActorExporter.from_state_action_normalization(
        trainer.actor,
        state_normalization=state_normalization,
        action_normalization=action_normalization,
    )
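A minimal usage sketch, mirroring Example #3: the helper returns an exporter, so .export() is still needed to obtain a predictor. The trainer and environment are assumed to be set up as in Example #3:

# Hypothetical usage; trainer and environment as in Example #3
actor_predictor = _get_actor_exporter(
    trainer,
    environment.normalization,
    environment.normalization_action,
).export()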