Esempio n. 1
0
    def get_modular_sarsa_trainer_exporter(
        self, environment, parameters=None, use_gpu=False, use_all_avail_gpus=False
    ):
        """Build a parametric DQN trainer together with its exporter.

        Two ``FullyConnectedParametricDQN`` networks with identical
        configuration are created (a Q-network and a reward network), a
        target network is obtained from the Q-network, and everything is
        wired into a ``_ParametricDQNTrainer`` and a ``ParametricDQNExporter``.

        Args:
            environment: Object exposing ``normalization`` and
                ``normalization_action``; both are used to size the networks
                and to configure the preprocessors and feature extractor.
            parameters: Training parameters. When falsy, the result of
                ``self.get_sarsa_parameters()`` is used instead.
            use_gpu: When true, both networks are moved with ``.cuda()``.
            use_all_avail_gpus: When true (and ``use_gpu`` is true), both
                networks are replaced by ``get_data_parallel_model()``.

        Returns:
            A ``(trainer, exporter)`` tuple.
        """
        if not parameters:
            parameters = self.get_sarsa_parameters()

        def make_network():
            # The Q-network and reward network are configured identically.
            # Hidden sizes skip the first and last entries of ``layers``
            # (presumably the input and output widths -- confirm against
            # the parameter schema).
            return FullyConnectedParametricDQN(
                state_dim=get_num_output_features(environment.normalization),
                action_dim=get_num_output_features(
                    environment.normalization_action
                ),
                sizes=parameters.training.layers[1:-1],
                activations=parameters.training.activations[:-1],
            )

        q_net = make_network()
        reward_net = make_network()

        if use_gpu:
            q_net, reward_net = q_net.cuda(), reward_net.cuda()
            if use_all_avail_gpus:
                q_net = q_net.get_data_parallel_model()
                reward_net = reward_net.get_data_parallel_model()

        # The target network is taken after any device / data-parallel changes.
        target_net = q_net.get_target_network()
        sarsa_trainer = _ParametricDQNTrainer(
            q_net, target_net, reward_net, parameters
        )

        # NOTE(review): the meaning of the two positional flags (False, True)
        # is defined by Preprocessor and is not visible here -- confirm.
        state_prep = Preprocessor(environment.normalization, False, True)
        action_prep = Preprocessor(environment.normalization_action, False, True)
        extractor = PredictorFeatureExtractor(
            state_normalization_parameters=environment.normalization,
            action_normalization_parameters=environment.normalization_action,
        )
        transformer = ParametricActionOutputTransformer()
        sarsa_exporter = ParametricDQNExporter(
            q_net, extractor, transformer, state_prep, action_prep
        )
        return sarsa_trainer, sarsa_exporter
    def get_modular_sarsa_trainer_exporter(
        self,
        environment,
        parameters=None,
        use_gpu=False,
        use_all_avail_gpus=False,
    ):
        """Create a parametric DQN trainer and the exporter for its Q-network.

        A Q-network and a reward network are built as two separately
        constructed ``FullyConnectedParametricDQN`` instances sharing one
        configuration. The Q-network supplies the target network via
        ``get_target_network()``; the trainer receives all three networks
        plus ``parameters``.

        Args:
            environment: Object exposing ``normalization`` and
                ``normalization_action``, used to size the networks and to
                configure the feature extractor.
            parameters: Training parameters. Falls back to
                ``self.get_sarsa_parameters()`` when falsy.
            use_gpu: When true, both networks are moved with ``.cuda()``.
            use_all_avail_gpus: Only consulted when ``use_gpu`` is true; then
                both networks are replaced by ``get_data_parallel_model()``.

        Returns:
            A ``(trainer, exporter)`` tuple.
        """
        if not parameters:
            parameters = self.get_sarsa_parameters()

        def new_parametric_dqn():
            # One factory for both networks so their configuration cannot
            # drift apart; each call still constructs a fresh instance.
            training = parameters.training
            return FullyConnectedParametricDQN(
                state_dim=get_num_output_features(environment.normalization),
                action_dim=get_num_output_features(
                    environment.normalization_action
                ),
                sizes=training.layers[1:-1],
                activations=training.activations[:-1],
            )

        q_model = new_parametric_dqn()
        reward_model = new_parametric_dqn()

        if use_gpu:
            q_model = q_model.cuda()
            reward_model = reward_model.cuda()
            if use_all_avail_gpus:
                q_model = q_model.get_data_parallel_model()
                reward_model = reward_model.get_data_parallel_model()

        # Derived from the Q-network in its final (possibly GPU) form.
        target_model = q_model.get_target_network()
        built_trainer = _ParametricDQNTrainer(
            q_model, target_model, reward_model, parameters
        )

        extractor = PredictorFeatureExtractor(
            state_normalization_parameters=environment.normalization,
            action_normalization_parameters=environment.normalization_action,
        )
        transformer = ParametricActionOutputTransformer()
        built_exporter = ParametricDQNExporter(q_model, extractor, transformer)
        return built_trainer, built_exporter