Beispiel #1
0
 def initialize_model(self):
     """
     Construct the PGProbRatioModel for this object.

     Each constructor keyword is taken from the attribute of the same
     name on ``self`` and passed through unchanged.

     Returns:
         The result of calling ``PGProbRatioModel`` with those keywords.
     """
     # Keyword name == attribute name; the order matches the constructor call.
     forwarded = (
         'states_spec',
         'actions_spec',
         'network_spec',
         'device',
         'session_config',
         'scope',
         'saver_spec',
         'summary_spec',
         'distributed_spec',
         'optimizer',
         'discount',
         'variable_noise',
         'states_preprocessing_spec',
         'explorations_spec',
         'reward_preprocessing_spec',
         'distributions_spec',
         'entropy_regularization',
         'baseline_mode',
         'baseline',
         'baseline_optimizer',
         'gae_lambda',
         'likelihood_ratio_clipping',
     )
     model_kwargs = {name: getattr(self, name) for name in forwarded}
     return PGProbRatioModel(**model_kwargs)
Beispiel #2
0
 def initialize_model(self):
     """
     Construct the PGProbRatioModel for this object.

     All constructor keywords are read from identically named attributes
     of ``self``; nothing is transformed on the way through.

     Returns:
         The result of calling ``PGProbRatioModel`` with those keywords.
     """
     # Attribute names to forward, listed in the order the keywords are passed.
     attribute_names = (
         'states',
         'actions',
         'scope',
         'device',
         'saver',
         'summarizer',
         'execution',
         'batching_capacity',
         'variable_noise',
         'states_preprocessing',
         'actions_exploration',
         'reward_preprocessing',
         'update_mode',
         'memory',
         'optimizer',
         'discount',
         'network',
         'distributions',
         'entropy_regularization',
         'baseline_mode',
         'baseline',
         'baseline_optimizer',
         'gae_lambda',
         'likelihood_ratio_clipping',
     )
     model_kwargs = {}
     for attribute_name in attribute_names:
         model_kwargs[attribute_name] = getattr(self, attribute_name)
     return PGProbRatioModel(**model_kwargs)
Beispiel #3
0
 def initialize_model(self, states_spec, actions_spec, config):
     """
     Construct the PGProbRatioModel from the given specs and config.

     Args:
         states_spec: Forwarded as the ``states_spec`` keyword.
         actions_spec: Forwarded as the ``actions_spec`` keyword.
         config: Forwarded as the ``config`` keyword.

     Returns:
         The result of calling ``PGProbRatioModel``; ``network_spec`` is
         supplied from ``self.network_spec``.
     """
     model_kwargs = dict(
         states_spec=states_spec,
         actions_spec=actions_spec,
         network_spec=self.network_spec,
         config=config
     )
     return PGProbRatioModel(**model_kwargs)
 def initialize_model(self, states_spec, actions_spec):
     """
     Construct the PGProbRatioModel from the given specs.

     Args:
         states_spec: Forwarded as the ``states_spec`` keyword.
         actions_spec: Forwarded as the ``actions_spec`` keyword.

     Returns:
         The result of calling ``PGProbRatioModel``; every remaining
         keyword is read from the same-named attribute on ``self``.
     """
     # The two explicit arguments go first, as in the constructor call.
     model_kwargs = {
         'states_spec': states_spec,
         'actions_spec': actions_spec,
     }
     # Everything else is forwarded from instance attributes of the same name.
     from_instance = (
         'network_spec',
         'device',
         'scope',
         'saver_spec',
         'summary_spec',
         'distributed_spec',
         'optimizer',
         'discount',
         'normalize_rewards',
         'variable_noise',
         'distributions_spec',
         'entropy_regularization',
         'baseline_mode',
         'baseline',
         'baseline_optimizer',
         'gae_lambda',
         'likelihood_ratio_clipping',
     )
     for name in from_instance:
         model_kwargs[name] = getattr(self, name)
     return PGProbRatioModel(**model_kwargs)