def initialize_model(self):
    """Construct a PGProbRatioModel from attributes stored on this instance.

    Each keyword argument passed to the constructor is read from the
    attribute of the same name on ``self`` and forwarded unchanged.

    Returns:
        Whatever calling ``PGProbRatioModel`` with those keyword
        arguments returns.
    """
    # Resolve the constructor before reading any attribute, matching the
    # evaluation order of a direct call expression.
    model_cls = PGProbRatioModel
    model_kwargs = dict(
        states_spec=self.states_spec,
        actions_spec=self.actions_spec,
        network_spec=self.network_spec,
        device=self.device,
        session_config=self.session_config,
        scope=self.scope,
        saver_spec=self.saver_spec,
        summary_spec=self.summary_spec,
        distributed_spec=self.distributed_spec,
        optimizer=self.optimizer,
        discount=self.discount,
        variable_noise=self.variable_noise,
        states_preprocessing_spec=self.states_preprocessing_spec,
        explorations_spec=self.explorations_spec,
        reward_preprocessing_spec=self.reward_preprocessing_spec,
        distributions_spec=self.distributions_spec,
        entropy_regularization=self.entropy_regularization,
        baseline_mode=self.baseline_mode,
        baseline=self.baseline,
        baseline_optimizer=self.baseline_optimizer,
        gae_lambda=self.gae_lambda,
        likelihood_ratio_clipping=self.likelihood_ratio_clipping,
    )
    return model_cls(**model_kwargs)
def initialize_model(self):
    """Construct a PGProbRatioModel from attributes stored on this instance.

    Each keyword argument passed to the constructor is read from the
    attribute of the same name on ``self`` and forwarded unchanged.

    Returns:
        Whatever calling ``PGProbRatioModel`` with those keyword
        arguments returns.
    """
    # Resolve the constructor before reading any attribute, matching the
    # evaluation order of a direct call expression.
    model_cls = PGProbRatioModel
    model_kwargs = {
        "states": self.states,
        "actions": self.actions,
        "scope": self.scope,
        "device": self.device,
        "saver": self.saver,
        "summarizer": self.summarizer,
        "execution": self.execution,
        "batching_capacity": self.batching_capacity,
        "variable_noise": self.variable_noise,
        "states_preprocessing": self.states_preprocessing,
        "actions_exploration": self.actions_exploration,
        "reward_preprocessing": self.reward_preprocessing,
        "update_mode": self.update_mode,
        "memory": self.memory,
        "optimizer": self.optimizer,
        "discount": self.discount,
        "network": self.network,
        "distributions": self.distributions,
        "entropy_regularization": self.entropy_regularization,
        "baseline_mode": self.baseline_mode,
        "baseline": self.baseline,
        "baseline_optimizer": self.baseline_optimizer,
        "gae_lambda": self.gae_lambda,
        "likelihood_ratio_clipping": self.likelihood_ratio_clipping,
    }
    return model_cls(**model_kwargs)
def initialize_model(self, states_spec, actions_spec, config):
    """Construct a PGProbRatioModel for the given specs and config.

    Args:
        states_spec: Forwarded unchanged as ``states_spec``.
        actions_spec: Forwarded unchanged as ``actions_spec``.
        config: Forwarded unchanged as ``config``.

    Returns:
        Whatever calling ``PGProbRatioModel`` with those arguments, plus
        ``network_spec`` read from ``self.network_spec``, returns.
    """
    model = PGProbRatioModel(
        states_spec=states_spec,
        actions_spec=actions_spec,
        network_spec=self.network_spec,
        config=config,
    )
    return model
def initialize_model(self, states_spec, actions_spec):
    """Construct a PGProbRatioModel from the given specs and instance attributes.

    Args:
        states_spec: Forwarded unchanged as ``states_spec``.
        actions_spec: Forwarded unchanged as ``actions_spec``.

    Returns:
        Whatever calling ``PGProbRatioModel`` returns when given the two
        specs plus the same-named attributes read from ``self``.
    """
    # Resolve the constructor before reading any attribute, matching the
    # evaluation order of a direct call expression.
    model_cls = PGProbRatioModel

    # Attributes forwarded under their own names, in call order.
    forwarded_attrs = (
        "network_spec",
        "device",
        "scope",
        "saver_spec",
        "summary_spec",
        "distributed_spec",
        "optimizer",
        "discount",
        "normalize_rewards",
        "variable_noise",
        "distributions_spec",
        "entropy_regularization",
        "baseline_mode",
        "baseline",
        "baseline_optimizer",
        "gae_lambda",
        "likelihood_ratio_clipping",
    )

    model_kwargs = {"states_spec": states_spec, "actions_spec": actions_spec}
    for attr_name in forwarded_attrs:
        model_kwargs[attr_name] = getattr(self, attr_name)
    return model_cls(**model_kwargs)