# Variant 1: specs and a bundled config object are passed in explicitly.
def initialize_model(self, states_spec, actions_spec, config):
    return QNstepModel(
        states_spec=states_spec,
        actions_spec=actions_spec,
        network_spec=self.network_spec,
        config=config
    )
# Variant 2: no arguments; every model setting is read from an agent attribute.
def initialize_model(self):
    return QNstepModel(
        states_spec=self.states_spec,
        actions_spec=self.actions_spec,
        network_spec=self.network_spec,
        device=self.device,
        session_config=self.session_config,
        scope=self.scope,
        saver_spec=self.saver_spec,
        summary_spec=self.summary_spec,
        distributed_spec=self.distributed_spec,
        optimizer=self.optimizer,
        discount=self.discount,
        variable_noise=self.variable_noise,
        states_preprocessing_spec=self.states_preprocessing_spec,
        explorations_spec=self.explorations_spec,
        reward_preprocessing_spec=self.reward_preprocessing_spec,
        distributions_spec=self.distributions_spec,
        entropy_regularization=self.entropy_regularization,
        target_sync_frequency=self.target_sync_frequency,
        target_update_weight=self.target_update_weight,
        double_q_model=self.double_q_model,
        huber_loss=self.huber_loss,
        # TEMP: Random sampling fix
        random_sampling_fix=False
    )
# Variant 3: specs are passed in; the remaining settings come from agent attributes.
def initialize_model(self, states_spec, actions_spec):
    return QNstepModel(
        states_spec=states_spec,
        actions_spec=actions_spec,
        network_spec=self.network_spec,
        device=self.device,
        scope=self.scope,
        saver_spec=self.saver_spec,
        summary_spec=self.summary_spec,
        distributed_spec=self.distributed_spec,
        optimizer=self.optimizer,
        discount=self.discount,
        normalize_rewards=self.normalize_rewards,
        variable_noise=self.variable_noise,
        distributions_spec=self.distributions_spec,
        entropy_regularization=self.entropy_regularization,
        target_sync_frequency=self.target_sync_frequency,
        target_update_weight=self.target_update_weight,
        double_q_model=self.double_q_model,
        huber_loss=self.huber_loss
    )
# Variant 4: no arguments; uses the renamed keyword set of the newer model
# interface (states/actions/network instead of *_spec, plus memory and
# update_mode settings).
def initialize_model(self):
    return QNstepModel(
        states=self.states,
        actions=self.actions,
        scope=self.scope,
        device=self.device,
        saver=self.saver,
        summarizer=self.summarizer,
        distributed=self.distributed,
        batching_capacity=self.batching_capacity,
        variable_noise=self.variable_noise,
        states_preprocessing=self.states_preprocessing,
        actions_exploration=self.actions_exploration,
        reward_preprocessing=self.reward_preprocessing,
        update_mode=self.update_mode,
        memory=self.memory,
        optimizer=self.optimizer,
        discount=self.discount,
        network=self.network,
        distributions=self.distributions,
        entropy_regularization=self.entropy_regularization,
        target_sync_frequency=self.target_sync_frequency,
        target_update_weight=self.target_update_weight,
        double_q_model=self.double_q_model,
        huber_loss=self.huber_loss
    )
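# All four definitions above are revisions of the same hook: the agent stores
# its configuration and defers construction of the concrete QNstepModel to
# initialize_model(), which the base class calls during setup. Below is a
# minimal, self-contained sketch of that pattern; the _Model/_Agent/_NstepAgent
# names are illustrative stand-ins, not the library's actual classes.
class _Model:
    """Stand-in model that just records the keyword arguments it was given."""

    def __init__(self, **kwargs):
        self.config = kwargs


class _Agent:
    """Base class: stores configuration, defers model construction to subclasses."""

    def __init__(self, **model_kwargs):
        self.model_kwargs = model_kwargs
        # The base class invokes the subclass hook to build the concrete model.
        self.model = self.initialize_model()

    def initialize_model(self):
        raise NotImplementedError


class _NstepAgent(_Agent):
    """Subclass hook: returns the concrete model, forwarding the stored config."""

    def initialize_model(self):
        return _Model(**self.model_kwargs)


# Usage sketch: constructing the agent triggers the hook once.
# agent = _NstepAgent(discount=0.99, double_q_model=True)
# assert agent.model.config["discount"] == 0.99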