def __init__(self, obs_space, action_space, config, existing_inputs=None):
    """Build the templated policy.

    Merges the template's default config into `config`, runs the
    `before_init` hook, then delegates graph construction to
    `DynamicTFPolicy.__init__`, and finally runs the `after_init` hook.

    Args:
        obs_space: Observation space of the policy.
        action_space: Action space of the policy.
        config: Policy config dict (overrides the template defaults).
        existing_inputs: Optional existing input placeholders to reuse.
    """
    if get_default_config:
        # Template defaults first; caller-supplied keys win.
        config = dict(get_default_config(), **config)

    if before_init:
        before_init(self, obs_space, action_space, config)

    def before_loss_init_wrapper(policy, obs_space, action_space, config):
        # Run the user hook (if any), then record extra action fetches.
        if before_loss_init:
            before_loss_init(policy, obs_space, action_space, config)
        self._extra_action_fetches = (
            {} if extra_action_fetches_fn is None
            else extra_action_fetches_fn(self))

    DynamicTFPolicy.__init__(
        self,
        obs_space,
        action_space,
        config,
        loss_fn,
        stats_fn=stats_fn,
        grad_stats_fn=grad_stats_fn,
        before_loss_init=before_loss_init_wrapper,
        existing_inputs=existing_inputs)

    if after_init:
        after_init(self, obs_space, action_space, config)
def __init__(
    self,
    obs_space,
    action_space,
    config,
    existing_model=None,
    existing_inputs=None,
):
    """Build the templated policy.

    Applies the template's default config, validates the spaces, runs the
    `before_init` hook, constructs the underlying `DynamicTFPolicy`
    (which performs a fake run-through), runs `after_init`, and resets
    the global timestep counter afterwards.

    Args:
        obs_space: Observation space of the policy.
        action_space: Action space of the policy.
        config: Policy config dict (overrides the template defaults).
        existing_model: Optional existing model to reuse.
        existing_inputs: Optional existing input placeholders to reuse.
    """
    if get_default_config:
        # Template defaults first; caller-supplied keys win.
        config = dict(get_default_config(), **config)

    if validate_spaces:
        validate_spaces(self, obs_space, action_space, config)

    if before_init:
        before_init(self, obs_space, action_space, config)

    def before_loss_init_wrapper(policy, obs_space, action_space, config):
        # Run the user hook (if any) first.
        if before_loss_init:
            before_loss_init(policy, obs_space, action_space, config)
        # Tower copies never compute extra action fetches.
        fetches = (
            {}
            if extra_action_out_fn is None or policy._is_tower
            else extra_action_out_fn(policy))
        if hasattr(policy, "_extra_action_fetches"):
            policy._extra_action_fetches.update(fetches)
        else:
            policy._extra_action_fetches = fetches

    DynamicTFPolicy.__init__(
        self,
        obs_space=obs_space,
        action_space=action_space,
        config=config,
        loss_fn=loss_fn,
        stats_fn=stats_fn,
        grad_stats_fn=grad_stats_fn,
        before_loss_init=before_loss_init_wrapper,
        make_model=make_model,
        action_sampler_fn=action_sampler_fn,
        action_distribution_fn=action_distribution_fn,
        existing_inputs=existing_inputs,
        existing_model=existing_model,
        get_batch_divisibility_req=get_batch_divisibility_req,
    )

    if after_init:
        after_init(self, obs_space, action_space, config)

    # Got to reset global_timestep again after this fake run-through.
    self.global_timestep = 0
def __init__(self,
             obs_space,
             action_space,
             config,
             existing_model=None,
             existing_inputs=None):
    """Build the templated policy.

    Applies the template's default config, validates the spaces, runs the
    `before_init` hook, constructs the underlying `DynamicTFPolicy`
    (which performs a fake run-through), runs `after_init`, and resets
    the global timestep counter afterwards.

    Args:
        obs_space: Observation space of the policy.
        action_space: Action space of the policy.
        config: Policy config dict (overrides the template defaults).
        existing_model: Optional existing model to reuse.
        existing_inputs: Optional existing input placeholders to reuse.
    """
    if get_default_config:
        # Template defaults first; caller-supplied keys win.
        config = dict(get_default_config(), **config)

    if validate_spaces:
        validate_spaces(self, obs_space, action_space, config)

    if before_init:
        before_init(self, obs_space, action_space, config)

    def before_loss_init_wrapper(policy, obs_space, action_space, config):
        # Run the user hook (if any), then record extra action fetches.
        if before_loss_init:
            before_loss_init(policy, obs_space, action_space, config)
        if extra_action_fetches_fn is None:
            policy._extra_action_fetches = {}
        else:
            # BUGFIX: the original called extra_action_fetches_fn(policy)
            # twice in a row; a single call suffices and avoids duplicate
            # side effects (e.g. building fetch ops twice).
            policy._extra_action_fetches = extra_action_fetches_fn(policy)

    DynamicTFPolicy.__init__(
        self,
        obs_space=obs_space,
        action_space=action_space,
        config=config,
        loss_fn=loss_fn,
        stats_fn=stats_fn,
        grad_stats_fn=grad_stats_fn,
        before_loss_init=before_loss_init_wrapper,
        make_model=make_model,
        action_sampler_fn=action_sampler_fn,
        action_distribution_fn=action_distribution_fn,
        existing_inputs=existing_inputs,
        existing_model=existing_model,
        view_requirements_fn=view_requirements_fn,
        get_batch_divisibility_req=get_batch_divisibility_req,
        obs_include_prev_action_reward=obs_include_prev_action_reward)

    if after_init:
        after_init(self, obs_space, action_space, config)

    # Got to reset global_timestep again after this fake run-through.
    self.global_timestep = 0
def __init__(self,
             obs_space,
             action_space,
             config,
             existing_model=None,
             existing_inputs=None):
    """Build the templated policy.

    Merges the template's default config into `config`, runs the
    `before_init` hook, constructs the underlying `DynamicTFPolicy`,
    and finally runs the `after_init` hook.

    Args:
        obs_space: Observation space of the policy.
        action_space: Action space of the policy.
        config: Policy config dict (overrides the template defaults).
        existing_model: Optional existing model to reuse.
        existing_inputs: Optional existing input placeholders to reuse.
    """
    if get_default_config:
        # Template defaults first; caller-supplied keys win.
        config = dict(get_default_config(), **config)

    if before_init:
        before_init(self, obs_space, action_space, config)

    def before_loss_init_wrapper(policy, obs_space, action_space, config):
        # Run the user hook (if any), then record extra action fetches.
        if before_loss_init:
            before_loss_init(policy, obs_space, action_space, config)
        if extra_action_fetches_fn is None:
            self._extra_action_fetches = {}
        else:
            self._extra_action_fetches = extra_action_fetches_fn(self)

    # NOTE: removed leftover "# DEBUG" print()/time.time() instrumentation
    # that polluted stdout on every policy construction.
    DynamicTFPolicy.__init__(
        self,
        obs_space,
        action_space,
        config,
        loss_fn,
        stats_fn=stats_fn,
        grad_stats_fn=grad_stats_fn,
        before_loss_init=before_loss_init_wrapper,
        make_model=make_model,
        action_sampler_fn=action_sampler_fn,
        existing_model=existing_model,
        existing_inputs=existing_inputs,
        get_batch_divisibility_req=get_batch_divisibility_req,
        obs_include_prev_action_reward=obs_include_prev_action_reward)

    if after_init:
        after_init(self, obs_space, action_space, config)
def __init__(self,
             obs_space,
             action_space,
             config,
             existing_model=None,
             existing_inputs=None):
    """Build the templated policy.

    Applies the template's default config, validates the spaces, runs the
    `before_init` hook, delegates graph construction to
    `DynamicTFPolicy.__init__`, then runs the `after_init` hook.

    Args:
        obs_space: Observation space of the policy.
        action_space: Action space of the policy.
        config: Policy config dict (overrides the template defaults).
        existing_model: Optional existing model to reuse.
        existing_inputs: Optional existing input placeholders to reuse.
    """
    if get_default_config:
        # Template defaults first; caller-supplied keys win.
        config = dict(get_default_config(), **config)

    if validate_spaces:
        validate_spaces(self, obs_space, action_space, config)

    if before_init:
        before_init(self, obs_space, action_space, config)

    def before_loss_init_wrapper(policy, obs_space, action_space, config):
        # Run the user hook (if any) first.
        if before_loss_init:
            before_loss_init(policy, obs_space, action_space, config)
        # NOTE(review): fetches are attached to the closed-over `self`,
        # not the `policy` argument — presumably identical for the main
        # policy, but verify if this wrapper is ever invoked on copies.
        self._extra_action_fetches = (
            {} if extra_action_fetches_fn is None
            else extra_action_fetches_fn(self))

    DynamicTFPolicy.__init__(
        self,
        obs_space=obs_space,
        action_space=action_space,
        config=config,
        loss_fn=loss_fn,
        stats_fn=stats_fn,
        grad_stats_fn=grad_stats_fn,
        before_loss_init=before_loss_init_wrapper,
        make_model=make_model,
        action_sampler_fn=action_sampler_fn,
        action_distribution_fn=action_distribution_fn,
        existing_model=existing_model,
        existing_inputs=existing_inputs,
        get_batch_divisibility_req=get_batch_divisibility_req,
        obs_include_prev_action_reward=obs_include_prev_action_reward)

    if after_init:
        after_init(self, obs_space, action_space, config)