def __init__(self, states_spec, actions_spec, batched_observe, batch_size, memory, first_update, update_frequency, repeat_update):
    """
    Creates a memory-based agent that stores experiences and samples batches
    from them for model updates.

    Args:
        states_spec: Dict containing at least one state definition. In the case
            of a single state, keys `shape` and `type` are necessary. For
            multiple states, pass a dict of dicts where each state is a dict
            itself with a unique name as its key.
        actions_spec: Dict containing at least one action definition. Actions
            have types and either `num_actions` for discrete actions or a
            `shape` for continuous actions. Consult documentation and tests
            for more.
        batched_observe: Optional int specifying how many observe calls are
            batched into one session run. Without batching, throughput will be
            lower because every `observe` triggers a session invocation to
            update rewards in the graph.
        batch_size: Int specifying batch size used to sample from memory.
            Should be smaller than memory size.
        memory: Dict describing memory via `type` (e.g. `replay`) and
            `capacity`.
        first_update: Int describing at which time step the first update is
            performed. Should be larger than batch size.
        update_frequency: Int specifying number of observe steps to perform
            until an update is executed.
        repeat_update: Int specifying how many update steps are performed per
            update, where each update step implies sampling a batch from the
            memory and passing it to the model.
    """
    # Record memory/update hyper-parameters before running the parent
    # constructor (kept in the original order).
    self.memory_spec = memory
    self.batch_size = batch_size
    self.first_update = first_update
    self.update_frequency = update_frequency
    self.repeat_update = repeat_update

    super(MemoryAgent, self).__init__(
        states_spec=states_spec,
        actions_spec=actions_spec,
        batched_observe=batched_observe
    )

    # Instantiate the concrete memory object from its spec, bound to this
    # agent's (possibly normalized) state/action specifications.
    self.memory = Memory.from_spec(
        spec=self.memory_spec,
        kwargs=dict(
            states_spec=self.states_spec,
            actions_spec=self.actions_spec
        )
    )
def initialize(self, custom_getter):
    """
    Builds the memory and optimizer components and registers all TensorFlow
    template functions of this model.

    Args:
        custom_getter: Custom getter forwarded to `tf.make_template` for every
            registered template function.
    """
    super(MemoryModel, self).initialize(custom_getter)

    # Memory component, created from its spec.
    self.memory = Memory.from_spec(
        spec=self.memory_spec,
        kwargs=dict(
            states=self.states_spec,
            internals=self.internals_spec,
            actions=self.actions_spec,
            summary_labels=self.summary_labels
        )
    )

    # Optimizer component, created from its spec.
    self.optimizer = Optimizer.from_spec(
        spec=self.optimizer_spec,
        kwargs=dict(summary_labels=self.summary_labels)
    )

    # TensorFlow functions: wrap each tf_* implementation in a template so
    # that variables are shared across repeated calls. The (attribute,
    # template name, function) triples below mirror the original one-by-one
    # assignments exactly.
    template_table = (
        ('fn_discounted_cumulative_reward', 'discounted-cumulative-reward', self.tf_discounted_cumulative_reward),
        ('fn_reference', 'reference', self.tf_reference),
        ('fn_loss_per_instance', 'loss-per-instance', self.tf_loss_per_instance),
        ('fn_regularization_losses', 'regularization-losses', self.tf_regularization_losses),
        ('fn_loss', 'loss', self.tf_loss),
        ('fn_optimization', 'optimization', self.tf_optimization),
        ('fn_import_experience', 'import-experience', self.tf_import_experience)
    )
    for attribute, template_name, function in template_table:
        setattr(self, attribute, tf.make_template(
            name_=template_name,
            func_=function,
            custom_getter_=custom_getter
        ))
def __init__(self, config, model=None):
    """
    Creates a memory-based agent from a configuration object.

    Args:
        config: Configuration object; missing entries are filled in from
            `MemoryAgent.default_config`. Must provide `batch_size`, `memory`,
            `memory_capacity`, `states`, `actions`, `update_frequency`,
            `first_update` and `repeat_update`.
        model: Optional model instance forwarded to the parent constructor.
    """
    # Fill unspecified config entries with the class defaults first.
    config.default(MemoryAgent.default_config)
    super(MemoryAgent, self).__init__(config, model)

    self.batch_size = config.batch_size

    # Build the memory from its configuration section.
    self.memory = Memory.from_config(
        config=config.memory,
        kwargs=dict(
            capacity=config.memory_capacity,
            states_config=config.states,
            actions_config=config.actions
        )
    )

    # Update-scheduling parameters.
    self.update_frequency = config.update_frequency
    self.first_update = config.first_update
    self.repeat_update = config.repeat_update
def setup_components_and_tf_funcs(self, custom_getter=None):
    """
    Constructs the memory and the optimizer objects.
    Generates and stores all template functions.

    Args:
        custom_getter: Optional custom getter; the value returned by the
            superclass call is used for all templates and returned unchanged.

    Returns:
        The custom getter obtained from the superclass.
    """
    custom_getter = super(MemoryModel, self).setup_components_and_tf_funcs(custom_getter)

    # Memory component, created from its spec.
    self.memory = Memory.from_spec(
        spec=self.memory_spec,
        kwargs=dict(
            states=self.states_spec,
            internals=self.internals_spec,
            actions=self.actions_spec,
            summary_labels=self.summary_labels
        )
    )

    # Optimizer component, created from its spec.
    self.optimizer = Optimizer.from_spec(
        spec=self.optimizer_spec,
        kwargs=dict(summary_labels=self.summary_labels)
    )

    # TensorFlow functions: each tf_* implementation becomes a template so
    # variables are shared across calls. The triples reproduce the original
    # individual assignments exactly.
    for attribute, template_name, function in (
        ('fn_discounted_cumulative_reward', 'discounted-cumulative-reward', self.tf_discounted_cumulative_reward),
        ('fn_reference', 'reference', self.tf_reference),
        ('fn_loss_per_instance', 'loss-per-instance', self.tf_loss_per_instance),
        ('fn_regularization_losses', 'regularization-losses', self.tf_regularization_losses),
        ('fn_loss', 'loss', self.tf_loss),
        ('fn_optimization', 'optimization', self.tf_optimization),
        ('fn_import_experience', 'import-experience', self.tf_import_experience)
    ):
        setattr(self, attribute, tf.make_template(
            name_=template_name,
            func_=function,
            custom_getter_=custom_getter
        ))

    return custom_getter
def __init__(self, states_spec, actions_spec, config):
    """
    Creates a memory-based agent from state/action specs plus a config object.

    Args:
        states_spec: State specification(s) forwarded to the parent class.
        actions_spec: Action specification(s) forwarded to the parent class.
        config: Configuration object; missing entries are filled in from
            `MemoryAgent.default_config`. Must provide `batch_size`,
            `memory_capacity`, `memory`, `update_frequency`, `first_update`
            and `repeat_update`.
    """
    config.default(MemoryAgent.default_config)

    # Hyper-parameters must be set before the parent constructor runs.
    self.batch_size = config.batch_size
    self.memory_capacity = config.memory_capacity
    self.update_frequency = config.update_frequency
    self.first_update = config.first_update
    self.repeat_update = config.repeat_update

    super(MemoryAgent, self).__init__(states_spec, actions_spec, config)

    # Build the memory from its spec using the agent's normalized specs.
    self.memory = Memory.from_spec(
        spec=config.memory,
        kwargs=dict(
            capacity=self.memory_capacity,
            states_spec=self.states_spec,
            actions_spec=self.actions_spec
        )
    )
def __init__(self, states_spec, actions_spec, config):
    """
    Creates a memory-based agent whose memory is described by `config.memory`.

    Args:
        states_spec: State specification(s) forwarded to the parent class.
        actions_spec: Action specification(s) forwarded to the parent class.
        config: Configuration object providing `memory`, `batch_size`,
            `first_update`, `update_frequency` and `repeat_update`.
    """
    # Keep the raw memory spec and update hyper-parameters before the parent
    # constructor runs.
    self.memory_spec = config.memory
    self.batch_size = config.batch_size
    self.first_update = config.first_update
    self.update_frequency = config.update_frequency
    self.repeat_update = config.repeat_update

    super(MemoryAgent, self).__init__(
        states_spec=states_spec,
        actions_spec=actions_spec,
        config=config
    )

    # Instantiate the memory from its spec with the agent's normalized specs.
    self.memory = Memory.from_spec(
        spec=self.memory_spec,
        kwargs=dict(
            states_spec=self.states_spec,
            actions_spec=self.actions_spec
        )
    )
def __init__(self, states_spec, actions_spec, preprocessing, exploration, reward_preprocessing, batched_observe, batch_size, memory, first_update, update_frequency, repeat_update):
    """
    Creates a memory-based agent with preprocessing/exploration settings.

    Args:
        states_spec: State specification(s) forwarded to the parent class.
        actions_spec: Action specification(s) forwarded to the parent class.
        preprocessing: State preprocessing spec forwarded to the parent class.
        exploration: Exploration spec forwarded to the parent class.
        reward_preprocessing: Reward preprocessing spec forwarded to the
            parent class.
        batched_observe: Batching setting for observe calls, forwarded to the
            parent class.
        batch_size: Int batch size used to sample from memory.
        memory: Dict describing memory via `type` (e.g. `replay`) and
            `capacity`.
        first_update: Int time step at which the first update happens.
        update_frequency: Int number of observe steps between updates.
        repeat_update: Int number of update steps performed per update.
    """
    # Store memory/update hyper-parameters before the parent constructor runs.
    self.memory_spec = memory
    self.batch_size = batch_size
    self.first_update = first_update
    self.update_frequency = update_frequency
    self.repeat_update = repeat_update

    super(MemoryAgent, self).__init__(
        states_spec=states_spec,
        actions_spec=actions_spec,
        preprocessing=preprocessing,
        exploration=exploration,
        reward_preprocessing=reward_preprocessing,
        batched_observe=batched_observe
    )

    # Build the memory object from its spec.
    self.memory = Memory.from_spec(
        spec=self.memory_spec,
        kwargs=dict(
            states_spec=self.states_spec,
            actions_spec=self.actions_spec
        )
    )
def __init__(
    self,
    states_spec,
    actions_spec,
    batched_observe=1000,
    scope='memory_agent',
    # parameters specific to LearningAgents
    summary_spec=None,
    network_spec=None,
    discount=0.99,
    device=None,
    session_config=None,
    saver_spec=None,
    distributed_spec=None,
    optimizer=None,
    variable_noise=None,
    states_preprocessing_spec=None,
    explorations_spec=None,
    reward_preprocessing_spec=None,
    distributions_spec=None,
    entropy_regularization=None,
    # parameters specific to MemoryAgents
    batch_size=1000,
    memory=None,
    first_update=10000,
    update_frequency=4,
    repeat_update=1
):
    """
    Args:
        batch_size (int): The batch size used to sample from memory. Should be
            smaller than memory size.
        memory (Union[dict,Memory]): Dict describing memory via `type`
            (e.g. `replay`) and `capacity`. Alternatively, an actual Memory
            object can be passed in directly.
        first_update (int): At which time step the first update is performed.
            Should be larger than batch size.
        update_frequency (int): Number of `observe` steps to perform until an
            update is executed.
        repeat_update (int): How many update steps are performed per update,
            where each update step implies sampling a batch from the memory
            and passing it to the model.
    """
    super(MemoryAgent, self).__init__(
        states_spec=states_spec,
        actions_spec=actions_spec,
        batched_observe=batched_observe,
        scope=scope,
        # parameters specific to LearningAgent
        summary_spec=summary_spec,
        network_spec=network_spec,
        discount=discount,
        device=device,
        session_config=session_config,
        saver_spec=saver_spec,
        distributed_spec=distributed_spec,
        optimizer=optimizer,
        variable_noise=variable_noise,
        states_preprocessing_spec=states_preprocessing_spec,
        explorations_spec=explorations_spec,
        reward_preprocessing_spec=reward_preprocessing_spec,
        distributions_spec=distributions_spec,
        entropy_regularization=entropy_regularization
    )

    if isinstance(memory, Memory):
        # An already-constructed Memory object was passed in: use it as-is
        # and keep no spec for it.
        self.memory_spec = None
        self.memory = memory
    else:
        # A spec (or nothing) was given; fall back to a default replay memory
        # spec when none was provided, then build the actual Memory object.
        self.memory_spec = dict(type='replay', capacity=100000) if memory is None else memory
        self.memory = Memory.from_spec(
            spec=self.memory_spec,
            kwargs=dict(
                states_spec=self.states_spec,
                actions_spec=self.actions_spec
            )
        )

    # Update-scheduling hyper-parameters.
    self.batch_size = batch_size
    self.first_update = first_update
    self.update_frequency = update_frequency
    self.repeat_update = repeat_update