def setup_components_and_tf_funcs(self, custom_getter=None):
    """
    Builds target actor/critic networks, their synchronization optimizers,
    and the TF template functions for target-action and target-Q evaluation.
    """
    custom_getter = super(DPGTargetModel, self).setup_components_and_tf_funcs(custom_getter)

    # Target actor network: same spec as the online network, separate scope.
    self.target_network = Network.from_spec(
        spec=self.network_spec,
        kwargs=dict(scope='target-network', summary_labels=self.summary_labels)
    )

    # Periodic weighted synchronization of target-network weights.
    self.target_network_optimizer = Synchronization(
        sync_frequency=self.target_sync_frequency,
        update_weight=self.target_update_weight
    )

    # Distributions used by the target policy.
    self.target_distributions = self.create_distributions()

    # Online and target critic networks, built from the same spec.
    self.critic_network = Network.from_spec(
        spec=self.critic_network_spec,
        kwargs=dict(scope='critic')
    )
    self.target_critic_network = Network.from_spec(
        spec=self.critic_network_spec,
        kwargs=dict(scope='target-critic')
    )

    self.critic_optimizer = Optimizer.from_spec(
        spec=self.critic_optimizer_spec,
        kwargs=dict(summary_labels=self.summary_labels)
    )

    # Periodic weighted synchronization of target-critic weights.
    self.target_critic_optimizer = Synchronization(
        sync_frequency=self.target_sync_frequency,
        update_weight=self.target_update_weight
    )

    # Template functions so variables are created once and reused on re-call.
    self.fn_target_actions_and_internals = tf.make_template(
        name_='target-actions-and-internals',
        func_=self.tf_target_actions_and_internals,
        custom_getter_=custom_getter
    )
    self.fn_predict_target_q = tf.make_template(
        name_='predict-target-q',
        func_=self.tf_predict_target_q,
        custom_getter_=custom_getter
    )

    return custom_getter
def initialize(self, custom_getter):
    """
    Builds the target network, its synchronization optimizer, the target
    distributions, and (optionally) next-state placeholders.
    """
    super(QModel, self).initialize(custom_getter)

    # TEMP: Random sampling fix — one extra placeholder per state input.
    if self.random_sampling_fix:
        self.next_state_inputs = {
            name: tf.placeholder(
                dtype=util.tf_dtype(state['type']),
                shape=(None,) + tuple(state['shape']),
                name='next-' + name
            )
            for name, state in self.states_spec.items()
        }

    # Target network: clone of the online network under scope 'target'.
    self.target_network = Network.from_spec(
        spec=self.network_spec,
        kwargs=dict(scope='target', summary_labels=self.summary_labels)
    )

    # Weighted synchronization of target-network weights.
    self.target_optimizer = Synchronization(
        sync_frequency=self.target_sync_frequency,
        update_weight=self.target_update_weight
    )

    # Distributions used when acting through the target network.
    self.target_distributions = self.generate_distributions(
        self.actions_spec, self.distributions_spec, self.summary_labels
    )
def initialize(self, custom_getter):
    """Set up the target network and its synchronization optimizer."""
    super(QModel, self).initialize(custom_getter)

    # Target network: clone of the online network under scope 'target'.
    self.target_network = Network.from_spec(
        spec=self.network_spec,
        kwargs=dict(scope='target', summary_labels=self.summary_labels)
    )

    # Target weights are pulled toward online weights at the given
    # frequency and interpolation weight.
    self.target_optimizer = Synchronization(
        sync_frequency=self.target_sync_frequency,
        update_weight=self.target_update_weight
    )
def setup_components_and_tf_funcs(self, custom_getter=None):
    """
    Builds the target actor network, DDPG critic networks (online and
    target), their optimizers, and the TF template functions.

    Fix: removed leftover commented-out debug prints and a personal
    "CHANGES HERE" marker around the critic construction; behavior is
    unchanged.
    """
    custom_getter = super(DPGTargetModel, self).setup_components_and_tf_funcs(custom_getter)

    # Target network
    self.target_network = Network.from_spec(
        spec=self.network_spec,
        kwargs=dict(scope='target-network', summary_labels=self.summary_labels))

    # Target network optimizer
    self.target_network_optimizer = Synchronization(
        sync_frequency=self.target_sync_frequency,
        update_weight=self.target_update_weight)

    # Target network distributions
    self.target_distributions = self.create_distributions()

    # Critic: hidden-layer sizes are read from the first two layer entries
    # of the critic network spec (assumes a list-of-layers spec — TODO confirm).
    size_t0 = self.critic_network_spec[0]['size']
    size_t1 = self.critic_network_spec[1]['size']
    self.critic = DDPGCriticNetwork(scope='critic', size_t0=size_t0, size_t1=size_t1)
    self.critic_optimizer = Optimizer.from_spec(
        spec=self.critic_optimizer_spec,
        kwargs=dict(summary_labels=self.summary_labels))
    self.target_critic = DDPGCriticNetwork(scope='target-critic', size_t0=size_t0, size_t1=size_t1)

    # Target critic optimizer
    self.target_critic_optimizer = Synchronization(
        sync_frequency=self.target_sync_frequency,
        update_weight=self.target_update_weight)

    # Template functions so variables are created once and reused on re-call.
    self.fn_target_actions_and_internals = tf.make_template(
        name_='target-actions-and-internals',
        func_=self.tf_target_actions_and_internals,
        custom_getter_=custom_getter)
    self.fn_predict_target_q = tf.make_template(
        name_='predict-target-q',
        func_=self.tf_predict_target_q,
        custom_getter_=custom_getter)

    return custom_getter
def __init__(self, states_spec, actions_spec, network_spec, config):
    """
    Distribution-model constructor: builds the policy network and one
    distribution per action, then defers to the superclass.

    Args:
        states_spec: States specification dict.
        actions_spec: Actions specification dict; per action reads
            'type', 'shape' and, depending on type, 'num_actions' /
            'min_value' / 'max_value'.
        network_spec: Network specification passed to Network.from_spec.
        config: Configuration object; reads scope, summary_labels,
            distributions and entropy_regularization.
    """
    with tf.name_scope(name=config.scope):
        # Network
        self.network = Network.from_spec(
            spec=network_spec,
            kwargs=dict(summary_labels=config.summary_labels))
        # Distributions
        self.distributions = dict()
        for name, action in actions_spec.items():
            with tf.name_scope(name=(name + '-distribution')):
                if config.distributions is not None and name in config.distributions:
                    # Explicit per-action distribution spec overrides the
                    # type-based defaults below.
                    kwargs = dict(action)
                    kwargs['summary_labels'] = config.summary_labels
                    self.distributions[name] = Distribution.from_spec(
                        spec=config.distributions[name],
                        kwargs=kwargs)
                elif action['type'] == 'bool':
                    self.distributions[name] = Bernoulli(
                        shape=action['shape'],
                        summary_labels=config.summary_labels)
                elif action['type'] == 'int':
                    self.distributions[name] = Categorical(
                        shape=action['shape'],
                        num_actions=action['num_actions'],
                        summary_labels=config.summary_labels)
                elif action['type'] == 'float':
                    if 'min_value' in action:
                        # Bounded continuous action -> Beta on [min_value, max_value].
                        self.distributions[name] = Beta(
                            shape=action['shape'],
                            min_value=action['min_value'],
                            max_value=action['max_value'],
                            summary_labels=config.summary_labels)
                    else:
                        # Unbounded continuous action -> Gaussian.
                        self.distributions[name] = Gaussian(
                            shape=action['shape'],
                            summary_labels=config.summary_labels)
    # Entropy regularization: None disables it; otherwise must be non-negative.
    assert config.entropy_regularization is None or config.entropy_regularization >= 0.0
    self.entropy_regularization = config.entropy_regularization
    super(DistributionModel, self).__init__(
        states_spec=states_spec,
        actions_spec=actions_spec,
        network_spec=network_spec,
        config=config)
def initialize(self, custom_getter): super(DistributionModel, self).initialize(custom_getter) # Network self.network = Network.from_spec( spec=self.network_spec, kwargs=dict(summary_labels=self.summary_labels)) # Distributions self.distributions = dict() for name, action in self.actions_spec.items(): with tf.name_scope(name=(name + '-distribution')): if self.distributions_spec is not None and name in self.distributions_spec: kwargs = dict(action) kwargs['summary_labels'] = self.summary_labels self.distributions[name] = Distribution.from_spec( spec=self.distributions_spec[name], kwargs=kwargs) elif action['type'] == 'bool': self.distributions[name] = Bernoulli( shape=action['shape'], summary_labels=self.summary_labels) elif action['type'] == 'int': self.distributions[name] = Categorical( shape=action['shape'], num_actions=action['num_actions'], summary_labels=self.summary_labels) elif action['type'] == 'float': if 'min_value' in action: self.distributions[name] = Beta( shape=action['shape'], min_value=action['min_value'], max_value=action['max_value'], summary_labels=self.summary_labels) else: self.distributions[name] = Gaussian( shape=action['shape'], summary_labels=self.summary_labels) # Network internals self.internal_inputs.extend(self.network.internal_inputs()) self.internal_inits.extend(self.network.internal_inits()) # KL divergence function self.fn_kl_divergence = tf.make_template(name_='kl-divergence', func_=self.tf_kl_divergence, custom_getter_=custom_getter)
def __init__(self, network_spec, scope='network-baseline', summary_labels=()):
    """
    Network baseline.

    Args:
        network_spec: Network specification dict; must describe a network
            without internal (recurrent) state inputs — asserted below.
        scope: TensorFlow name scope for the baseline.
        summary_labels: Summary labels passed to the superclass.
    """
    with tf.name_scope(name=scope):
        self.network = Network.from_spec(spec=network_spec)
        # Baseline cannot handle recurrent networks.
        assert len(self.network.internal_inputs()) == 0
        # Final linear layer mapping the network output to a scalar prediction.
        self.linear = Linear(size=1, bias=0.0, scope='prediction')
    super(NetworkBaseline, self).__init__(scope, summary_labels)
def __init__(self, network, scope='network-baseline', summary_labels=()):
    """
    Network baseline.

    Args:
        network: Network specification dict (passed to Network.from_spec);
            must describe a network without internals — asserted below.
        scope: TensorFlow scope for the baseline.
        summary_labels: Summary labels passed to the network and superclass.
    """
    self.network = Network.from_spec(
        spec=network,
        kwargs=dict(summary_labels=summary_labels))
    # Baseline cannot handle recurrent networks.
    assert len(self.network.internals_spec()) == 0
    # Final linear layer mapping the network output to a scalar prediction.
    self.linear = Linear(size=1, bias=0.0, scope='prediction')
    super(NetworkBaseline, self).__init__(scope=scope, summary_labels=summary_labels)
def initialize(self, custom_getter):
    """
    Creates target actor network, DDPG critics (online and target), their
    synchronization/critic optimizers, and TF template functions.
    """
    super(DPGTargetModel, self).initialize(custom_getter)

    # Target actor network mirrors the online network spec.
    self.target_network = Network.from_spec(
        spec=self.network_spec,
        kwargs=dict(scope='target-network', summary_labels=self.summary_labels)
    )

    # Weighted synchronization of target-network weights.
    self.target_network_optimizer = Synchronization(
        sync_frequency=self.target_sync_frequency,
        update_weight=self.target_update_weight
    )

    # Distributions used by the target policy.
    self.target_distributions = self.create_distributions()

    # Online critic: hidden-layer sizes come from the critic network spec.
    size_t0 = self.critic_network_spec['size_t0']
    size_t1 = self.critic_network_spec['size_t1']
    self.critic = DDPGCriticNetwork(scope='critic', size_t0=size_t0, size_t1=size_t1)
    self.critic_optimizer = Optimizer.from_spec(
        spec=self.critic_optimizer_spec,
        kwargs=dict(summary_labels=self.summary_labels)
    )

    # Target critic with the same layer sizes.
    self.target_critic = DDPGCriticNetwork(scope='target-critic', size_t0=size_t0, size_t1=size_t1)

    # Weighted synchronization of target-critic weights.
    self.target_critic_optimizer = Synchronization(
        sync_frequency=self.target_sync_frequency,
        update_weight=self.target_update_weight
    )

    # Template functions so variables are created once and reused on re-call.
    self.fn_target_actions_and_internals = tf.make_template(
        name_='target-actions-and-internals',
        func_=self.tf_target_actions_and_internals,
        custom_getter_=custom_getter
    )
    self.fn_predict_target_q = tf.make_template(
        name_='predict-target-q',
        func_=self.tf_predict_target_q,
        custom_getter_=custom_getter
    )
def initialize(self, custom_getter):
    """
    Builds the policy network, its internals spec, the action distributions,
    and the KL-divergence template function.
    """
    # The network must exist before the super-call, since internals_spec
    # is required during superclass initialization.
    self.network = Network.from_spec(
        spec=self.network_spec,
        kwargs=dict(summary_labels=self.summary_labels)
    )
    assert len(self.internals_spec) == 0
    self.internals_spec = self.network.internals_spec()

    super(DistributionModel, self).initialize(custom_getter)

    # One distribution object per action.
    self.distributions = self.create_distributions()

    # Template for the KL-divergence computation.
    self.fn_kl_divergence = tf.make_template(
        name_='kl-divergence',
        func_=self.tf_kl_divergence,
        custom_getter_=custom_getter
    )
def initialize(self, custom_getter):
    """
    Builds the policy network and distributions, registers the network's
    internals, and creates the KL-divergence template function.
    """
    super(DistributionModel, self).initialize(custom_getter)

    # Policy network.
    self.network = Network.from_spec(
        spec=self.network_spec,
        kwargs=dict(summary_labels=self.summary_labels)
    )

    # One distribution object per action.
    self.distributions = self.create_distributions()

    # Register the network's recurrent internals with the model.
    self.internals_input.extend(self.network.internals_input())
    self.internals_init.extend(self.network.internals_init())

    # Template for the KL-divergence computation, named under the model scope.
    self.fn_kl_divergence = tf.make_template(
        name_=(self.scope + '/kl-divergence'),
        func_=self.tf_kl_divergence,
        custom_getter_=custom_getter
    )
def __init__(self, states_spec, actions_spec, network_spec, config):
    """
    Q-model constructor: builds the target network and its synchronization
    optimizer, stores double-Q and Huber-loss settings, then defers to the
    superclass.
    """
    with tf.name_scope(name=config.scope):
        # Target network: clone of the online network under scope 'target'.
        self.target_network = Network.from_spec(
            spec=network_spec,
            kwargs=dict(scope='target')
        )
        # Weighted synchronization of target-network weights.
        self.target_optimizer = Synchronization(
            sync_frequency=config.target_sync_frequency,
            update_weight=config.target_update_weight
        )

    self.double_q_model = config.double_q_model

    # Huber-loss clipping threshold; None disables it, otherwise must be positive.
    assert config.huber_loss is None or config.huber_loss > 0.0
    self.huber_loss = config.huber_loss

    super(QModel, self).__init__(
        states_spec=states_spec,
        actions_spec=actions_spec,
        network_spec=network_spec,
        config=config
    )
def setup_components_and_tf_funcs(self, custom_getter=None): """ Creates and stores Network and Distribution objects. Generates and stores all template functions. """ # Create network before super-call, since non-empty internals_spec attribute (for RNN) is required subsequently. self.network = Network.from_spec( spec=self.network_spec, kwargs=dict(summary_labels=self.summary_labels)) # Now that we have the network component: We can create the internals placeholders. assert len(self.internals_spec) == 0 self.internals_spec = self.network.internals_spec() for name in sorted(self.internals_spec): internal = self.internals_spec[name] self.internals_input[name] = tf.placeholder( dtype=util.tf_dtype(internal['type']), shape=(None, ) + tuple(internal['shape']), name=('internal-' + name)) if internal['initialization'] == 'zeros': self.internals_init[name] = np.zeros(shape=internal['shape']) else: raise TensorForceError( "Invalid internal initialization value.") # And only then call super. custom_getter = super( DistributionModel, self).setup_components_and_tf_funcs(custom_getter) # Distributions self.distributions = self.create_distributions() # KL divergence function self.fn_kl_divergence = tf.make_template(name_='kl-divergence', func_=self.tf_kl_divergence, custom_getter_=custom_getter) return custom_getter
def initialize(self, custom_getter):
    """
    Builds the target network, its optimizer, and the target distributions
    on top of the QModel superclass initialization.

    Fix: removed the large commented-out "TEMP: Random sampling fix"
    dead-code block (next-state placeholders); behavior is unchanged.
    """
    super(QModel, self).initialize(custom_getter)

    # Target network (built from its own spec, separate from the online network).
    self.target_network = Network.from_spec(
        spec=self.target_network_spec,
        kwargs=dict(scope='target', summary_labels=self.summary_labels))

    # Target network optimizer
    self.target_optimizer = Optimizer.from_spec(
        spec=self.target_optimizer_spec)

    # Target network distributions
    self.target_distributions = self.create_distributions()
def initialize(self, custom_getter):
    """
    Builds the network and the shared TF variables/placeholders used for
    evolution-strategy coordination across workers: noise seeds, evolution
    scores, evaluation scores, and per-worker synchronization door/lock flags.
    """
    super(DeterministicESModel, self).initialize(custom_getter)

    # Network
    self.network = Network.from_spec(
        spec=self.network_spec,
        kwargs=dict(summary_labels=self.summary_labels))

    # Network internals
    self.internals_input.extend(self.network.internals_input())
    self.internals_init.extend(self.network.internals_init())

    # Seed: (num_workers, vec_len) int variable shared via a graph collection
    # so all workers in the same graph reuse a single instance.
    collection = self.graph.get_collection(name='noise_seed')
    if len(collection) == 0:
        self.seed = tf.get_variable(
            name='noise_seed',
            shape=(self.num_workers, self.vec_len),
            dtype=util.tf_dtype('int'),
            initializer=tf.zeros_initializer(dtype=util.tf_dtype('int')))
        self.graph.add_to_collection(name='noise_seed', value=self.seed)
    else:
        assert len(collection) == 1
        self.seed = collection[0]

    # Score: same shared-variable-via-collection pattern as the seed.
    collection = self.graph.get_collection(name='evolution_score')
    if len(collection) == 0:
        self.score = tf.get_variable(
            name='evolution_score',
            shape=(self.num_workers, 2 * self.vec_len),
            dtype=util.tf_dtype('float'),
            initializer=tf.zeros_initializer(dtype=util.tf_dtype('float')))
        self.graph.add_to_collection(name='evolution_score', value=self.score)
    else:
        assert len(collection) == 1
        self.score = collection[0]

    # Evaluation score: same shared-variable-via-collection pattern.
    collection = self.graph.get_collection(name='evaluation_score')
    if len(collection) == 0:
        self.eval_score = tf.get_variable(
            name='evaluation_score',
            shape=(self.num_workers, self.eval_len),
            dtype=util.tf_dtype('float'),
            initializer=tf.zeros_initializer(dtype=util.tf_dtype('float')))
        self.graph.add_to_collection(name='evaluation_score', value=self.eval_score)
    else:
        assert len(collection) == 1
        self.eval_score = collection[0]

    # Per-worker synchronization flags: a 'door' (init 0) and a 'lock'
    # (init 1) for each worker 1..num_workers-1, stored pairwise in the
    # 'sync_var' collection.
    self.doors = {}
    self.locks = {}
    self.lock_collection = self.graph.get_collection(name="sync_var")
    if len(self.lock_collection) == 0:
        with tf.variable_scope('sync_var'):
            for i in range(1, self.num_workers):
                self.doors[i] = tf.get_variable(name="sync_point_%d" % i,
                                                dtype=util.tf_dtype('int'),
                                                initializer=tf.constant(
                                                    0, dtype=tf.int32))
                self.locks[i] = tf.get_variable(name="lock_flag_%d"
                                                % i,
                                                dtype=util.tf_dtype('int'),
                                                initializer=tf.constant(
                                                    1, dtype=tf.int32))
                self.graph.add_to_collection(name='sync_var', value=self.doors[i])
                self.graph.add_to_collection(name='sync_var', value=self.locks[i])
    else:
        # Collection already populated: recover door/lock pairs by position.
        assert len(self.lock_collection) == 2 * (self.num_workers - 1)
        for i in range(1, self.num_workers):
            self.doors[i] = self.lock_collection[2 * (i - 1)]
            self.locks[i] = self.lock_collection[2 * (i - 1) + 1]
    # Refresh the cached collection after possible additions above.
    self.lock_collection = self.graph.get_collection(name="sync_var")

    # Seed and score placeholder
    self.seed_ph = tf.placeholder(dtype=tf.int32,
                                  shape=(self.vec_len, ),
                                  name='seed_ph')
    self.score_ph = tf.placeholder(dtype=tf.float32,
                                   shape=(2 * self.vec_len, ),
                                   name='score_ph')
    self.eval_score_ph = tf.placeholder(dtype=tf.float32,
                                        shape=(self.eval_len, ),
                                        name='evaluation_score_ph')