def initialize(self, custom_getter):
    super(PGModel, self).initialize(custom_getter)

    # Baseline
    if self.baseline_spec is None:
        assert self.baseline_mode is None
    elif all(name in self.states_spec for name in self.baseline_spec):
        # Implies AggregatedBaseline.
        assert self.baseline_mode == 'states'
        self.baseline = AggregatedBaseline(baselines=self.baseline_spec)
    else:
        assert self.baseline_mode is not None
        self.baseline = Baseline.from_spec(
            spec=self.baseline_spec,
            kwargs=dict(summary_labels=self.summary_labels)
        )

    # Baseline optimizer
    if self.baseline_optimizer_spec is not None:
        assert self.baseline_mode is not None
        self.baseline_optimizer = Optimizer.from_spec(spec=self.baseline_optimizer_spec)

    # TODO: Baseline internal states !!! (see target_network q_model)

    # Reward estimation
    self.fn_reward_estimation = tf.make_template(
        name_='reward-estimation',
        func_=self.tf_reward_estimation,
        custom_getter_=custom_getter
    )

    # Baseline loss
    self.fn_baseline_loss = tf.make_template(
        name_='baseline-loss',
        func_=self.tf_baseline_loss,
        custom_getter_=custom_getter
    )
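# Illustration of the two baseline-spec shapes the branching above accepts.
# A minimal sketch: the 'mlp' type and its 'sizes' argument follow TensorForce's
# usual spec-dict convention but are assumptions here, not confirmed by this file.
#
#     # Single baseline fed by the network output (baseline_mode='network'):
#     baseline_spec = dict(type='mlp', sizes=[32, 32])
#
#     # One baseline per named state (baseline_mode='states'); because every key
#     # is also in states_spec, this triggers the AggregatedBaseline branch:
#     baseline_spec = dict(
#         state1=dict(type='mlp', sizes=[32, 32]),
#         state2=dict(type='mlp', sizes=[64])
#     )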
def __init__(self, optimizer):
    """
    Creates a new meta optimizer instance.

    Args:
        optimizer: The optimizer which is modified by this meta optimizer.
    """
    super(MetaOptimizer, self).__init__()

    self.optimizer = Optimizer.from_spec(spec=optimizer)
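# Usage sketch for the constructor above. Assumes TensorForce's spec-dict
# convention in which 'type' selects the optimizer class; the 'adam' type and
# 'learning_rate' value are illustrative:
#
#     meta = MetaOptimizer(optimizer=dict(type='adam', learning_rate=1e-3))
#     # meta.optimizer is now a constructed Optimizer instance, not a dict.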
def initialize(self, custom_getter):
    super(MemoryModel, self).initialize(custom_getter)

    # Memory
    self.memory = Memory.from_spec(
        spec=self.memory_spec,
        kwargs=dict(
            states=self.states_spec,
            internals=self.internals_spec,
            actions=self.actions_spec,
            summary_labels=self.summary_labels
        )
    )

    # Optimizer
    self.optimizer = Optimizer.from_spec(
        spec=self.optimizer_spec,
        kwargs=dict(summary_labels=self.summary_labels)
    )

    # TensorFlow functions
    self.fn_discounted_cumulative_reward = tf.make_template(
        name_='discounted-cumulative-reward',
        func_=self.tf_discounted_cumulative_reward,
        custom_getter_=custom_getter
    )
    self.fn_reference = tf.make_template(
        name_='reference',
        func_=self.tf_reference,
        custom_getter_=custom_getter
    )
    self.fn_loss_per_instance = tf.make_template(
        name_='loss-per-instance',
        func_=self.tf_loss_per_instance,
        custom_getter_=custom_getter
    )
    self.fn_regularization_losses = tf.make_template(
        name_='regularization-losses',
        func_=self.tf_regularization_losses,
        custom_getter_=custom_getter
    )
    self.fn_loss = tf.make_template(
        name_='loss',
        func_=self.tf_loss,
        custom_getter_=custom_getter
    )
    self.fn_optimization = tf.make_template(
        name_='optimization',
        func_=self.tf_optimization,
        custom_getter_=custom_getter
    )
    self.fn_import_experience = tf.make_template(
        name_='import-experience',
        func_=self.tf_import_experience,
        custom_getter_=custom_getter
    )
def __init__(self, optimizer, scope='meta-optimizer', summary_labels=(), **kwargs):
    """
    Creates a new meta optimizer instance.

    Args:
        optimizer: The optimizer which is modified by this meta optimizer.
        scope: TensorFlow scope of this optimizer.
        summary_labels: Summary labels forwarded to the parent optimizer.
        **kwargs: Additional kwargs passed to the inner optimizer's spec.
    """
    self.optimizer = Optimizer.from_spec(spec=optimizer, kwargs=kwargs)

    super(MetaOptimizer, self).__init__(scope=scope, summary_labels=summary_labels)
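# Because the inner optimizer is itself built via Optimizer.from_spec, meta
# optimizer specs nest naturally. A hedged sketch: 'multi_step' and its
# 'num_steps' argument are assumed registered spec names, as in TensorForce's
# optimizer module, and are not confirmed by this file:
#
#     spec = dict(
#         type='multi_step',
#         optimizer=dict(type='adam', learning_rate=1e-3),
#         num_steps=10
#     )
#     optimizer = Optimizer.from_spec(spec=spec)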
def setup_components_and_tf_funcs(self, custom_getter=None):
    custom_getter = super(DPGTargetModel, self).setup_components_and_tf_funcs(custom_getter)

    # Target network
    self.target_network = Network.from_spec(
        spec=self.network_spec,
        kwargs=dict(scope='target-network', summary_labels=self.summary_labels)
    )

    # Target network optimizer
    self.target_network_optimizer = Synchronization(
        sync_frequency=self.target_sync_frequency,
        update_weight=self.target_update_weight
    )

    # Target network distributions
    self.target_distributions = self.create_distributions()

    # Critic: layer sizes are read from the first two entries of the critic network spec.
    size_t0 = self.critic_network_spec[0]['size']
    size_t1 = self.critic_network_spec[1]['size']

    self.critic = DDPGCriticNetwork(scope='critic', size_t0=size_t0, size_t1=size_t1)
    self.critic_optimizer = Optimizer.from_spec(
        spec=self.critic_optimizer_spec,
        kwargs=dict(summary_labels=self.summary_labels)
    )

    self.target_critic = DDPGCriticNetwork(scope='target-critic', size_t0=size_t0, size_t1=size_t1)

    # Target critic optimizer
    self.target_critic_optimizer = Synchronization(
        sync_frequency=self.target_sync_frequency,
        update_weight=self.target_update_weight
    )

    self.fn_target_actions_and_internals = tf.make_template(
        name_='target-actions-and-internals',
        func_=self.tf_target_actions_and_internals,
        custom_getter_=custom_getter
    )
    self.fn_predict_target_q = tf.make_template(
        name_='predict-target-q',
        func_=self.tf_predict_target_q,
        custom_getter_=custom_getter
    )

    return custom_getter
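# The indexing above (critic_network_spec[0]['size'], [1]['size']) implies the
# critic spec is a list of layer dicts, of which only the first two sizes are
# used. A sketch of a compatible spec (the 'dense' type and the sizes are
# illustrative):
#
#     critic_network_spec = [
#         dict(type='dense', size=400),
#         dict(type='dense', size=300)
#     ]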
def create_tf_operations(self, config):
    """
    Creates generic TensorFlow operations and placeholders required for models.

    Args:
        config: Model configuration which must contain entries for states and actions.
    """
    self.action_taken = dict()
    self.internal_inputs = list()
    self.internal_outputs = list()
    self.internal_inits = list()

    # Placeholders
    with tf.variable_scope('placeholder'):
        # States
        self.state = dict()
        for name, state in config.states.items():
            self.state[name] = tf.placeholder(
                dtype=util.tf_dtype(state.type),
                shape=(None,) + tuple(state.shape),
                name=name
            )

        # Actions
        self.action = dict()
        self.discrete_actions = []
        self.continuous_actions = []
        for name, action in config.actions.items():  # Fixed: iterate items, not keys.
            if action.continuous:
                if not self.__class__.allows_continuous_actions:
                    raise TensorForceError("Model does not support continuous actions.")
                self.action[name] = tf.placeholder(
                    dtype=util.tf_dtype('float'),
                    shape=(None,) + tuple(action.shape),
                    name=name
                )
            else:
                if not self.__class__.allows_discrete_actions:
                    raise TensorForceError("Model does not support discrete actions.")
                self.action[name] = tf.placeholder(
                    dtype=util.tf_dtype('int'),
                    shape=(None,) + tuple(action.shape),
                    name=name
                )

        # Reward & terminal
        self.reward = tf.placeholder(dtype=tf.float32, shape=(None,), name='reward')
        self.terminal = tf.placeholder(dtype=tf.bool, shape=(None,), name='terminal')

        # Deterministic action flag
        self.deterministic = tf.placeholder(dtype=tf.bool, shape=(), name='deterministic')

    # Optimizer
    if config.optimizer is not None:
        with tf.variable_scope('optimization'):
            self.optimizer = Optimizer.from_config(
                config=config.optimizer,
                kwargs=dict(learning_rate=config.learning_rate)
            )
    else:
        self.optimizer = None
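# Sketch of the config entries this method consumes (attribute-style access,
# mirroring state.type / action.continuous above; pseudo-notation, since the
# config class itself is not shown in this file):
#
#     config.states  -> {'observation': state(type='float', shape=(4,))}
#     config.actions -> {'move': action(continuous=True, shape=())}
#
# These would produce placeholders of shape (None, 4) and (None,), the leading
# None being the batch dimension.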
def setup_components_and_tf_funcs(self, custom_getter=None):
    custom_getter = super(DPGTargetModel, self).setup_components_and_tf_funcs(custom_getter)

    # Target network
    self.target_network = Network.from_spec(
        spec=self.network_spec,
        kwargs=dict(scope='target-network', summary_labels=self.summary_labels)
    )

    # Target network optimizer
    self.target_network_optimizer = Synchronization(
        sync_frequency=self.target_sync_frequency,
        update_weight=self.target_update_weight
    )

    # Target network distributions
    self.target_distributions = self.create_distributions()

    # Critic
    self.critic_network = Network.from_spec(
        spec=self.critic_network_spec,
        kwargs=dict(scope='critic')
    )
    self.target_critic_network = Network.from_spec(
        spec=self.critic_network_spec,
        kwargs=dict(scope='target-critic')
    )
    self.critic_optimizer = Optimizer.from_spec(
        spec=self.critic_optimizer_spec,
        kwargs=dict(summary_labels=self.summary_labels)
    )

    # Target critic optimizer
    self.target_critic_optimizer = Synchronization(
        sync_frequency=self.target_sync_frequency,
        update_weight=self.target_update_weight
    )

    self.fn_target_actions_and_internals = tf.make_template(
        name_='target-actions-and-internals',
        func_=self.tf_target_actions_and_internals,
        custom_getter_=custom_getter
    )
    self.fn_predict_target_q = tf.make_template(
        name_='predict-target-q',
        func_=self.tf_predict_target_q,
        custom_getter_=custom_getter
    )

    return custom_getter
def setup_components_and_tf_funcs(self, custom_getter=None):
    """
    Constructs the memory and the optimizer objects.
    Generates and stores all template functions.
    """
    custom_getter = super(MemoryModel, self).setup_components_and_tf_funcs(custom_getter)

    # Memory
    self.memory = Memory.from_spec(
        spec=self.memory_spec,
        kwargs=dict(
            states=self.states_spec,
            internals=self.internals_spec,
            actions=self.actions_spec,
            summary_labels=self.summary_labels
        )
    )

    # Optimizer
    self.optimizer = Optimizer.from_spec(
        spec=self.optimizer_spec,
        kwargs=dict(summary_labels=self.summary_labels)
    )

    # TensorFlow functions
    self.fn_discounted_cumulative_reward = tf.make_template(
        name_='discounted-cumulative-reward',
        func_=self.tf_discounted_cumulative_reward,
        custom_getter_=custom_getter
    )
    self.fn_reference = tf.make_template(
        name_='reference',
        func_=self.tf_reference,
        custom_getter_=custom_getter
    )
    self.fn_loss_per_instance = tf.make_template(
        name_='loss-per-instance',
        func_=self.tf_loss_per_instance,
        custom_getter_=custom_getter
    )
    self.fn_regularization_losses = tf.make_template(
        name_='regularization-losses',
        func_=self.tf_regularization_losses,
        custom_getter_=custom_getter
    )
    self.fn_loss = tf.make_template(
        name_='loss',
        func_=self.tf_loss,
        custom_getter_=custom_getter
    )
    self.fn_optimization = tf.make_template(
        name_='optimization',
        func_=self.tf_optimization,
        custom_getter_=custom_getter
    )
    self.fn_import_experience = tf.make_template(
        name_='import-experience',
        func_=self.tf_import_experience,
        custom_getter_=custom_getter
    )

    return custom_getter
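# Hedged sketch of specs a MemoryModel might be configured with; 'replay' and
# 'adam' are TensorForce's usual registered type names, and the capacity and
# learning rate are illustrative values, not confirmed by this file:
#
#     memory_spec = dict(type='replay', capacity=100000)
#     optimizer_spec = dict(type='adam', learning_rate=1e-3)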
def initialize(self, custom_getter):
    super(DPGTargetModel, self).initialize(custom_getter)

    # Target network
    self.target_network = Network.from_spec(
        spec=self.network_spec,
        kwargs=dict(scope='target-network', summary_labels=self.summary_labels)
    )

    # Target network optimizer
    self.target_network_optimizer = Synchronization(
        sync_frequency=self.target_sync_frequency,
        update_weight=self.target_update_weight
    )

    # Target network distributions
    self.target_distributions = self.create_distributions()

    # Critic
    size_t0 = self.critic_network_spec['size_t0']
    size_t1 = self.critic_network_spec['size_t1']

    self.critic = DDPGCriticNetwork(scope='critic', size_t0=size_t0, size_t1=size_t1)
    self.critic_optimizer = Optimizer.from_spec(
        spec=self.critic_optimizer_spec,
        kwargs=dict(summary_labels=self.summary_labels)
    )

    self.target_critic = DDPGCriticNetwork(scope='target-critic', size_t0=size_t0, size_t1=size_t1)

    # Target critic optimizer
    self.target_critic_optimizer = Synchronization(
        sync_frequency=self.target_sync_frequency,
        update_weight=self.target_update_weight
    )

    self.fn_target_actions_and_internals = tf.make_template(
        name_='target-actions-and-internals',
        func_=self.tf_target_actions_and_internals,
        custom_getter_=custom_getter
    )
    self.fn_predict_target_q = tf.make_template(
        name_='predict-target-q',
        func_=self.tf_predict_target_q,
        custom_getter_=custom_getter
    )
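# Note the spec shape differs from the list-based variant earlier in this file:
# here critic_network_spec is a dict with explicit size keys, e.g.
#
#     critic_network_spec = dict(size_t0=400, size_t1=300)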
def __init__(self, states_spec, actions_spec, network_spec, config):
    # Baseline mode
    assert config.baseline_mode is None or config.baseline_mode in ('states', 'network')
    self.baseline_mode = config.baseline_mode

    with tf.name_scope(name=config.scope):
        # Baseline
        if config.baseline is None:
            assert self.baseline_mode is None
            self.baseline = None
        elif all(name in states_spec for name in config.baseline):
            # Implies AggregatedBaseline
            assert self.baseline_mode == 'states'
            self.baseline = AggregatedBaseline(baselines=config.baseline)
        else:
            assert self.baseline_mode is not None
            self.baseline = Baseline.from_spec(
                spec=config.baseline,
                kwargs=dict(summary_labels=config.summary_labels)
            )

        # Baseline optimizer
        if config.baseline_optimizer is None:
            self.baseline_optimizer = None
        else:
            assert self.baseline_mode is not None
            self.baseline_optimizer = Optimizer.from_spec(spec=config.baseline_optimizer)

    # Generalized advantage function
    assert config.gae_lambda is None or \
        (0.0 <= config.gae_lambda <= 1.0 and self.baseline_mode is not None)
    self.gae_lambda = config.gae_lambda

    super(PGModel, self).__init__(
        states_spec=states_spec,
        actions_spec=actions_spec,
        network_spec=network_spec,
        config=config
    )
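# For reference, the generalized advantage estimate that gae_lambda controls
# (standard GAE, Schulman et al. 2015; stated here since it is not restated
# elsewhere in this file):
#
#     delta_t = r_t + discount * V(s_{t+1}) - V(s_t)
#     A_t     = sum_k (discount * gae_lambda)^k * delta_{t+k}
#
# gae_lambda=0 reduces to the one-step TD residual, gae_lambda=1 to the full
# discounted return minus the baseline, which is why the assertion above
# requires a baseline whenever gae_lambda is set.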
def initialize(self, custom_getter):
    super(PGModel, self).initialize(custom_getter)

    # Baseline
    if self.baseline is None:
        assert self.baseline_mode is None
        self.baseline = None
    elif all(name in self.states_spec for name in self.baseline):
        # Implies AggregatedBaseline.
        assert self.baseline_mode == 'states'
        self.baseline = AggregatedBaseline(baselines=self.baseline)
    else:
        assert self.baseline_mode is not None
        self.baseline = Baseline.from_spec(
            spec=self.baseline,
            kwargs=dict(summary_labels=self.summary_labels)
        )

    # Baseline optimizer
    if self.baseline_optimizer is None:
        self.baseline_optimizer = None
    else:
        assert self.baseline_mode is not None
        self.baseline_optimizer = Optimizer.from_spec(spec=self.baseline_optimizer)

    # TODO: Baseline internal states !!! (see target_network q_model)

    # Reward estimation
    self.fn_reward_estimation = tf.make_template(
        name_=(self.scope + '/reward-estimation'),
        func_=self.tf_reward_estimation,
        custom_getter_=custom_getter
    )

    # PG loss per instance function
    self.fn_pg_loss_per_instance = tf.make_template(
        name_=(self.scope + '/pg-loss-per-instance'),
        func_=self.tf_pg_loss_per_instance,
        custom_getter_=custom_getter
    )
def initialize(self, custom_getter):
    super(QModel, self).initialize(custom_getter)

    # # TEMP: Random sampling fix
    # if self.random_sampling_fix:
    #     self.next_states_input = dict()
    #     for name, state in self.states_spec.items():
    #         self.next_states_input[name] = tf.placeholder(
    #             dtype=util.tf_dtype(state['type']),
    #             shape=(None,) + tuple(state['shape']),
    #             name=('next-' + name)
    #         )

    # Target network
    self.target_network = Network.from_spec(
        spec=self.target_network_spec,
        kwargs=dict(scope='target', summary_labels=self.summary_labels)
    )

    # Target network optimizer
    self.target_optimizer = Optimizer.from_spec(spec=self.target_optimizer_spec)

    # Target network distributions
    self.target_distributions = self.create_distributions()
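# The target network is typically kept in sync via a synchronization-style
# optimizer spec. A hedged sketch matching the Synchronization arguments used
# elsewhere in this file ('synchronization' as the registered type name and
# the numbers are assumptions):
#
#     target_optimizer_spec = dict(
#         type='synchronization',
#         sync_frequency=10000,  # hard update every N optimization steps
#         update_weight=1.0      # 1.0 = full copy; <1.0 = soft/Polyak update
#     )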
def setup(self):
    """
    Sets up the TensorFlow model graph and initializes the TensorFlow session.
    """
    default_graph = None
    if self.distributed_spec is None:
        self.global_model = None
        self.graph = tf.Graph()
        default_graph = self.graph.as_default()
        default_graph.__enter__()

    elif self.distributed_spec.get('parameter_server'):
        if self.distributed_spec.get('replica_model'):
            raise TensorForceError("Invalid config value for distributed mode.")
        self.global_model = None
        self.graph = tf.Graph()
        default_graph = self.graph.as_default()
        default_graph.__enter__()

    elif self.distributed_spec.get('replica_model'):
        self.device = tf.train.replica_device_setter(
            worker_device=self.device,
            cluster=self.distributed_spec['cluster_spec']
        )
        self.global_model = None
        # Replica model is part of its parent model's graph, hence no new graph here.
        self.graph = tf.get_default_graph()

    else:
        graph = tf.Graph()
        default_graph = graph.as_default()
        default_graph.__enter__()
        # Global model.
        self.global_model = deepcopy(self)
        self.global_model.distributed_spec['replica_model'] = True
        self.global_model.setup()
        self.graph = graph

    with tf.device(device_name_or_function=self.device):
        # Episode
        collection = self.graph.get_collection(name='episode')
        if len(collection) == 0:
            self.episode = tf.get_variable(name='episode', dtype=tf.int32, initializer=0, trainable=False)
            self.graph.add_to_collection(name='episode', value=self.episode)
        else:
            assert len(collection) == 1
            self.episode = collection[0]

        # Timestep
        collection = self.graph.get_collection(name='timestep')
        if len(collection) == 0:
            self.timestep = tf.get_variable(name='timestep', dtype=tf.int32, initializer=0, trainable=False)
            self.graph.add_to_collection(name='timestep', value=self.timestep)
            self.graph.add_to_collection(name=tf.GraphKeys.GLOBAL_STEP, value=self.timestep)
        else:
            assert len(collection) == 1
            self.timestep = collection[0]

        # Variables and summaries
        self.variables = dict()
        self.all_variables = dict()
        self.registered_variables = set()
        self.summaries = list()

        def custom_getter(getter, name, registered=False, second=False, **kwargs):
            if registered:
                self.registered_variables.add(name)
            elif name in self.registered_variables:
                registered = True
            variable = getter(name=name, **kwargs)  # Top-level, hence no 'registered'
            if not registered:
                self.all_variables[name] = variable
                if kwargs.get('trainable', True) and not name.startswith('optimization'):
                    self.variables[name] = variable
                if 'variables' in self.summary_labels:
                    summary = tf.summary.histogram(name=name, values=variable)
                    self.summaries.append(summary)
            return variable

        # Create placeholders, tf functions, internals, etc
        self.initialize(custom_getter=custom_getter)

        # Input tensors
        states = self.get_states(states=self.state_inputs)
        internals = [tf.identity(input=internal) for internal in self.internal_inputs]
        actions = self.get_actions(actions=self.action_inputs)
        terminal = tf.identity(input=self.terminal_input)
        reward = self.get_reward(states=states, internals=internals, terminal=terminal, reward=self.reward_input)

        # Stop gradients for input preprocessing
        states = {name: tf.stop_gradient(input=state) for name, state in states.items()}
        actions = {name: tf.stop_gradient(input=action) for name, action in actions.items()}
        reward = tf.stop_gradient(input=reward)

        # Optimizer
        if self.optimizer is None:
            pass
        elif self.distributed_spec is not None and \
                not self.distributed_spec.get('parameter_server') and \
                not self.distributed_spec.get('replica_model'):
            # If not internal global model
            self.optimizer = GlobalOptimizer(optimizer=self.optimizer)
        else:
            self.optimizer = Optimizer.from_spec(spec=self.optimizer)

        # Create output fetch operations
        self.create_output_operations(
            states=states,
            internals=internals,
            actions=actions,
            terminal=terminal,
            reward=reward,
            update=self.update_input,
            deterministic=self.deterministic_input
        )

        if 'inputs' in self.summary_labels:
            for name, state in states.items():
                summary = tf.summary.histogram(name=(self.scope + '/inputs/states/' + name), values=state)
                self.summaries.append(summary)
            for name, action in actions.items():
                summary = tf.summary.histogram(name=(self.scope + '/inputs/actions/' + name), values=action)
                self.summaries.append(summary)
            summary = tf.summary.histogram(name=(self.scope + '/inputs/reward'), values=reward)
            self.summaries.append(summary)

    if self.distributed_spec is not None:
        if self.distributed_spec.get('replica_model'):
            # If internal global model
            return
        elif self.distributed_spec.get('parameter_server'):
            server = tf.train.Server(
                server_or_cluster_def=self.distributed_spec['cluster_spec'],
                job_name='ps',
                task_index=self.distributed_spec['task_index'],
                protocol=self.distributed_spec.get('protocol'),
                config=None,
                start=True
            )
            # Param server does nothing actively
            server.join()
            return

    # Global and local variables initialize operations
    if self.distributed_spec is None:
        global_variables = self.get_variables(include_non_trainable=True)
        init_op = tf.variables_initializer(var_list=global_variables)
        ready_op = tf.report_uninitialized_variables(var_list=global_variables)
        ready_for_local_init_op = None
        local_init_op = None
    else:
        global_variables = self.global_model.get_variables(include_non_trainable=True)
        local_variables = self.get_variables(include_non_trainable=True)
        init_op = tf.variables_initializer(var_list=global_variables)
        ready_op = tf.report_uninitialized_variables(var_list=(global_variables + local_variables))
        ready_for_local_init_op = tf.report_uninitialized_variables(var_list=global_variables)
        local_init_op = tf.group(*(
            local_var.assign(value=global_var)
            for local_var, global_var in zip(local_variables, global_variables)
        ))

    def init_fn(scaffold, session):
        if self.saver_spec is not None and self.saver_spec.get('load', True):
            directory = self.saver_spec['directory']
            file = self.saver_spec.get('file')
            if file is None:
                file = tf.train.latest_checkpoint(
                    checkpoint_dir=directory,
                    latest_filename=None  # Corresponds to argument of saver.save() in Model.save().
                )
            elif not os.path.isfile(file):
                file = os.path.join(directory, file)
            if file is not None:
                scaffold.saver.restore(sess=session, save_path=file)

    # Summary operation
    summaries = self.get_summaries()
    if len(summaries) > 0:
        summary_op = tf.summary.merge(inputs=summaries)
    else:
        summary_op = None

    # TensorFlow saver object
    saver = tf.train.Saver(
        var_list=global_variables,  # should be given?
        reshape=False,
        sharded=False,  # should be true?
        max_to_keep=5,
        keep_checkpoint_every_n_hours=10000.0,
        name=None,
        restore_sequentially=False,
        saver_def=None,
        builder=None,
        defer_build=False,
        allow_empty=True,
        write_version=tf.train.SaverDef.V2,
        pad_step_number=False,
        save_relative_paths=True
        # filename=None
    )

    # TensorFlow scaffold object
    self.scaffold = tf.train.Scaffold(
        init_op=init_op,
        init_feed_dict=None,
        init_fn=init_fn,
        ready_op=ready_op,
        ready_for_local_init_op=ready_for_local_init_op,
        local_init_op=local_init_op,
        summary_op=summary_op,
        saver=saver,
        copy_from_scaffold=None
    )

    hooks = list()

    # Checkpoint saver hook
    if self.saver_spec is not None and \
            (self.distributed_spec is None or self.distributed_spec['task_index'] == 0):
        self.saver_directory = self.saver_spec['directory']
        hooks.append(tf.train.CheckpointSaverHook(
            checkpoint_dir=self.saver_directory,
            save_secs=self.saver_spec.get('seconds', None if 'steps' in self.saver_spec else 600),
            save_steps=self.saver_spec.get('steps'),  # Either one or the other has to be set.
            saver=None,  # None since given via 'scaffold' argument.
            checkpoint_basename=self.saver_spec.get('basename', 'model.ckpt'),
            scaffold=self.scaffold,
            listeners=None
        ))
    else:
        self.saver_directory = None

    # Summary saver hook
    if self.summary_spec is None:
        self.summary_writer_hook = None
    else:
        # TensorFlow summary writer object
        summary_writer = tf.summary.FileWriter(
            logdir=self.summary_spec['directory'],
            graph=self.graph,
            max_queue=10,
            flush_secs=120,
            filename_suffix=None
        )
        self.summary_writer_hook = util.UpdateSummarySaverHook(
            update_input=self.update_input,
            save_steps=self.summary_spec.get('steps'),  # Either one or the other has to be set.
            save_secs=self.summary_spec.get('seconds', None if 'steps' in self.summary_spec else 120),
            output_dir=None,  # None since given via 'summary_writer' argument.
            summary_writer=summary_writer,
            scaffold=self.scaffold,
            summary_op=None  # None since given via 'scaffold' argument.
        )
        hooks.append(self.summary_writer_hook)

    # Stop at step hook
    # hooks.append(tf.train.StopAtStepHook(
    #     num_steps=???,  # This makes more sense, if load and continue training.
    #     last_step=None  # Either one or the other has to be set.
    # ))

    # # Step counter hook
    # hooks.append(tf.train.StepCounterHook(
    #     every_n_steps=counter_config.get('steps', 100),  # Either one or the other has to be set.
    #     every_n_secs=counter_config.get('secs'),  # Either one or the other has to be set.
    #     output_dir=None,  # None since given via 'summary_writer' argument.
    #     summary_writer=summary_writer
    # ))

    # Other available hooks:
    # tf.train.FinalOpsHook(final_ops, final_ops_feed_dict=None)
    # tf.train.GlobalStepWaiterHook(wait_until_step)
    # tf.train.LoggingTensorHook(tensors, every_n_iter=None, every_n_secs=None)
    # tf.train.NanTensorHook(loss_tensor, fail_on_nan_loss=True)
    # tf.train.ProfilerHook(save_steps=None, save_secs=None, output_dir='', show_dataflow=True, show_memory=False)

    if self.distributed_spec is None:
        # TensorFlow non-distributed monitored session object
        self.monitored_session = tf.train.SingularMonitoredSession(
            hooks=hooks,
            scaffold=self.scaffold,
            master='',  # Default value.
            config=self.session_config,  # always the same?
            checkpoint_dir=None
        )

    else:
        server = tf.train.Server(
            server_or_cluster_def=self.distributed_spec['cluster_spec'],
            job_name='worker',
            task_index=self.distributed_spec['task_index'],
            protocol=self.distributed_spec.get('protocol'),
            config=self.session_config,
            start=True
        )

        if self.distributed_spec['task_index'] == 0:
            # TensorFlow chief session creator object
            session_creator = tf.train.ChiefSessionCreator(
                scaffold=self.scaffold,
                master=server.target,
                config=self.session_config,
                checkpoint_dir=None,
                checkpoint_filename_with_path=None
            )
        else:
            # TensorFlow worker session creator object
            session_creator = tf.train.WorkerSessionCreator(
                scaffold=self.scaffold,
                master=server.target,
                config=self.session_config
            )

        # TensorFlow monitored session object
        self.monitored_session = tf.train.MonitoredSession(
            session_creator=session_creator,
            hooks=hooks,
            stop_grace_period_secs=120  # Default value.
        )

    if default_graph:
        default_graph.__exit__(None, None, None)
    self.graph.finalize()

    self.monitored_session.__enter__()
    self.session = self.monitored_session._tf_sess()
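# Hedged sketch of a distributed_spec consistent with the keys read above
# ('cluster_spec', 'task_index', 'parameter_server', 'protocol'); the addresses
# and task counts are illustrative:
#
#     cluster = tf.train.ClusterSpec({
#         'ps': ['localhost:2222'],
#         'worker': ['localhost:2223', 'localhost:2224']
#     })
#     # Parameter-server process (blocks in server.join()):
#     distributed_spec = dict(cluster_spec=cluster, parameter_server=True, task_index=0)
#     # Worker process 0 (the chief, which also checkpoints):
#     distributed_spec = dict(cluster_spec=cluster, task_index=0)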
def __init__(self, states_spec, actions_spec, config, **kwargs):
    # States and actions specifications
    self.states_spec = states_spec
    self.actions_spec = actions_spec

    # Discount factor
    self.discount = config.discount

    # Reward normalization
    assert isinstance(config.normalize_rewards, bool)
    self.normalize_rewards = config.normalize_rewards

    # Variable noise
    assert config.variable_noise is None or config.variable_noise > 0.0
    self.variable_noise = config.variable_noise

    # TensorFlow summaries
    self.summary_labels = set(config.summary_labels or ())

    # Variables and summaries
    self.variables = dict()
    self.all_variables = dict()
    self.summaries = list()

    if not config.local_model or not config.replica_model:
        # If not local_model mode or not internal global model
        self.default_graph = tf.Graph().as_default()
        self.graph = self.default_graph.__enter__()

    if config.cluster_spec is None:
        if config.parameter_server or config.replica_model or config.local_model:
            raise TensorForceError("Invalid config value for distributed mode.")
        self.device = config.device
        self.global_model = None

    elif config.parameter_server:
        if config.replica_model or config.local_model:
            raise TensorForceError("Invalid config value for distributed mode.")
        self.device = config.device
        self.global_model = None

    elif config.replica_model:
        self.device = tf.train.replica_device_setter(
            worker_device=config.device,
            cluster=config.cluster_spec
        )
        self.global_model = None

    elif config.local_model:
        if config.replica_model:
            raise TensorForceError("Invalid config value for distributed mode.")
        self.device = config.device

        global_config = config.copy()
        global_config.set(key='replica_model', value=True)
        self.global_model = self.__class__(
            states_spec=states_spec,
            actions_spec=actions_spec,
            config=global_config,
            **kwargs
        )

    else:
        raise TensorForceError("Invalid config value for distributed mode.")

    with tf.device(device_name_or_function=self.device):
        # Timestep and episode
        # TODO: various modes !!!
        if self.global_model is None:
            # TODO: Variables seem to re-initialize in the beginning every time a runner starts
            self.timestep = tf.get_variable(name='timestep', dtype=tf.int32, initializer=0, trainable=False)
            self.episode = tf.get_variable(name='episode', dtype=tf.int32, initializer=0, trainable=False)
        else:
            self.timestep = self.global_model.timestep
            self.episode = self.global_model.episode

        with tf.name_scope(name=config.scope):

            def custom_getter(getter, name, registered=False, **kwargs):
                variable = getter(name=name, **kwargs)  # Top-level, hence no 'registered'
                if not registered:
                    self.all_variables[name] = variable
                    if kwargs.get('trainable', True) and not name.startswith('optimization'):
                        self.variables[name] = variable
                    if 'variables' in self.summary_labels:
                        summary = tf.summary.histogram(name=name, values=variable)
                        self.summaries.append(summary)
                return variable

            # Create placeholders, tf functions, internals, etc
            self.initialize(custom_getter=custom_getter)

            # Input tensors
            states = self.get_states(states=self.state_inputs)
            internals = [tf.identity(input=internal) for internal in self.internal_inputs]
            actions = self.get_actions(actions=self.action_inputs)
            terminal = tf.identity(input=self.terminal_input)
            reward = self.get_reward(states=states, internals=internals, terminal=terminal, reward=self.reward_input)

            # Stop gradients for input preprocessing
            states = {name: tf.stop_gradient(input=state) for name, state in states.items()}
            actions = {name: tf.stop_gradient(input=action) for name, action in actions.items()}
            reward = tf.stop_gradient(input=reward)

            # Optimizer
            if config.optimizer is None:
                self.optimizer = None
            elif config.local_model and not config.replica_model:
                # If local_model mode and not internal global model
                self.optimizer = GlobalOptimizer(optimizer=config.optimizer)
            else:
                self.optimizer = Optimizer.from_spec(spec=config.optimizer)

            # Create output fetch operations
            self.create_output_operations(
                states=states,
                internals=internals,
                actions=actions,
                terminal=terminal,
                reward=reward,
                deterministic=self.deterministic
            )

    if config.local_model and config.replica_model:
        # If local_model mode and internal global model
        return

    # Local and global initialize operations
    if config.local_model:
        init_op = tf.variables_initializer(
            var_list=self.global_model.get_variables(include_non_trainable=True)
        )
        local_init_op = tf.variables_initializer(
            var_list=self.get_variables(include_non_trainable=True)
        )
    else:
        init_op = tf.variables_initializer(
            var_list=self.get_variables(include_non_trainable=True)
        )
        local_init_op = None

    # Summary operation
    if len(self.get_summaries()) > 0:
        summary_op = tf.summary.merge(inputs=self.get_summaries())
    else:
        summary_op = None

    # TODO: MonitoredSession or so?
    self.supervisor = tf.train.Supervisor(
        is_chief=(config.task_index == 0),
        init_op=init_op,
        local_init_op=local_init_op,
        logdir=config.model_directory,
        summary_op=summary_op,
        global_step=self.timestep,
        save_summaries_secs=config.summary_frequency,
        save_model_secs=config.save_frequency
        # checkpoint_basename='model.ckpt'
        # session_manager=None
    )

    # tf.ConfigProto(device_filters=['/job:ps', '/job:worker/task:{}/cpu:0'.format(self.task_index)])
    if config.parameter_server:
        self.server = tf.train.Server(
            server_or_cluster_def=config.cluster_spec,
            job_name='ps',
            task_index=config.task_index
            # config=tf.ConfigProto(device_filters=["/job:ps"])
            # config=tf.ConfigProto(
            #     inter_op_parallelism_threads=2,
            #     log_device_placement=True
            # )
        )
        # Param server does nothing actively
        self.server.join()

    elif config.cluster_spec is not None:
        self.server = tf.train.Server(
            server_or_cluster_def=config.cluster_spec,
            job_name='worker',
            task_index=config.task_index
            # config=tf.ConfigProto(device_filters=["/job:ps"])
            # config=tf.ConfigProto(
            #     inter_op_parallelism_threads=2,
            #     log_device_placement=True
            # )
        )
        self.managed_session = self.supervisor.managed_session(
            master=self.server.target,
            start_standard_services=(config.model_directory is not None)
        )
        self.session = self.managed_session.__enter__()

    else:
        self.managed_session = self.supervisor.managed_session(
            start_standard_services=(config.model_directory is not None)
        )
        self.session = self.managed_session.__enter__()
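# Launch sketch for this Supervisor-based variant: one process per cluster
# task, each constructing the model with its own task_index. The config field
# names follow the attributes read above; the addresses are illustrative:
#
#     cluster_spec = tf.train.ClusterSpec({
#         'ps': ['localhost:2222'],
#         'worker': ['localhost:2223', 'localhost:2224']
#     })
#     # ps process:       config.parameter_server=True,  config.task_index=0
#     # worker processes: config.parameter_server=False, config.task_index=0 or 1
#
# The ps branch never returns (self.server.join()), while each worker enters a
# managed session against its own server target.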