Ejemplo n.º 1
0
    def initialize(self, custom_getter):
        """
        Creates the baseline and baseline optimizer from their specs and
        registers the reward-estimation and baseline-loss template functions.

        Args:
            custom_getter: TensorFlow custom getter forwarded to
                `tf.make_template` so created variables are recorded.
        """
        super(PGModel, self).initialize(custom_getter)

        # Baseline
        if self.baseline_spec is None:
            assert self.baseline_mode is None
            # Set the attribute explicitly so later `self.baseline is None`
            # checks cannot raise AttributeError.
            self.baseline = None

        elif all(name in self.states_spec for name in self.baseline_spec):
            # A spec keyed entirely by state names implies an AggregatedBaseline.
            assert self.baseline_mode == 'states'
            self.baseline = AggregatedBaseline(baselines=self.baseline_spec)

        else:
            assert self.baseline_mode is not None
            self.baseline = Baseline.from_spec(
                spec=self.baseline_spec,
                kwargs=dict(summary_labels=self.summary_labels))

        # Baseline optimizer
        if self.baseline_optimizer_spec is None:
            # Explicit None for symmetry with the baseline attribute above.
            self.baseline_optimizer = None
        else:
            assert self.baseline_mode is not None
            self.baseline_optimizer = Optimizer.from_spec(
                spec=self.baseline_optimizer_spec)

        # TODO: Baseline internal states !!! (see target_network q_model)

        # Reward estimation
        self.fn_reward_estimation = tf.make_template(
            name_='reward-estimation',
            func_=self.tf_reward_estimation,
            custom_getter_=custom_getter)
        # Baseline loss
        self.fn_baseline_loss = tf.make_template(
            name_='baseline-loss',
            func_=self.tf_baseline_loss,
            custom_getter_=custom_getter)
Ejemplo n.º 2
0
    def __init__(self, optimizer):
        """
        Initializes a meta optimizer that wraps and modifies another optimizer.

        Args:
            optimizer: Spec of the optimizer which this meta optimizer modifies.
        """
        super(MetaOptimizer, self).__init__()
        # Construct the wrapped optimizer from its specification.
        self.optimizer = Optimizer.from_spec(spec=optimizer)
Ejemplo n.º 3
0
    def initialize(self, custom_getter):
        """
        Creates the memory and optimizer components and registers every
        TensorFlow template function of the memory model.

        Args:
            custom_getter: TensorFlow custom getter forwarded to
                `tf.make_template`.
        """
        super(MemoryModel, self).initialize(custom_getter)

        # Memory
        memory_kwargs = dict(
            states=self.states_spec,
            internals=self.internals_spec,
            actions=self.actions_spec,
            summary_labels=self.summary_labels)
        self.memory = Memory.from_spec(spec=self.memory_spec, kwargs=memory_kwargs)

        # Optimizer
        self.optimizer = Optimizer.from_spec(
            spec=self.optimizer_spec,
            kwargs=dict(summary_labels=self.summary_labels))

        # TensorFlow template functions: one per (attribute, name, function).
        for attribute, template_name, function in (
                ('fn_discounted_cumulative_reward', 'discounted-cumulative-reward',
                 self.tf_discounted_cumulative_reward),
                ('fn_reference', 'reference', self.tf_reference),
                ('fn_loss_per_instance', 'loss-per-instance',
                 self.tf_loss_per_instance),
                ('fn_regularization_losses', 'regularization-losses',
                 self.tf_regularization_losses),
                ('fn_loss', 'loss', self.tf_loss),
                ('fn_optimization', 'optimization', self.tf_optimization),
                ('fn_import_experience', 'import-experience',
                 self.tf_import_experience)):
            setattr(self, attribute, tf.make_template(
                name_=template_name, func_=function, custom_getter_=custom_getter))
Ejemplo n.º 4
0
    def __init__(self, optimizer, scope='meta-optimizer', summary_labels=(), **kwargs):
        """
        Initializes a meta optimizer that wraps and modifies another optimizer.

        Args:
            optimizer: Spec of the optimizer which this meta optimizer modifies.
            scope: TensorFlow scope name for this meta optimizer.
            summary_labels: Labels of summaries to record.
        """
        # Construct the wrapped optimizer first, forwarding extra kwargs.
        self.optimizer = Optimizer.from_spec(spec=optimizer, kwargs=kwargs)

        super(MetaOptimizer, self).__init__(scope=scope, summary_labels=summary_labels)
Ejemplo n.º 5
0
    def setup_components_and_tf_funcs(self, custom_getter=None):
        """
        Constructs the target network, the critic and target-critic networks,
        their optimizers, and the target-related template functions.

        Args:
            custom_getter: Optional TensorFlow custom getter; the parent
                implementation supplies one if None.

        Returns:
            The custom getter for subsequent template functions.
        """
        custom_getter = super(
            DPGTargetModel, self).setup_components_and_tf_funcs(custom_getter)

        # Target network
        self.target_network = Network.from_spec(
            spec=self.network_spec,
            kwargs=dict(scope='target-network',
                        summary_labels=self.summary_labels))

        # Target network optimizer
        self.target_network_optimizer = Synchronization(
            sync_frequency=self.target_sync_frequency,
            update_weight=self.target_update_weight)

        # Target network distributions
        self.target_distributions = self.create_distributions()

        # Critic
        # NOTE(review): assumes critic_network_spec is a sequence of at least
        # two layer dicts, each with a 'size' entry -- confirm with callers.
        size_t0 = self.critic_network_spec[0]['size']
        size_t1 = self.critic_network_spec[1]['size']

        self.critic = DDPGCriticNetwork(scope='critic',
                                        size_t0=size_t0,
                                        size_t1=size_t1)
        self.critic_optimizer = Optimizer.from_spec(
            spec=self.critic_optimizer_spec,
            kwargs=dict(summary_labels=self.summary_labels))

        self.target_critic = DDPGCriticNetwork(scope='target-critic',
                                               size_t0=size_t0,
                                               size_t1=size_t1)

        # Target critic optimizer
        self.target_critic_optimizer = Synchronization(
            sync_frequency=self.target_sync_frequency,
            update_weight=self.target_update_weight)

        # Template functions for target actions and target Q prediction.
        self.fn_target_actions_and_internals = tf.make_template(
            name_='target-actions-and-internals',
            func_=self.tf_target_actions_and_internals,
            custom_getter_=custom_getter)

        self.fn_predict_target_q = tf.make_template(
            name_='predict-target-q',
            func_=self.tf_predict_target_q,
            custom_getter_=custom_getter)
        return custom_getter
Ejemplo n.º 6
0
    def setup_components_and_tf_funcs(self, custom_getter=None):
        """
        Builds the target policy network, the critic and target-critic
        networks, their optimizers, and the target-related template functions.

        Returns:
            The custom getter for subsequent template functions.
        """
        custom_getter = super(DPGTargetModel, self).setup_components_and_tf_funcs(custom_getter)

        # Target network.
        self.target_network = Network.from_spec(
            spec=self.network_spec,
            kwargs=dict(scope='target-network', summary_labels=self.summary_labels))

        # Optimizer performing the periodic target-network synchronization.
        self.target_network_optimizer = Synchronization(
            sync_frequency=self.target_sync_frequency,
            update_weight=self.target_update_weight)

        # Distributions evaluated on the target network.
        self.target_distributions = self.create_distributions()

        # Critic and its target copy are built from the same network spec.
        self.critic_network = Network.from_spec(
            spec=self.critic_network_spec, kwargs=dict(scope='critic'))
        self.target_critic_network = Network.from_spec(
            spec=self.critic_network_spec, kwargs=dict(scope='target-critic'))

        self.critic_optimizer = Optimizer.from_spec(
            spec=self.critic_optimizer_spec,
            kwargs=dict(summary_labels=self.summary_labels))

        # Optimizer performing the periodic target-critic synchronization.
        self.target_critic_optimizer = Synchronization(
            sync_frequency=self.target_sync_frequency,
            update_weight=self.target_update_weight)

        # Template functions for target actions and target Q prediction.
        for attribute, template_name, function in (
                ('fn_target_actions_and_internals', 'target-actions-and-internals',
                 self.tf_target_actions_and_internals),
                ('fn_predict_target_q', 'predict-target-q',
                 self.tf_predict_target_q)):
            setattr(self, attribute, tf.make_template(
                name_=template_name, func_=function, custom_getter_=custom_getter))
        return custom_getter
Ejemplo n.º 7
0
    def setup_components_and_tf_funcs(self, custom_getter=None):
        """
        Constructs the memory and the optimizer objects, then generates and
        stores all TensorFlow template functions of the memory model.

        Returns:
            The custom getter for subsequent template functions.
        """
        custom_getter = super(MemoryModel, self).setup_components_and_tf_funcs(custom_getter)

        # Memory
        self.memory = Memory.from_spec(
            spec=self.memory_spec,
            kwargs=dict(
                states=self.states_spec,
                internals=self.internals_spec,
                actions=self.actions_spec,
                summary_labels=self.summary_labels))

        # Optimizer
        self.optimizer = Optimizer.from_spec(
            spec=self.optimizer_spec,
            kwargs=dict(summary_labels=self.summary_labels))

        # TensorFlow template functions: one per (attribute, name, function).
        for attribute, template_name, function in (
                ('fn_discounted_cumulative_reward', 'discounted-cumulative-reward',
                 self.tf_discounted_cumulative_reward),
                ('fn_reference', 'reference', self.tf_reference),
                ('fn_loss_per_instance', 'loss-per-instance',
                 self.tf_loss_per_instance),
                ('fn_regularization_losses', 'regularization-losses',
                 self.tf_regularization_losses),
                ('fn_loss', 'loss', self.tf_loss),
                ('fn_optimization', 'optimization', self.tf_optimization),
                ('fn_import_experience', 'import-experience',
                 self.tf_import_experience)):
            setattr(self, attribute, tf.make_template(
                name_=template_name, func_=function, custom_getter_=custom_getter))

        return custom_getter
Ejemplo n.º 8
0
    def initialize(self, custom_getter):
        """
        Creates the target network, critic and target-critic components and
        registers the target-related TensorFlow template functions.
        """
        super(DPGTargetModel, self).initialize(custom_getter)

        # Target network.
        self.target_network = Network.from_spec(
            spec=self.network_spec,
            kwargs=dict(scope='target-network', summary_labels=self.summary_labels))

        # Optimizer performing the periodic target-network synchronization.
        self.target_network_optimizer = Synchronization(
            sync_frequency=self.target_sync_frequency,
            update_weight=self.target_update_weight)

        # Distributions evaluated on the target network.
        self.target_distributions = self.create_distributions()

        # Critic and target critic share the same two layer sizes.
        size_t0 = self.critic_network_spec['size_t0']
        size_t1 = self.critic_network_spec['size_t1']

        self.critic = DDPGCriticNetwork(
            scope='critic', size_t0=size_t0, size_t1=size_t1)
        self.critic_optimizer = Optimizer.from_spec(
            spec=self.critic_optimizer_spec,
            kwargs=dict(summary_labels=self.summary_labels))

        self.target_critic = DDPGCriticNetwork(
            scope='target-critic', size_t0=size_t0, size_t1=size_t1)

        # Optimizer performing the periodic target-critic synchronization.
        self.target_critic_optimizer = Synchronization(
            sync_frequency=self.target_sync_frequency,
            update_weight=self.target_update_weight)

        # Template functions for target actions and target Q prediction.
        self.fn_target_actions_and_internals = tf.make_template(
            name_='target-actions-and-internals',
            func_=self.tf_target_actions_and_internals,
            custom_getter_=custom_getter)

        self.fn_predict_target_q = tf.make_template(
            name_='predict-target-q',
            func_=self.tf_predict_target_q,
            custom_getter_=custom_getter)
Ejemplo n.º 9
0
    def __init__(self, states_spec, actions_spec, network_spec, config):
        """
        Policy-gradient model constructor.

        Validates the baseline configuration and constructs the baseline and
        baseline optimizer before delegating to the parent constructor.

        Args:
            states_spec: States specification dict.
            actions_spec: Actions specification dict.
            network_spec: Policy network specification.
            config: Configuration object providing baseline_mode, baseline,
                baseline_optimizer, gae_lambda, scope and summary_labels.
        """
        # Baseline mode
        assert config.baseline_mode is None or config.baseline_mode in (
            'states', 'network')
        self.baseline_mode = config.baseline_mode

        with tf.name_scope(name=config.scope):
            # Baseline
            if config.baseline is None:
                assert self.baseline_mode is None
                self.baseline = None

            elif all(name in states_spec for name in config.baseline):
                # A spec keyed entirely by state names implies AggregatedBaseline
                assert self.baseline_mode == 'states'
                self.baseline = AggregatedBaseline(baselines=config.baseline)

            else:
                assert self.baseline_mode is not None
                self.baseline = Baseline.from_spec(
                    spec=config.baseline,
                    kwargs=dict(summary_labels=config.summary_labels))

            # Baseline optimizer
            if config.baseline_optimizer is None:
                self.baseline_optimizer = None
            else:
                assert self.baseline_mode is not None
                self.baseline_optimizer = Optimizer.from_spec(
                    spec=config.baseline_optimizer)

        # Generalized advantage function:
        # gae_lambda requires a baseline and must lie in [0.0, 1.0].
        assert config.gae_lambda is None or (
            0.0 <= config.gae_lambda <= 1.0 and self.baseline_mode is not None)
        self.gae_lambda = config.gae_lambda

        super(PGModel, self).__init__(states_spec=states_spec,
                                      actions_spec=actions_spec,
                                      network_spec=network_spec,
                                      config=config)
Ejemplo n.º 10
0
    def initialize(self, custom_getter):
        """
        Replaces the baseline and baseline-optimizer specs with constructed
        objects and registers the reward-estimation and PG-loss templates.
        """
        super(PGModel, self).initialize(custom_getter)

        # Baseline: the attribute initially holds the spec and is replaced in
        # place by the constructed baseline object (or left as None).
        if self.baseline is None:
            assert self.baseline_mode is None
            self.baseline = None
        elif all(name in self.states_spec for name in self.baseline):
            # A spec keyed entirely by state names implies an AggregatedBaseline.
            assert self.baseline_mode == 'states'
            self.baseline = AggregatedBaseline(baselines=self.baseline)
        else:
            assert self.baseline_mode is not None
            self.baseline = Baseline.from_spec(
                spec=self.baseline,
                kwargs=dict(summary_labels=self.summary_labels))

        # Baseline optimizer, likewise replaced in place.
        if self.baseline_optimizer is None:
            self.baseline_optimizer = None
        else:
            assert self.baseline_mode is not None
            self.baseline_optimizer = Optimizer.from_spec(
                spec=self.baseline_optimizer)

        # TODO: Baseline internal states !!! (see target_network q_model)

        # Template functions, named under the model scope.
        self.fn_reward_estimation = tf.make_template(
            name_=(self.scope + '/reward-estimation'),
            func_=self.tf_reward_estimation,
            custom_getter_=custom_getter)
        self.fn_pg_loss_per_instance = tf.make_template(
            name_=(self.scope + '/pg-loss-per-instance'),
            func_=self.tf_pg_loss_per_instance,
            custom_getter_=custom_getter)
Ejemplo n.º 11
0
    def initialize(self, custom_getter):
        """
        Creates the target network, its synchronization optimizer and the
        target network distributions.

        Args:
            custom_getter: TensorFlow custom getter forwarded by the caller
                (unused directly here beyond the parent call).
        """
        super(QModel, self).initialize(custom_getter)

        # Target network
        self.target_network = Network.from_spec(
            spec=self.target_network_spec,
            kwargs=dict(scope='target', summary_labels=self.summary_labels))

        # Target network optimizer
        self.target_optimizer = Optimizer.from_spec(
            spec=self.target_optimizer_spec)

        # Target network distributions
        self.target_distributions = self.create_distributions()
Ejemplo n.º 12
0
    def setup(self):
        """
        Sets up the TensorFlow model graph and initializes the TensorFlow session.

        Builds the graph (or joins an existing one in replica mode), creates
        the episode/timestep variables, registers variables via a custom
        getter, wires the input tensors and the optimizer, then constructs
        the saver, scaffold, hooks and the (possibly distributed) monitored
        session.
        """
        # Graph and device selection depend on the distributed mode.
        default_graph = None
        if self.distributed_spec is None:
            # Single-process mode: own a fresh graph.
            self.global_model = None
            self.graph = tf.Graph()
            default_graph = self.graph.as_default()
            default_graph.__enter__()

        elif self.distributed_spec.get('parameter_server'):
            if self.distributed_spec.get('replica_model'):
                raise TensorForceError(
                    "Invalid config value for distributed mode.")
            self.global_model = None
            self.graph = tf.Graph()
            default_graph = self.graph.as_default()
            default_graph.__enter__()

        elif self.distributed_spec.get('replica_model'):
            self.device = tf.train.replica_device_setter(
                worker_device=self.device,
                cluster=self.distributed_spec['cluster_spec'])
            self.global_model = None
            # Replica model is part of its parent model's graph, hence no new graph here.
            self.graph = tf.get_default_graph()

        else:
            # Distributed worker: build a graph holding an internal global
            # (replica) model plus this local model.
            graph = tf.Graph()
            default_graph = graph.as_default()
            default_graph.__enter__()
            # Global model.
            self.global_model = deepcopy(self)
            self.global_model.distributed_spec['replica_model'] = True
            self.global_model.setup()
            self.graph = graph

        with tf.device(device_name_or_function=self.device):
            # Episode counter, shared via a graph collection so replica and
            # local models reuse the same variable.
            collection = self.graph.get_collection(name='episode')
            if len(collection) == 0:
                self.episode = tf.get_variable(name='episode',
                                               dtype=tf.int32,
                                               initializer=0,
                                               trainable=False)
                self.graph.add_to_collection(name='episode',
                                             value=self.episode)
            else:
                assert len(collection) == 1
                self.episode = collection[0]

            # Timestep counter, also registered as the TensorFlow global step.
            collection = self.graph.get_collection(name='timestep')
            if len(collection) == 0:
                self.timestep = tf.get_variable(name='timestep',
                                                dtype=tf.int32,
                                                initializer=0,
                                                trainable=False)
                self.graph.add_to_collection(name='timestep',
                                             value=self.timestep)
                self.graph.add_to_collection(name=tf.GraphKeys.GLOBAL_STEP,
                                             value=self.timestep)
            else:
                assert len(collection) == 1
                self.timestep = collection[0]

            # Variables and summaries
            self.variables = dict()
            self.all_variables = dict()
            self.registered_variables = set()
            self.summaries = list()

            # Custom getter which records every created variable (plus a
            # histogram summary for trainable ones when requested) unless the
            # variable was explicitly registered as external.
            def custom_getter(getter,
                              name,
                              registered=False,
                              second=False,
                              **kwargs):
                if registered:
                    self.registered_variables.add(name)
                elif name in self.registered_variables:
                    registered = True
                variable = getter(name=name,
                                  **kwargs)  # Top-level, hence no 'registered'
                if not registered:
                    self.all_variables[name] = variable
                    if kwargs.get(
                            'trainable',
                            True) and not name.startswith('optimization'):
                        self.variables[name] = variable
                        if 'variables' in self.summary_labels:
                            summary = tf.summary.histogram(name=name,
                                                           values=variable)
                            self.summaries.append(summary)
                return variable

            # Create placeholders, tf functions, internals, etc
            self.initialize(custom_getter=custom_getter)

            # Input tensors
            states = self.get_states(states=self.state_inputs)
            internals = [
                tf.identity(input=internal)
                for internal in self.internal_inputs
            ]
            actions = self.get_actions(actions=self.action_inputs)
            terminal = tf.identity(input=self.terminal_input)
            reward = self.get_reward(states=states,
                                     internals=internals,
                                     terminal=terminal,
                                     reward=self.reward_input)

            # Stop gradients for input preprocessing
            states = {
                name: tf.stop_gradient(input=state)
                for name, state in states.items()
            }
            actions = {
                name: tf.stop_gradient(input=action)
                for name, action in actions.items()
            }
            reward = tf.stop_gradient(input=reward)

            # Optimizer: distributed workers wrap theirs in a GlobalOptimizer.
            if self.optimizer is None:
                pass
            elif self.distributed_spec is not None and \
                    not self.distributed_spec.get('parameter_server') and \
                    not self.distributed_spec.get('replica_model'):
                # If not internal global model
                self.optimizer = GlobalOptimizer(optimizer=self.optimizer)
            else:
                self.optimizer = Optimizer.from_spec(spec=self.optimizer)

            # Create output fetch operations
            self.create_output_operations(
                states=states,
                internals=internals,
                actions=actions,
                terminal=terminal,
                reward=reward,
                update=self.update_input,
                deterministic=self.deterministic_input)

            if 'inputs' in self.summary_labels:
                for name, state in states.items():
                    summary = tf.summary.histogram(
                        name=(self.scope + '/inputs/states/' + name),
                        values=state)
                    self.summaries.append(summary)
                for name, action in actions.items():
                    summary = tf.summary.histogram(
                        name=(self.scope + '/inputs/actions/' + name),
                        values=action)
                    self.summaries.append(summary)
                summary = tf.summary.histogram(name=(self.scope +
                                                     '/inputs/reward'),
                                               values=reward)
                self.summaries.append(summary)

        # Replica models and parameter servers exit early: no session needed.
        if self.distributed_spec is not None:
            if self.distributed_spec.get('replica_model'):
                # If internal global model
                return

            elif self.distributed_spec.get('parameter_server'):
                server = tf.train.Server(
                    server_or_cluster_def=self.
                    distributed_spec['cluster_spec'],
                    job_name='ps',
                    task_index=self.distributed_spec['task_index'],
                    protocol=self.distributed_spec.get('protocol'),
                    config=None,
                    start=True)
                # Param server does nothing actively
                server.join()
                return

        # Global and local variables initialize operations
        if self.distributed_spec is None:
            global_variables = self.get_variables(include_non_trainable=True)
            init_op = tf.variables_initializer(var_list=global_variables)
            ready_op = tf.report_uninitialized_variables(
                var_list=global_variables)
            ready_for_local_init_op = None
            local_init_op = None
        else:
            # NOTE(review): local_init_op pairs local and global variables by
            # zip order -- assumes both lists enumerate in the same order.
            global_variables = self.global_model.get_variables(
                include_non_trainable=True)
            local_variables = self.get_variables(include_non_trainable=True)
            init_op = tf.variables_initializer(var_list=global_variables)
            ready_op = tf.report_uninitialized_variables(
                var_list=(global_variables + local_variables))
            ready_for_local_init_op = tf.report_uninitialized_variables(
                var_list=global_variables)
            local_init_op = tf.group(*(local_var.assign(value=global_var)
                                       for local_var, global_var in zip(
                                           local_variables, global_variables)))

        # Restores a checkpoint on session creation if configured to do so.
        def init_fn(scaffold, session):
            if self.saver_spec is not None and self.saver_spec.get(
                    'load', True):
                directory = self.saver_spec['directory']
                file = self.saver_spec.get('file')
                if file is None:
                    file = tf.train.latest_checkpoint(
                        checkpoint_dir=directory,
                        latest_filename=
                        None  # Corresponds to argument of saver.save() in Model.save().
                    )
                elif not os.path.isfile(file):
                    file = os.path.join(directory, file)
                if file is not None:
                    scaffold.saver.restore(sess=session, save_path=file)

        # Summary operation
        summaries = self.get_summaries()
        if len(summaries) > 0:
            summary_op = tf.summary.merge(inputs=summaries)
        else:
            summary_op = None

        # TensorFlow saver object
        saver = tf.train.Saver(
            var_list=global_variables,  # should be given?
            reshape=False,
            sharded=False,  # should be true?
            max_to_keep=5,
            keep_checkpoint_every_n_hours=10000.0,
            name=None,
            restore_sequentially=False,
            saver_def=None,
            builder=None,
            defer_build=False,
            allow_empty=True,
            write_version=tf.train.SaverDef.V2,
            pad_step_number=False,
            save_relative_paths=True
            #filename=None
        )

        # TensorFlow scaffold object
        self.scaffold = tf.train.Scaffold(
            init_op=init_op,
            init_feed_dict=None,
            init_fn=init_fn,
            ready_op=ready_op,
            ready_for_local_init_op=ready_for_local_init_op,
            local_init_op=local_init_op,
            summary_op=summary_op,
            saver=saver,
            copy_from_scaffold=None)

        hooks = list()

        # Checkpoint saver hook (only on the chief task in distributed mode)
        if self.saver_spec is not None and (
                self.distributed_spec is None
                or self.distributed_spec['task_index'] == 0):
            self.saver_directory = self.saver_spec['directory']
            hooks.append(
                tf.train.CheckpointSaverHook(
                    checkpoint_dir=self.saver_directory,
                    save_secs=self.saver_spec.get(
                        'seconds',
                        None if 'steps' in self.saver_spec else 600),
                    save_steps=self.saver_spec.get(
                        'steps'),  # Either one or the other has to be set.
                    saver=None,  # None since given via 'scaffold' argument.
                    checkpoint_basename=self.saver_spec.get(
                        'basename', 'model.ckpt'),
                    scaffold=self.scaffold,
                    listeners=None))
        else:
            self.saver_directory = None

        # Summary saver hook
        if self.summary_spec is None:
            self.summary_writer_hook = None
        else:
            # TensorFlow summary writer object
            summary_writer = tf.summary.FileWriter(
                logdir=self.summary_spec['directory'],
                graph=self.graph,
                max_queue=10,
                flush_secs=120,
                filename_suffix=None)
            self.summary_writer_hook = util.UpdateSummarySaverHook(
                update_input=self.update_input,
                save_steps=self.summary_spec.get(
                    'steps'),  # Either one or the other has to be set.
                save_secs=self.summary_spec.get(
                    'seconds', None if 'steps' in self.summary_spec else 120),
                output_dir=
                None,  # None since given via 'summary_writer' argument.
                summary_writer=summary_writer,
                scaffold=self.scaffold,
                summary_op=None  # None since given via 'scaffold' argument.
            )
            hooks.append(self.summary_writer_hook)

        # Stop at step hook
        # hooks.append(tf.train.StopAtStepHook(
        #     num_steps=???,  # This makes more sense, if load and continue training.
        #     last_step=None  # Either one or the other has to be set.
        # ))

        # # Step counter hook
        # hooks.append(tf.train.StepCounterHook(
        #     every_n_steps=counter_config.get('steps', 100),  # Either one or the other has to be set.
        #     every_n_secs=counter_config.get('secs'),  # Either one or the other has to be set.
        #     output_dir=None,  # None since given via 'summary_writer' argument.
        #     summary_writer=summary_writer
        # ))

        # Other available hooks:
        # tf.train.FinalOpsHook(final_ops, final_ops_feed_dict=None)
        # tf.train.GlobalStepWaiterHook(wait_until_step)
        # tf.train.LoggingTensorHook(tensors, every_n_iter=None, every_n_secs=None)
        # tf.train.NanTensorHook(loss_tensor, fail_on_nan_loss=True)
        # tf.train.ProfilerHook(save_steps=None, save_secs=None, output_dir='', show_dataflow=True, show_memory=False)

        if self.distributed_spec is None:
            # TensorFlow non-distributed monitored session object
            self.monitored_session = tf.train.SingularMonitoredSession(
                hooks=hooks,
                scaffold=self.scaffold,
                master='',  # Default value.
                config=self.session_config,  # always the same?
                checkpoint_dir=None)

        else:
            server = tf.train.Server(
                server_or_cluster_def=self.distributed_spec['cluster_spec'],
                job_name='worker',
                task_index=self.distributed_spec['task_index'],
                protocol=self.distributed_spec.get('protocol'),
                config=self.session_config,
                start=True)

            if self.distributed_spec['task_index'] == 0:
                # TensorFlow chief session creator object
                session_creator = tf.train.ChiefSessionCreator(
                    scaffold=self.scaffold,
                    master=server.target,
                    config=self.session_config,
                    checkpoint_dir=None,
                    checkpoint_filename_with_path=None)
            else:
                # TensorFlow worker session creator object
                session_creator = tf.train.WorkerSessionCreator(
                    scaffold=self.scaffold,
                    master=server.target,
                    config=self.session_config,
                )

            # TensorFlow monitored session object
            self.monitored_session = tf.train.MonitoredSession(
                session_creator=session_creator,
                hooks=hooks,
                stop_grace_period_secs=120  # Default value.
            )

        # Leave the graph context, freeze the graph and enter the session.
        if default_graph:
            default_graph.__exit__(None, None, None)
        self.graph.finalize()
        self.monitored_session.__enter__()
        self.session = self.monitored_session._tf_sess()
Ejemplo n.º 13
0
    def __init__(self, states_spec, actions_spec, config, **kwargs):
        """
        Builds the model's TensorFlow graph and starts a managed session.

        Depending on `config`, this constructor sets up one of several modes:
        single-process (no cluster), parameter server (blocks forever in
        `server.join()`), an internal replica/global model, or a local model
        that recursively instantiates its own global model.

        Args:
            states_spec: Specification of the state inputs (stored as-is).
            actions_spec: Specification of the action outputs (stored as-is).
            config: Configuration object providing at least `discount`,
                `normalize_rewards`, `variable_noise`, `summary_labels`,
                `local_model`, `replica_model`, `cluster_spec`,
                `parameter_server`, `device`, `task_index`, `scope`,
                `optimizer`, `model_directory`, `summary_frequency` and
                `save_frequency`.  # NOTE(review): exact schema not visible here.
            **kwargs: Forwarded unchanged to the recursively constructed
                global model in local-model mode.

        Raises:
            TensorForceError: If the distributed-mode flags in `config` are
                inconsistent (see the branch chain below).
        """

        # States and actions specifications
        self.states_spec = states_spec
        self.actions_spec = actions_spec

        # Discount factor
        self.discount = config.discount

        # Reward normalization (must be an explicit bool)
        assert isinstance(config.normalize_rewards, bool)
        self.normalize_rewards = config.normalize_rewards

        # Variable noise: either disabled (None) or a strictly positive stddev
        assert config.variable_noise is None or config.variable_noise > 0.0
        self.variable_noise = config.variable_noise

        # TensorFlow summaries: set of label strings (empty if not configured)
        self.summary_labels = set(config.summary_labels or ())

        # Variables and summaries
        # - all_variables: every variable created under the custom getter
        # - variables: trainable variables outside the 'optimization' scope
        self.variables = dict()
        self.all_variables = dict()
        self.summaries = list()

        if not config.local_model or not config.replica_model:
            # If not local_model mode or not internal global model
            # NOTE(review): a fresh graph is created unless BOTH flags are
            # set; the internal global model (both set) reuses the graph its
            # owning local model entered just before constructing it.
            self.default_graph = tf.Graph().as_default()
            self.graph = self.default_graph.__enter__()

        # Distributed-mode dispatch: exactly one of the following branches
        # applies; invalid flag combinations raise TensorForceError.
        if config.cluster_spec is None:
            # Non-distributed: no cluster, so none of the distributed flags
            # may be set.
            if config.parameter_server or config.replica_model or config.local_model:
                raise TensorForceError(
                    "Invalid config value for distributed mode.")
            self.device = config.device
            self.global_model = None

        elif config.parameter_server:
            # Parameter-server process: only builds a tf.train.Server below
            # and blocks in server.join(); must not also be a replica/local
            # model.
            if config.replica_model or config.local_model:
                raise TensorForceError(
                    "Invalid config value for distributed mode.")
            self.device = config.device
            self.global_model = None

        elif config.replica_model:
            # Internal global model: variables are placed via the replica
            # device setter so they live on the parameter server(s).
            self.device = tf.train.replica_device_setter(
                worker_device=config.device, cluster=config.cluster_spec)
            self.global_model = None

        elif config.local_model:
            # NOTE(review): this check is unreachable — config.replica_model
            # was already handled by the elif branch above, so it is always
            # False here.
            if config.replica_model:
                raise TensorForceError(
                    "Invalid config value for distributed mode.")
            self.device = config.device

            # Recursively construct the internal global (replica) model with
            # an otherwise identical configuration.
            global_config = config.copy()
            global_config.set(key='replica_model', value=True)

            self.global_model = self.__class__(states_spec=states_spec,
                                               actions_spec=actions_spec,
                                               config=global_config,
                                               **kwargs)

        else:
            raise TensorForceError(
                "Invalid config value for distributed mode.")

        with tf.device(device_name_or_function=self.device):

            # Timestep and episode
            # TODO: various modes !!!
            if self.global_model is None:
                # TODO: Variables seem to re-initialize in the beginning every time a runner starts
                self.timestep = tf.get_variable(name='timestep',
                                                dtype=tf.int32,
                                                initializer=0,
                                                trainable=False)
                self.episode = tf.get_variable(name='episode',
                                               dtype=tf.int32,
                                               initializer=0,
                                               trainable=False)
            else:
                # Local model shares the global model's counters.
                self.timestep = self.global_model.timestep
                self.episode = self.global_model.episode

            with tf.name_scope(name=config.scope):

                def custom_getter(getter, name, registered=False, **kwargs):
                    # Records every variable created under this scope.
                    # `registered=True` is used by nested calls to avoid
                    # double-registering the same variable.
                    variable = getter(
                        name=name,
                        **kwargs)  # Top-level, hence no 'registered'
                    if not registered:
                        self.all_variables[name] = variable
                        # Trainable variables outside the optimizer's own
                        # scope are the model's "proper" variables.
                        if kwargs.get(
                                'trainable',
                                True) and not name.startswith('optimization'):
                            self.variables[name] = variable
                        if 'variables' in self.summary_labels:
                            summary = tf.summary.histogram(name=name,
                                                           values=variable)
                            self.summaries.append(summary)
                    return variable

                # Create placeholders, tf functions, internals, etc
                self.initialize(custom_getter=custom_getter)

                # Input tensors (identity/getter wrappers over the raw
                # placeholders created in initialize()).
                states = self.get_states(states=self.state_inputs)
                internals = [
                    tf.identity(input=internal)
                    for internal in self.internal_inputs
                ]
                actions = self.get_actions(actions=self.action_inputs)
                terminal = tf.identity(input=self.terminal_input)
                reward = self.get_reward(states=states,
                                         internals=internals,
                                         terminal=terminal,
                                         reward=self.reward_input)

                # Stop gradients for input preprocessing
                states = {
                    name: tf.stop_gradient(input=state)
                    for name, state in states.items()
                }
                actions = {
                    name: tf.stop_gradient(input=action)
                    for name, action in actions.items()
                }
                reward = tf.stop_gradient(input=reward)

                # Optimizer
                if config.optimizer is None:
                    self.optimizer = None
                elif config.local_model and not config.replica_model:
                    # If local_model mode and not internal global model
                    self.optimizer = GlobalOptimizer(
                        optimizer=config.optimizer)
                else:
                    self.optimizer = Optimizer.from_spec(spec=config.optimizer)

                # Create output fetch operations
                self.create_output_operations(states=states,
                                              internals=internals,
                                              actions=actions,
                                              terminal=terminal,
                                              reward=reward,
                                              deterministic=self.deterministic)

        if config.local_model and config.replica_model:
            # If local_model mode and internal global model: the owning local
            # model handles session/Supervisor setup, so stop here.
            return

        # Local and global initialize operations
        if config.local_model:
            init_op = tf.variables_initializer(
                var_list=self.global_model.get_variables(
                    include_non_trainable=True))
            local_init_op = tf.variables_initializer(
                var_list=self.get_variables(include_non_trainable=True))

        else:
            init_op = tf.variables_initializer(var_list=self.get_variables(
                include_non_trainable=True))
            local_init_op = None

        # Summary operation (only if any summaries were registered)
        if len(self.get_summaries()) > 0:
            summary_op = tf.summary.merge(inputs=self.get_summaries())
        else:
            summary_op = None

        # TODO: MonitoredSession or so?
        # Supervisor manages initialization, checkpointing and summaries;
        # task 0 acts as chief.
        self.supervisor = tf.train.Supervisor(
            is_chief=(config.task_index == 0),
            init_op=init_op,
            local_init_op=local_init_op,
            logdir=config.model_directory,
            summary_op=summary_op,
            global_step=self.timestep,
            save_summaries_secs=config.summary_frequency,
            save_model_secs=config.save_frequency
            # checkpoint_basename='model.ckpt'
            # session_manager=None
        )

        # tf.ConfigProto(device_filters=['/job:ps', '/job:worker/task:{}/cpu:0'.format(self.task_index)])
        if config.parameter_server:
            self.server = tf.train.Server(
                server_or_cluster_def=config.cluster_spec,
                job_name='ps',
                task_index=config.task_index,
                # config=tf.ConfigProto(device_filters=["/job:ps"])
                # config=tf.ConfigProto(
                #     inter_op_parallelism_threads=2,
                #     log_device_placement=True
                # )
            )

            # Param server does nothing actively
            # NOTE(review): server.join() blocks forever, so __init__ never
            # returns for a parameter-server process.
            self.server.join()

        elif config.cluster_spec is not None:
            # Worker process: start a server and connect the managed session
            # to it.
            self.server = tf.train.Server(
                server_or_cluster_def=config.cluster_spec,
                job_name='worker',
                task_index=config.task_index,
                # config=tf.ConfigProto(device_filters=["/job:ps"])
                # config=tf.ConfigProto(
                #     inter_op_parallelism_threads=2,
                #     log_device_placement=True
                # )
            )

            # Standard services (checkpointing/summaries) only when a model
            # directory is configured.
            self.managed_session = self.supervisor.managed_session(
                master=self.server.target,
                start_standard_services=(config.model_directory is not None))
            self.session = self.managed_session.__enter__()

        else:
            # Non-distributed: in-process managed session.
            self.managed_session = self.supervisor.managed_session(
                start_standard_services=(config.model_directory is not None))
            self.session = self.managed_session.__enter__()