Ejemplo n.º 1
0
    def setup_components_and_tf_funcs(self, custom_getter=None):
        """
        Builds the DPG target-model components and TensorFlow template functions.

        Creates the target policy network plus its synchronization optimizer and
        distributions, the critic and target-critic networks with their
        optimizers, and registers the tf templates used for target action
        selection and target Q-value prediction.

        Args:
            custom_getter: Optional custom getter, forwarded through the
                super-call and used when creating the tf templates.

        Returns:
            The custom getter returned by the super-call.
        """
        custom_getter = super(DPGTargetModel, self).setup_components_and_tf_funcs(custom_getter)

        # Target network (same spec as the main network, separate scope)
        self.target_network = Network.from_spec(
            spec=self.network_spec,
            kwargs=dict(scope='target-network', summary_labels=self.summary_labels)
        )

        # Target network optimizer: periodic soft sync from the main network
        self.target_network_optimizer = Synchronization(
            sync_frequency=self.target_sync_frequency,
            update_weight=self.target_update_weight
        )

        # Target network distributions
        self.target_distributions = self.create_distributions()

        # Critic network
        self.critic_network = Network.from_spec(
            spec=self.critic_network_spec,
            kwargs=dict(scope='critic')
        )

        # Target critic: same spec as the critic, separate scope
        self.target_critic_network = Network.from_spec(
            spec=self.critic_network_spec,
            kwargs=dict(scope='target-critic')
        )

        self.critic_optimizer = Optimizer.from_spec(
            spec=self.critic_optimizer_spec,
            kwargs=dict(summary_labels=self.summary_labels)
        )

        # Target critic optimizer: same sync schedule as the target network
        self.target_critic_optimizer = Synchronization(
            sync_frequency=self.target_sync_frequency,
            update_weight=self.target_update_weight
        )

        # Template for computing target-network actions and internals
        self.fn_target_actions_and_internals = tf.make_template(
            name_='target-actions-and-internals',
            func_=self.tf_target_actions_and_internals,
            custom_getter_=custom_getter
        )

        # Template for predicting Q-values from the target critic
        self.fn_predict_target_q = tf.make_template(
            name_='predict-target-q',
            func_=self.tf_predict_target_q,
            custom_getter_=custom_getter
        )
        return custom_getter
Ejemplo n.º 2
0
    def initialize(self, custom_getter):
        """
        Initializes the Q-model's target components on top of the super-class setup.

        Args:
            custom_getter: Custom getter forwarded to the super-class initialize.
        """
        super(QModel, self).initialize(custom_getter)

        # TEMP: Random sampling fix — one extra placeholder per state spec
        # entry for the "next" state, named 'next-<name>'.
        if self.random_sampling_fix:
            self.next_state_inputs = dict()
            for name, state in self.states_spec.items():
                self.next_state_inputs[name] = tf.placeholder(
                    dtype=util.tf_dtype(state['type']),
                    shape=(None, ) + tuple(state['shape']),
                    name=('next-' + name))

        # Target network (same spec as the main network, separate scope)
        self.target_network = Network.from_spec(
            spec=self.network_spec,
            kwargs=dict(scope='target', summary_labels=self.summary_labels))

        # Target network optimizer: periodic soft sync from the main network
        self.target_optimizer = Synchronization(
            sync_frequency=self.target_sync_frequency,
            update_weight=self.target_update_weight)

        # Target network distributions
        self.target_distributions = self.generate_distributions(
            self.actions_spec, self.distributions_spec, self.summary_labels)
Ejemplo n.º 3
0
    def initialize(self, custom_getter):
        """
        Initializes the Q-model's target network and its sync optimizer.

        Args:
            custom_getter: Custom getter forwarded to the super-class initialize.
        """
        super(QModel, self).initialize(custom_getter)

        # Target network (same spec as the main network, separate scope)
        self.target_network = Network.from_spec(
            spec=self.network_spec,
            kwargs=dict(scope='target', summary_labels=self.summary_labels))

        # Target network optimizer: periodic soft sync from the main network
        self.target_optimizer = Synchronization(
            sync_frequency=self.target_sync_frequency,
            update_weight=self.target_update_weight)
Ejemplo n.º 4
0
    def setup_components_and_tf_funcs(self, custom_getter=None):
        """
        Builds the DPG target-model components and TensorFlow template functions.

        Creates the target policy network plus its synchronization optimizer
        and distributions, the DDPG critic and target-critic networks with
        their optimizers, and registers the tf templates used for target
        action selection and target Q-value prediction.

        Args:
            custom_getter: Optional custom getter, forwarded through the
                super-call and used when creating the tf templates.

        Returns:
            The custom getter returned by the super-call.
        """
        custom_getter = super(
            DPGTargetModel, self).setup_components_and_tf_funcs(custom_getter)

        # Target network (same spec as the main network, separate scope)
        self.target_network = Network.from_spec(
            spec=self.network_spec,
            kwargs=dict(scope='target-network',
                        summary_labels=self.summary_labels))

        # Target network optimizer: periodic soft sync from the main network
        self.target_network_optimizer = Synchronization(
            sync_frequency=self.target_sync_frequency,
            update_weight=self.target_update_weight)

        # Target network distributions
        self.target_distributions = self.create_distributions()

        # Critic: layer sizes are read from the first two entries of the
        # critic network spec (assumes a list-style spec whose entries carry a
        # 'size' key — TODO confirm against callers).
        size_t0 = self.critic_network_spec[0]['size']
        size_t1 = self.critic_network_spec[1]['size']

        self.critic = DDPGCriticNetwork(scope='critic',
                                        size_t0=size_t0,
                                        size_t1=size_t1)
        self.critic_optimizer = Optimizer.from_spec(
            spec=self.critic_optimizer_spec,
            kwargs=dict(summary_labels=self.summary_labels))

        # Target critic: same layer sizes as the critic, separate scope
        self.target_critic = DDPGCriticNetwork(scope='target-critic',
                                               size_t0=size_t0,
                                               size_t1=size_t1)

        # Target critic optimizer: same sync schedule as the target network
        self.target_critic_optimizer = Synchronization(
            sync_frequency=self.target_sync_frequency,
            update_weight=self.target_update_weight)

        # Template for computing target-network actions and internals
        self.fn_target_actions_and_internals = tf.make_template(
            name_='target-actions-and-internals',
            func_=self.tf_target_actions_and_internals,
            custom_getter_=custom_getter)

        # Template for predicting Q-values from the target critic
        self.fn_predict_target_q = tf.make_template(
            name_='predict-target-q',
            func_=self.tf_predict_target_q,
            custom_getter_=custom_getter)
        return custom_getter
Ejemplo n.º 5
0
    def __init__(self, states_spec, actions_spec, network_spec, config):
        """
        Distribution model constructor.

        Builds the policy network and one distribution per action (selected by
        explicit config, else by the action's type), then delegates to the
        super-class constructor.

        Args:
            states_spec: States specification dict.
            actions_spec: Actions specification dict; each entry carries at
                least 'type' and 'shape'.
            network_spec: Network specification passed to Network.from_spec.
            config: Configuration object providing scope, summary_labels,
                distributions and entropy_regularization.
        """
        with tf.name_scope(name=config.scope):
            # Network
            self.network = Network.from_spec(
                spec=network_spec,
                kwargs=dict(summary_labels=config.summary_labels))

            # Distributions: one per action, each in its own name scope
            self.distributions = dict()
            for name, action in actions_spec.items():

                with tf.name_scope(name=(name + '-distribution')):

                    # Explicitly configured distribution takes precedence
                    if config.distributions is not None and name in config.distributions:
                        kwargs = dict(action)
                        kwargs['summary_labels'] = config.summary_labels
                        self.distributions[name] = Distribution.from_spec(
                            spec=config.distributions[name], kwargs=kwargs)

                    # Otherwise, pick a default distribution by action type
                    elif action['type'] == 'bool':
                        self.distributions[name] = Bernoulli(
                            shape=action['shape'],
                            summary_labels=config.summary_labels)

                    elif action['type'] == 'int':
                        self.distributions[name] = Categorical(
                            shape=action['shape'],
                            num_actions=action['num_actions'],
                            summary_labels=config.summary_labels)

                    elif action['type'] == 'float':
                        # Bounded floats use Beta, unbounded use Gaussian
                        if 'min_value' in action:
                            self.distributions[name] = Beta(
                                shape=action['shape'],
                                min_value=action['min_value'],
                                max_value=action['max_value'],
                                summary_labels=config.summary_labels)

                        else:
                            self.distributions[name] = Gaussian(
                                shape=action['shape'],
                                summary_labels=config.summary_labels)

        # Entropy regularization (None disables; otherwise must be non-negative)
        assert config.entropy_regularization is None or config.entropy_regularization >= 0.0
        self.entropy_regularization = config.entropy_regularization

        super(DistributionModel, self).__init__(states_spec=states_spec,
                                                actions_spec=actions_spec,
                                                network_spec=network_spec,
                                                config=config)
Ejemplo n.º 6
0
    def initialize(self, custom_getter):
        """
        Initializes the distribution model's network, distributions and
        KL-divergence template.

        Args:
            custom_getter: Custom getter forwarded to the super-class
                initialize and to tf.make_template.
        """
        super(DistributionModel, self).initialize(custom_getter)

        # Network
        self.network = Network.from_spec(
            spec=self.network_spec,
            kwargs=dict(summary_labels=self.summary_labels))

        # Distributions: one per action, each in its own name scope
        self.distributions = dict()
        for name, action in self.actions_spec.items():
            with tf.name_scope(name=(name + '-distribution')):

                # Explicitly configured distribution takes precedence
                if self.distributions_spec is not None and name in self.distributions_spec:
                    kwargs = dict(action)
                    kwargs['summary_labels'] = self.summary_labels
                    self.distributions[name] = Distribution.from_spec(
                        spec=self.distributions_spec[name], kwargs=kwargs)

                # Otherwise, pick a default distribution by action type
                elif action['type'] == 'bool':
                    self.distributions[name] = Bernoulli(
                        shape=action['shape'],
                        summary_labels=self.summary_labels)

                elif action['type'] == 'int':
                    self.distributions[name] = Categorical(
                        shape=action['shape'],
                        num_actions=action['num_actions'],
                        summary_labels=self.summary_labels)

                elif action['type'] == 'float':
                    # Bounded floats use Beta, unbounded use Gaussian
                    if 'min_value' in action:
                        self.distributions[name] = Beta(
                            shape=action['shape'],
                            min_value=action['min_value'],
                            max_value=action['max_value'],
                            summary_labels=self.summary_labels)

                    else:
                        self.distributions[name] = Gaussian(
                            shape=action['shape'],
                            summary_labels=self.summary_labels)

        # Network internals: append the network's internal state inputs/inits
        self.internal_inputs.extend(self.network.internal_inputs())
        self.internal_inits.extend(self.network.internal_inits())

        # KL divergence function
        self.fn_kl_divergence = tf.make_template(name_='kl-divergence',
                                                 func_=self.tf_kl_divergence,
                                                 custom_getter_=custom_getter)
Ejemplo n.º 7
0
    def __init__(self, network_spec, scope='network-baseline', summary_labels=()):
        """
        Network baseline.

        Builds a network from the given spec plus a single-unit linear output
        layer used for the baseline prediction. The network must be stateless
        (no internal inputs).

        Args:
            network_spec: Network specification dict.
            scope: TensorFlow name scope for the baseline components.
            summary_labels: Summary labels passed to the super-class.
        """
        with tf.name_scope(name=scope):
            self.network = Network.from_spec(spec=network_spec)
            # Baseline networks must not carry internal (recurrent) state
            assert len(self.network.internal_inputs()) == 0

            # Scalar prediction head
            self.linear = Linear(size=1, bias=0.0, scope='prediction')

        super(NetworkBaseline, self).__init__(scope, summary_labels)
Ejemplo n.º 8
0
    def __init__(self, network, scope='network-baseline', summary_labels=()):
        """
        Network baseline.

        Builds a network from the given spec plus a single-unit linear output
        layer used for the baseline prediction. The network must be stateless
        (no internals).

        Args:
            network: Network specification dict.
            scope: Scope passed to the super-class constructor.
            summary_labels: Summary labels forwarded to the network and the
                super-class.
        """
        self.network = Network.from_spec(
            spec=network, kwargs=dict(summary_labels=summary_labels))
        # Baseline networks must not carry internal (recurrent) state
        assert len(self.network.internals_spec()) == 0

        # Scalar prediction head
        self.linear = Linear(size=1, bias=0.0, scope='prediction')

        super(NetworkBaseline, self).__init__(scope=scope,
                                              summary_labels=summary_labels)
Ejemplo n.º 9
0
    def initialize(self, custom_getter):
        """
        Initializes the DPG target-model components.

        Creates the target policy network plus its synchronization optimizer
        and distributions, the DDPG critic and target-critic networks with
        their optimizers, and registers the tf templates used for target
        action selection and target Q-value prediction.

        Args:
            custom_getter: Custom getter forwarded to the super-class
                initialize and to tf.make_template.
        """
        super(DPGTargetModel, self).initialize(custom_getter)

        # Target network (same spec as the main network, separate scope)
        self.target_network = Network.from_spec(
            spec=self.network_spec,
            kwargs=dict(scope='target-network',
                        summary_labels=self.summary_labels))

        # Target network optimizer: periodic soft sync from the main network
        self.target_network_optimizer = Synchronization(
            sync_frequency=self.target_sync_frequency,
            update_weight=self.target_update_weight)

        # Target network distributions
        self.target_distributions = self.create_distributions()

        # Critic: layer sizes read from the critic network spec
        size_t0 = self.critic_network_spec['size_t0']
        size_t1 = self.critic_network_spec['size_t1']

        self.critic = DDPGCriticNetwork(scope='critic',
                                        size_t0=size_t0,
                                        size_t1=size_t1)
        self.critic_optimizer = Optimizer.from_spec(
            spec=self.critic_optimizer_spec,
            kwargs=dict(summary_labels=self.summary_labels))

        # Target critic: same layer sizes as the critic, separate scope
        self.target_critic = DDPGCriticNetwork(scope='target-critic',
                                               size_t0=size_t0,
                                               size_t1=size_t1)

        # Target critic optimizer: same sync schedule as the target network
        self.target_critic_optimizer = Synchronization(
            sync_frequency=self.target_sync_frequency,
            update_weight=self.target_update_weight)

        # Template for computing target-network actions and internals
        self.fn_target_actions_and_internals = tf.make_template(
            name_='target-actions-and-internals',
            func_=self.tf_target_actions_and_internals,
            custom_getter_=custom_getter)

        # Template for predicting Q-values from the target critic
        self.fn_predict_target_q = tf.make_template(
            name_='predict-target-q',
            func_=self.tf_predict_target_q,
            custom_getter_=custom_getter)
Ejemplo n.º 10
0
    def initialize(self, custom_getter):
        """
        Initializes the distribution model's network, distributions and
        KL-divergence template.

        The network is created before the super-call because the super-class
        initialize requires the internals_spec attribute to be populated.

        Args:
            custom_getter: Custom getter forwarded to the super-class
                initialize and to tf.make_template.
        """
        # Network
        self.network = Network.from_spec(
            spec=self.network_spec,
            kwargs=dict(summary_labels=self.summary_labels))

        # Before super-call since internals_spec attribute is required subsequently.
        assert len(self.internals_spec) == 0
        self.internals_spec = self.network.internals_spec()

        super(DistributionModel, self).initialize(custom_getter)

        # Distributions
        self.distributions = self.create_distributions()

        # KL divergence function
        self.fn_kl_divergence = tf.make_template(name_='kl-divergence',
                                                 func_=self.tf_kl_divergence,
                                                 custom_getter_=custom_getter)
Ejemplo n.º 11
0
    def initialize(self, custom_getter):
        """
        Initializes the distribution model's network, distributions and
        KL-divergence template.

        Args:
            custom_getter: Custom getter forwarded to the super-class
                initialize and to tf.make_template.
        """
        super(DistributionModel, self).initialize(custom_getter)

        # Network
        self.network = Network.from_spec(
            spec=self.network_spec,
            kwargs=dict(summary_labels=self.summary_labels))

        # Distributions
        self.distributions = self.create_distributions()

        # Network internals: append the network's internal state inputs/inits
        self.internals_input.extend(self.network.internals_input())
        self.internals_init.extend(self.network.internals_init())

        # KL divergence function (template name is scoped under self.scope)
        self.fn_kl_divergence = tf.make_template(name_=(self.scope +
                                                        '/kl-divergence'),
                                                 func_=self.tf_kl_divergence,
                                                 custom_getter_=custom_getter)
Ejemplo n.º 12
0
    def __init__(self, states_spec, actions_spec, network_spec, config):
        """
        Q-model constructor.

        Builds the target network and its synchronization optimizer, stores
        the double-Q and Huber-loss settings, then delegates to the
        super-class constructor.

        Args:
            states_spec: States specification dict.
            actions_spec: Actions specification dict.
            network_spec: Network specification for main and target networks.
            config: Configuration object providing scope,
                target_sync_frequency, target_update_weight, double_q_model
                and huber_loss.
        """
        with tf.name_scope(name=config.scope):
            # Target network (same spec as the main network, separate scope)
            self.target_network = Network.from_spec(
                spec=network_spec, kwargs=dict(scope='target'))

            # Target network optimizer: periodic soft sync from the main network
            self.target_optimizer = Synchronization(
                sync_frequency=config.target_sync_frequency,
                update_weight=config.target_update_weight)

        self.double_q_model = config.double_q_model

        # Huber loss clipping threshold (None disables; otherwise must be > 0)
        assert config.huber_loss is None or config.huber_loss > 0.0
        self.huber_loss = config.huber_loss

        super(QModel, self).__init__(states_spec=states_spec,
                                     actions_spec=actions_spec,
                                     network_spec=network_spec,
                                     config=config)
Ejemplo n.º 13
0
    def setup_components_and_tf_funcs(self, custom_getter=None):
        """
        Creates and stores Network and Distribution objects.
        Generates and stores all template functions.

        The network is created before the super-call because the super-class
        setup requires a populated internals_spec (non-empty for RNNs). For
        each internal, a placeholder named 'internal-<name>' and a zero
        initialization array are registered.

        Args:
            custom_getter: Optional custom getter, forwarded through the
                super-call and used when creating the tf templates.

        Returns:
            The custom getter returned by the super-call.

        Raises:
            TensorForceError: If an internal's 'initialization' value is not
                'zeros' (the only supported scheme here).
        """
        # Create network before super-call, since non-empty internals_spec attribute (for RNN) is required subsequently.
        self.network = Network.from_spec(
            spec=self.network_spec,
            kwargs=dict(summary_labels=self.summary_labels))

        # Now that we have the network component: We can create the internals placeholders.
        assert len(self.internals_spec) == 0
        self.internals_spec = self.network.internals_spec()
        for name in sorted(self.internals_spec):
            internal = self.internals_spec[name]
            self.internals_input[name] = tf.placeholder(
                dtype=util.tf_dtype(internal['type']),
                shape=(None, ) + tuple(internal['shape']),
                name=('internal-' + name))
            if internal['initialization'] == 'zeros':
                self.internals_init[name] = np.zeros(shape=internal['shape'])
            else:
                raise TensorForceError(
                    "Invalid internal initialization value.")

        # And only then call super.
        custom_getter = super(
            DistributionModel,
            self).setup_components_and_tf_funcs(custom_getter)

        # Distributions
        self.distributions = self.create_distributions()

        # KL divergence function
        self.fn_kl_divergence = tf.make_template(name_='kl-divergence',
                                                 func_=self.tf_kl_divergence,
                                                 custom_getter_=custom_getter)

        return custom_getter
Ejemplo n.º 14
0
    def initialize(self, custom_getter):
        """
        Initializes the Q-model's target network, target optimizer and target
        distributions on top of the super-class setup.

        Args:
            custom_getter: Custom getter forwarded to the super-class
                initialize.
        """
        super(QModel, self).initialize(custom_getter)

        # Target network (built from its own spec, separate 'target' scope)
        self.target_network = Network.from_spec(
            spec=self.target_network_spec,
            kwargs=dict(scope='target', summary_labels=self.summary_labels))

        # Target network optimizer (generic Optimizer spec, unlike the
        # Synchronization used in related models)
        self.target_optimizer = Optimizer.from_spec(
            spec=self.target_optimizer_spec)

        # Target network distributions
        self.target_distributions = self.create_distributions()
Ejemplo n.º 15
0
    def initialize(self, custom_getter):
        """
        Initializes the deterministic evolution-strategies model.

        Creates the policy network, registers shared graph-collection
        variables used for cross-worker coordination (noise seeds, evolution
        scores, evaluation scores, and per-worker sync/lock flags), and
        defines the placeholders used to feed seeds and scores. Variables are
        created once and reused via graph collections so that multiple workers
        share the same tensors.

        Args:
            custom_getter: Custom getter forwarded to the super-class
                initialize.
        """
        super(DeterministicESModel, self).initialize(custom_getter)

        # Network
        self.network = Network.from_spec(
            spec=self.network_spec,
            kwargs=dict(summary_labels=self.summary_labels))

        # Network internals: append the network's internal state inputs/inits
        self.internals_input.extend(self.network.internals_input())
        self.internals_init.extend(self.network.internals_init())

        # Seed: shared (num_workers, vec_len) int variable; create it on first
        # use, otherwise reuse the one already in the collection.
        collection = self.graph.get_collection(name='noise_seed')
        if len(collection) == 0:
            self.seed = tf.get_variable(
                name='noise_seed',
                shape=(self.num_workers, self.vec_len),
                dtype=util.tf_dtype('int'),
                initializer=tf.zeros_initializer(dtype=util.tf_dtype('int')))
            self.graph.add_to_collection(name='noise_seed', value=self.seed)
        else:
            assert len(collection) == 1
            self.seed = collection[0]

        # Score: shared (num_workers, 2 * vec_len) float variable, same
        # create-or-reuse pattern as the seed.
        collection = self.graph.get_collection(name='evolution_score')
        if len(collection) == 0:
            self.score = tf.get_variable(
                name='evolution_score',
                shape=(self.num_workers, 2 * self.vec_len),
                dtype=util.tf_dtype('float'),
                initializer=tf.zeros_initializer(dtype=util.tf_dtype('float')))
            self.graph.add_to_collection(name='evolution_score',
                                         value=self.score)
        else:
            assert len(collection) == 1
            self.score = collection[0]

        # Evaluation score: shared (num_workers, eval_len) float variable,
        # same create-or-reuse pattern.
        collection = self.graph.get_collection(name='evaluation_score')
        if len(collection) == 0:
            self.eval_score = tf.get_variable(
                name='evaluation_score',
                shape=(self.num_workers, self.eval_len),
                dtype=util.tf_dtype('float'),
                initializer=tf.zeros_initializer(dtype=util.tf_dtype('float')))
            self.graph.add_to_collection(name='evaluation_score',
                                         value=self.eval_score)
        else:
            assert len(collection) == 1
            self.eval_score = collection[0]

        # Per-worker sync variables: a 'door' (initially 0) and a 'lock'
        # (initially 1) flag for each worker 1..num_workers-1, stored in the
        # 'sync_var' collection interleaved as [door_1, lock_1, door_2, ...].
        self.doors = {}
        self.locks = {}
        self.lock_collection = self.graph.get_collection(name="sync_var")
        if len(self.lock_collection) == 0:
            with tf.variable_scope('sync_var'):
                for i in range(1, self.num_workers):
                    self.doors[i] = tf.get_variable(name="sync_point_%d" % i,
                                                    dtype=util.tf_dtype('int'),
                                                    initializer=tf.constant(
                                                        0, dtype=tf.int32))
                    self.locks[i] = tf.get_variable(name="lock_flag_%d" % i,
                                                    dtype=util.tf_dtype('int'),
                                                    initializer=tf.constant(
                                                        1, dtype=tf.int32))
                    self.graph.add_to_collection(name='sync_var',
                                                 value=self.doors[i])
                    self.graph.add_to_collection(name='sync_var',
                                                 value=self.locks[i])
        else:
            # Reuse: unpack the interleaved door/lock pairs from the collection
            assert len(self.lock_collection) == 2 * (self.num_workers - 1)
            for i in range(1, self.num_workers):
                self.doors[i] = self.lock_collection[2 * (i - 1)]
                self.locks[i] = self.lock_collection[2 * (i - 1) + 1]

        # Refresh the collection reference after possible additions above
        self.lock_collection = self.graph.get_collection(name="sync_var")

        # Seed and score placeholder
        self.seed_ph = tf.placeholder(dtype=tf.int32,
                                      shape=(self.vec_len, ),
                                      name='seed_ph')
        self.score_ph = tf.placeholder(dtype=tf.float32,
                                       shape=(2 * self.vec_len, ),
                                       name='score_ph')
        self.eval_score_ph = tf.placeholder(dtype=tf.float32,
                                            shape=(self.eval_len, ),
                                            name='evaluation_score_ph')