Example #1
    def initialize_given_variables(self, *, variables):
        assert not self.root.is_initialized and not self.is_initialized_given_variables

        for module in self.this_submodules:
            if isinstance(module, Optimizer):
                module.initialize_given_variables(variables=variables)

        # Replace "/" with "_" to ensure TensorDict is flat
        self.variables_spec = TensorsSpec(
            ((var.name[:-2].replace('/', '_'),
              TensorSpec(type=tf_util.dtype(x=var, fallback_tf_dtype=True),
                         shape=tf_util.shape(x=var))) for var in variables))

        self.is_initialized_given_variables = True

        if self.config.create_debug_assertions:
            self.is_initialized = False
            # Debug-only variables: rolling history of per-variable zero-delta checks
            self.zero_check_history = self.variable(
                name='zero_check_history',
                spec=TensorSpec(type='bool', shape=(3, len(variables))),
                initializer='zeros',
                is_trainable=False,
                is_saved=False)
            self.zero_check_index = self.variable(
                name='zero_check_index',
                spec=TensorSpec(type='int', shape=()),
                initializer='zeros',
                is_trainable=False,
                is_saved=False)
            self.is_initialized = True
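
The key expression above, var.name[:-2].replace('/', '_'), strips the trailing ':0' output suffix from a TensorFlow variable name and replaces scope separators so the TensorsSpec keys stay flat. A minimal sketch of the transformation, assuming a typical scoped variable name (the name itself is hypothetical):

    # Hypothetical variable name as TensorFlow reports it: '<scope>/<name>:<output-index>'
    name = 'policy/layer0/weights:0'
    flat_key = name[:-2].replace('/', '_')
    assert flat_key == 'policy_layer0_weights'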
Example #2
        def body(deltas, previous_perturbations):
            with tf.control_dependencies(control_inputs=deltas):
                # Sample fresh random perturbations, scaled by the learning rate
                perturbations = [
                    learning_rate *
                    tf.random.normal(shape=tf_util.shape(x=variable),
                                     dtype=tf_util.get_dtype(type='float'))
                    for variable in variables
                ]
                # Applying these deltas replaces the previously applied
                # perturbations in place
                perturbation_deltas = [
                    pert - prev_pert for pert, prev_pert in zip(
                        perturbations, previous_perturbations)
                ]
                assignments = list()
                for variable, delta in zip(variables, perturbation_deltas):
                    assignments.append(
                        variable.assign_add(delta=delta, read_value=False))

            with tf.control_dependencies(control_inputs=assignments):
                perturbed_loss = fn_loss(**arguments.to_kwargs())
                # +1 if the perturbations decreased the loss, -1 otherwise
                direction = tf.math.sign(x=(unperturbed_loss - perturbed_loss))
                deltas = [
                    delta + direction * perturbation
                    for delta, perturbation in zip(deltas, perturbations)
                ]

            return deltas, perturbations
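
This body implements one iteration of a random-perturbation search: perturb each variable in place, re-evaluate the loss, and accumulate each perturbation signed by whether it helped. A minimal single-array NumPy sketch of the same idea, where fn_loss and all names are hypothetical stand-ins:

    import numpy as np

    def perturbation_step(x, fn_loss, unperturbed_loss, learning_rate, delta, rng):
        # Sample a random perturbation and evaluate the loss at the perturbed point
        perturbation = learning_rate * rng.normal(size=x.shape)
        perturbed_loss = fn_loss(x + perturbation)
        # +1 if the perturbation decreased the loss, -1 otherwise
        direction = np.sign(unperturbed_loss - perturbed_loss)
        # Accumulate the signed perturbation into the running update estimate
        return delta + direction * perturbation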
Example #3
    def step(self, *, arguments, variables, **kwargs):
        source_variables = kwargs['source_variables']

        assert all(
            tf_util.shape(x=source) == tf_util.shape(x=target)
            for source, target in zip(source_variables, variables))

        one = tf_util.constant(value=1, dtype='int')

        def apply_sync():
            # Reset the countdown and move the target variables toward the sources
            next_sync_updated = self.next_sync.assign(
                value=self.sync_frequency.value(), read_value=False)

            with tf.control_dependencies(control_inputs=(next_sync_updated, )):
                update_weight = self.update_weight.value()
                deltas = list()
                assignments = list()
                for source_variable, target_variable in zip(
                        source_variables, variables):
                    delta = update_weight * (source_variable - target_variable)
                    deltas.append(delta)
                    assignments.append(
                        target_variable.assign_add(delta=delta,
                                                   read_value=False))

            with tf.control_dependencies(control_inputs=assignments):
                # Trivial operation to enforce control dependency
                return [tf_util.identity(input=delta) for delta in deltas]

        def no_sync():
            # Only decrement the countdown and return zero deltas
            next_sync_updated = self.next_sync.assign_sub(delta=one,
                                                          read_value=False)

            with tf.control_dependencies(control_inputs=(next_sync_updated, )):
                deltas = list()
                for variable in variables:
                    delta = tf_util.zeros(shape=tf_util.shape(x=variable),
                                          dtype='float')
                    deltas.append(delta)
                return deltas

        skip_sync = tf.math.greater(x=self.next_sync, y=one)

        return tf.cond(pred=skip_sync, true_fn=no_sync, false_fn=apply_sync)
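
The skip_sync condition makes tf.cond run no_sync while the countdown is above one, and apply_sync once it reaches one. A minimal eager-mode sketch of the same countdown pattern, with an arbitrary frequency of 3:

    import tensorflow as tf

    next_sync = tf.Variable(3, dtype=tf.int64)

    def no_sync():
        next_sync.assign_sub(1)
        return tf.constant(False)  # no synchronization this step

    def apply_sync():
        next_sync.assign(3)        # reset the countdown
        return tf.constant(True)   # synchronization happened

    for _ in range(7):
        synced = tf.cond(pred=next_sync > 1, true_fn=no_sync, false_fn=apply_sync)
        # synchronizes on every third call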
Example #4
    def initialize_given_variables(self, *, variables, register_summaries):
        super().initialize_given_variables(
            variables=variables, register_summaries=register_summaries)

        values_spec = TensorsSpec(((var.name,
                                    TensorSpec(type=tf_util.dtype(x=var),
                                               shape=tf_util.shape(x=var)))
                                   for var in variables))
        self.line_search.complete_initialize(
            arguments_spec=self.arguments_spec, values_spec=values_spec)
Example #5
    def step(self, *, arguments, variables, **kwargs):
        global_variables = kwargs['global_variables']

        assert all(
            tf_util.shape(x=global_variable) == tf_util.shape(x=local_variable)
            for global_variable, local_variable in zip(global_variables,
                                                       variables))

        local_deltas = self.optimizer.step(arguments=arguments,
                                           variables=variables,
                                           **kwargs)

        with tf.control_dependencies(control_inputs=local_deltas):
            # Apply the locally computed deltas to the global variables
            assignments = list()
            for variable, delta in zip(global_variables, local_deltas):
                assignments.append(
                    variable.assign_add(delta=delta, read_value=False))

        with tf.control_dependencies(control_inputs=assignments):
            # Pull the local variables back to the updated global values
            update_deltas = list()
            for global_variable, local_variable in zip(global_variables,
                                                       variables):
                update_deltas.append(global_variable - local_variable)

            assignments = list()
            for variable, delta in zip(variables, update_deltas):
                assignments.append(
                    variable.assign_add(delta=delta, read_value=False))

            # TODO: Update time, episode, etc (like in Synchronization)?

        with tf.control_dependencies(control_inputs=assignments):
            return [
                local_delta + update_delta for local_delta, update_delta in
                zip(local_deltas, update_deltas)
            ]
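
The value returned above is the total change applied to each local variable: the inner optimizer's delta plus the correction from re-syncing to the global copy. A hypothetical single-parameter walk-through of the accounting:

    # All values hypothetical; mirrors the delta bookkeeping in step() above
    global_value = local_value = 1.0
    local_delta = 0.25                  # inner optimizer already applied this locally
    local_value += local_delta          # 1.25
    global_value += local_delta + 0.1   # global also absorbed 0.1 from another worker
    update_delta = global_value - local_value   # 0.1
    local_value += update_delta         # local now matches global: 1.35
    total = local_delta + update_delta  # 0.35, the overall change to the local variable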
Example #6
        def fn_initial_gradients(*, states, horizons, internals, auxiliaries,
                                 actions, reward, reference):
            if 'policy' in internals:
                policy_internals = internals['policy']
                baseline_internals = internals['baseline']
            else:
                policy_internals = internals
                # TODO: Baseline currently cannot have internal states, since generally only policy
                # internals are passed to policy optimizer
                assert len(baseline.internals_spec) == 0
                baseline_internals = TensorDict()

            actions = policy.act(states=states,
                                 horizons=horizons,
                                 internals=policy_internals,
                                 auxiliaries=auxiliaries,
                                 independent=True,
                                 return_internals=False)
            assert len(actions) == 1
            action = actions.value()
            shape = tf_util.shape(x=action)
            assert len(shape) <= 2

            # Differentiate the baseline's action-value with respect to the action
            with tf.GradientTape(persistent=False,
                                 watch_accessed_variables=False) as tape:
                tape.watch(tensor=action)
                actions_value = baseline.actions_value(
                    states=states,
                    horizons=horizons,
                    internals=baseline_internals,
                    auxiliaries=auxiliaries,
                    actions=actions,
                    reduced=True,
                    return_per_action=False)
                if len(shape) == 1:
                    return -tape.gradient(target=actions_value,
                                          sources=action)[0]
                elif len(shape) == 2 and shape[1] == 1:
                    return -tape.gradient(target=actions_value,
                                          sources=action)[0][0]
                else:
                    assert False
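
Because action is a computed tensor rather than a variable, the tape has to watch it explicitly before the action-value is evaluated. A minimal runnable sketch of the same pattern, with a stand-in for baseline.actions_value:

    import tensorflow as tf

    action = tf.constant([0.5, -1.0])  # computed tensor, not a variable
    with tf.GradientTape(watch_accessed_variables=False) as tape:
        tape.watch(action)
        value = tf.reduce_sum(action ** 2)  # stand-in for the action-value
    gradient = tape.gradient(target=value, sources=action)  # [1.0, -2.0]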
Example #7
    def loss(self, *, states, horizons, internals, auxiliaries, actions, reward, reference, policy):
        reference_spec1 = self.objective1.reference_spec()
        reference_spec2 = self.objective2.reference_spec()
        assert tf_util.shape(x=reference)[1] == reference_spec1.size + reference_spec2.size

        # Split the concatenated reference tensor back into per-objective slices
        reference1 = reference[:, :reference_spec1.size]
        reference1 = tf.reshape(tensor=reference1, shape=((-1,) + reference_spec1.shape))
        reference2 = reference[:, reference_spec1.size:]
        reference2 = tf.reshape(tensor=reference2, shape=((-1,) + reference_spec2.shape))

        loss1 = self.objective1.loss(
            states=states, horizons=horizons, internals=internals, auxiliaries=auxiliaries,
            actions=actions, reward=reward, reference=reference1, policy=policy
        )

        loss2 = self.objective2.loss(
            states=states, horizons=horizons, internals=internals, auxiliaries=auxiliaries,
            actions=actions, reward=reward, reference=reference2, policy=policy
        )

        return loss1 + loss2
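
The loss combines both objectives by slicing their references back out of the concatenated tensor along axis 1 and restoring each objective's reference shape. A NumPy sketch of that slice-and-reshape step, with hypothetical per-objective sizes:

    import numpy as np

    size1, size2 = 2, 1                 # hypothetical reference sizes per objective
    reference = np.arange(12.0).reshape(4, size1 + size2)  # batch of 4
    reference1 = reference[:, :size1].reshape((-1, 2))     # objective 1: shape (2,)
    reference2 = reference[:, size1:].reshape((-1,))       # objective 2: scalar shape ()
    assert reference1.shape == (4, 2) and reference2.shape == (4,)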
Example #8
    def variable(self,
                 *,
                 name,
                 spec,
                 initializer,
                 is_trainable,
                 is_saved,
                 initialization_scale=None):
        assert self.is_initialized is False
        # name
        if not isinstance(name, str):
            raise TensorforceError.type(name='variable',
                                        argument='name',
                                        dtype=type(name))
        # spec
        if not isinstance(spec, TensorSpec):
            raise TensorforceError.type(name='variable',
                                        argument='spec',
                                        dtype=type(spec))
        if spec.is_underspecified():
            raise TensorforceError.value(name='variable',
                                         argument='spec',
                                         value=spec,
                                         hint='underspecified')
        # initializer
        initializer_names = ('constant', 'normal', 'normal-relu', 'ones',
                             'orthogonal', 'orthogonal-relu', 'zeros')
        if not isinstance(initializer, (spec.py_type(), np.ndarray, tf.Tensor)) and \
                initializer not in initializer_names:
            raise TensorforceError.value(name='variable',
                                         argument='initializer',
                                         value=initializer)
        elif isinstance(initializer,
                        np.ndarray) and initializer.dtype != spec.np_type():
            raise TensorforceError.type(name='variable',
                                        argument='initializer',
                                        dtype=initializer.dtype)
        elif isinstance(
                initializer,
                tf.Tensor) and tf_util.dtype(x=initializer) != spec.tf_type():
            raise TensorforceError.type(name='variable',
                                        argument='initializer',
                                        dtype=tf_util.dtype(x=initializer))
        # initialization_scale
        if initialization_scale is not None:
            if isinstance(initializer, (spec.py_type(), np.ndarray, tf.Tensor)) or \
                    initializer not in ('constant', 'orthogonal', 'orthogonal-relu'):
                raise TensorforceError.invalid(
                    name='variable',
                    argument='initialization_scale',
                    condition='initializer not orthogonal')
            elif not isinstance(initialization_scale, spec.py_type()):
                raise TensorforceError.type(name='variable',
                                            argument='initialization_scale',
                                            dtype=type(initialization_scale),
                                            hint='!= float')
        # is_trainable
        if not isinstance(is_trainable, bool):
            raise TensorforceError.type(name='variable',
                                        argument='is_trainable',
                                        dtype=type(is_trainable))
        elif is_trainable and spec.type != 'float':
            raise TensorforceError.value(name='variable',
                                         argument='is_trainable',
                                         value=is_trainable,
                                         condition='spec.type != float')
        # is_saved
        if not isinstance(is_saved, bool):
            raise TensorforceError.type(name='variable',
                                        argument='is_saved',
                                        dtype=type(is_saved))

        # Variable initializer
        if isinstance(initializer, spec.py_type()):
            initializer = tf_util.constant(value=initializer,
                                           dtype=spec.type,
                                           shape=spec.shape)
        elif isinstance(initializer, np.ndarray):
            if initializer.shape != spec.shape:
                raise TensorforceError.mismatch(name='Module.variable',
                                                value1='shape',
                                                value2='initializer')
            initializer = tf_util.constant(value=initializer, dtype=spec.type)
        elif isinstance(initializer, tf.Tensor):
            if tf_util.shape(x=initializer) != spec.shape:
                raise TensorforceError.mismatch(name='Module.variable',
                                                value1='shape',
                                                value2='initializer')
            # Use the provided tensor directly as the initial value
        elif not isinstance(initializer, str):
            raise TensorforceError(
                "Invalid variable initializer: {}".format(initializer))
        elif initializer.startswith('normal'):
            if spec.type != 'float':
                raise TensorforceError(
                    message=
                    "Invalid variable initializer value for non-float variable: {}."
                    .format(initializer))
            if initializer.endswith('-relu'):
                stddev = min(0.1,
                             np.sqrt(2.0 / util.product(xs=spec.shape[:-1])))
            else:
                stddev = min(
                    0.1,
                    np.sqrt(
                        2.0 /
                        (util.product(xs=spec.shape[:-1]) + spec.shape[-1])))
            initializer = tf.random.normal(shape=spec.shape,
                                           stddev=stddev,
                                           dtype=spec.tf_type())
        elif initializer.startswith('orthogonal'):
            if spec.type != 'float':
                raise TensorforceError(
                    message=
                    "Invalid variable initializer value for non-float variable: {}."
                    .format(initializer))
            if spec.rank < 2:
                raise TensorforceError(
                    message=
                    "Invalid variable initializer value for 0/1-rank variable: {}."
                    .format(initializer))
            normal = np.random.normal(size=(util.product(xs=spec.shape[:-1]),
                                            spec.shape[-1]))
            u, _, v = np.linalg.svd(a=normal, full_matrices=False)
            orthogonal = u if u.shape[1] == spec.shape[-1] else v
            if initializer.endswith('-relu'):
                orthogonal = orthogonal * np.sqrt(2.0)
            if initialization_scale is not None and initialization_scale != 1.0:
                if initialization_scale <= 0.0:
                    raise TensorforceError.value(
                        name='variable',
                        argument='initialization_scale',
                        value=initialization_scale,
                        hint='<= 0.0')
                orthogonal = orthogonal * initialization_scale
            initializer = tf_util.constant(value=orthogonal.reshape(
                spec.shape),
                                           dtype=spec.type)
        elif initializer == 'zeros':
            initializer = tf_util.zeros(shape=spec.shape, dtype=spec.type)
        elif initializer == 'ones':
            initializer = tf_util.ones(shape=spec.shape, dtype=spec.type)
        elif initializer == 'constant':
            # Note: 'constant' requires initialization_scale to be set
            initializer = tf.fill(dims=spec.shape,
                                  value=tf_util.constant(
                                      value=initialization_scale,
                                      dtype=spec.type))

        # Variable
        variable = tf.Variable(initial_value=initializer,
                               trainable=is_trainable,
                               validate_shape=True,
                               name=name,
                               dtype=spec.tf_type(),
                               shape=spec.shape)
        variable.is_saved = is_saved

        return variable
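
The 'orthogonal' branch above relies on the fact that the singular-vector factors of a Gaussian random matrix are orthonormal. A small NumPy sketch verifying that property under hypothetical fan-in/fan-out sizes:

    import numpy as np

    fan_in, fan_out = 6, 4  # hypothetical shapes
    normal = np.random.normal(size=(fan_in, fan_out))
    u, _, v = np.linalg.svd(a=normal, full_matrices=False)
    orthogonal = u if u.shape[1] == fan_out else v
    # Columns are orthonormal: W^T W equals the identity
    assert np.allclose(orthogonal.T @ orthogonal, np.eye(fan_out), atol=1e-8)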