コード例 #1
0
ファイル: value.py プロジェクト: tensorforce/tensorforce
    def reference(self, *, states, horizons, internals, auxiliaries, actions,
                  policy):
        """Return an all-zero float reference with one entry per batch element.

        The batch size is read from the leading dimension of ``actions.value()``.
        ``states``, ``horizons``, ``internals``, ``auxiliaries`` and ``policy`` are
        accepted but not read by this implementation.

        NOTE(review): a policy-based computation (state/action value, optionally
        concatenated per action) previously lived here only as commented-out code;
        presumably it was disabled on purpose -- confirm before re-enabling.
        """
        # The [:1] slice keeps the batch dimension as a rank-1 shape vector.
        batch_shape = tf.shape(input=actions.value())[:1]
        return tf_util.zeros(shape=batch_shape, dtype='float')
コード例 #2
0
    def core_act(self, *, states, internals, auxiliaries, parallel, deterministic, independent):
        """Build a fixed action tensor for every entry of ``self.actions_spec``.

        For each action the first applicable rule decides the value:

        1. a user-specified entry in ``self.action_values``,
        2. the first unmasked choice, if int action masking is enabled and the
           action is an int action with ``num_values``,
        3. the midpoint between ``min_value`` and ``max_value``,
        4. ``min_value`` if only that bound is given,
        5. ``max_value`` if only that bound is given,
        6. zeros otherwise.

        ``parallel``, ``deterministic`` and ``independent`` are accepted but not
        read here. ``internals`` must be empty.

        Returns:
            Tuple of the actions ``TensorDict`` and an empty ``TensorDict``.
        """
        assert len(internals) == 0

        def filled(*, value, dims, dtype):
            # Broadcast one constant over the complete action tensor.
            return tf.fill(dims=dims, value=tf_util.constant(value=value, dtype=dtype))

        actions = TensorDict()
        for name, spec in self.actions_spec.items():
            # Full output shape: leading batch dimension followed by the action shape.
            batch_dims = tf_util.cast(x=tf.shape(input=states.value())[:1], dtype='int')
            action_dims = tf_util.constant(value=spec.shape, dtype='int')
            shape = tf.concat(values=(batch_dims, action_dims), axis=0)

            if self.action_values is not None and name in self.action_values:
                # User-specified value takes precedence over everything else.
                actions[name] = filled(
                    value=self.action_values[name], dims=shape, dtype=spec.type
                )

            elif self.config.enable_int_action_masking and spec.type == 'int' and \
                    spec.num_values is not None:
                # With masking, pick the first unmasked choice per action entry.
                mask = auxiliaries[name]['mask']
                choice_shape = tuple(1 for _ in spec.shape) + (1, spec.num_values)
                choices = tf_util.constant(
                    value=list(range(spec.num_values)), dtype='int', shape=choice_shape
                )
                trailing_one = tf_util.constant(value=1, dtype='int', shape=(1,))
                choices = tf.tile(
                    input=choices, multiples=tf.concat(values=(shape, trailing_one), axis=0)
                )
                # Flattened list of all valid choices, in row order.
                valid_choices = tf.boolean_mask(tensor=choices, mask=mask)
                valid_counts = tf.math.reduce_sum(
                    input_tensor=tf_util.cast(x=mask, dtype='int'), axis=(spec.rank + 1)
                )
                valid_counts = tf.reshape(tensor=valid_counts, shape=(-1,))
                # Exclusive cumsum gives the offset of each row's first valid choice.
                first_valid = tf.math.cumsum(x=valid_counts, axis=0, exclusive=True)
                picked = tf.gather(params=valid_choices, indices=first_valid)
                actions[name] = tf.reshape(tensor=picked, shape=shape)

            elif spec.type != 'bool' and spec.min_value is not None and \
                    spec.max_value is not None:
                # Both bounds given: use their midpoint.
                midpoint = spec.min_value + 0.5 * (spec.max_value - spec.min_value)
                actions[name] = filled(value=midpoint, dims=shape, dtype=spec.type)

            elif spec.type != 'bool' and spec.min_value is not None:
                # Only the lower bound is given.
                actions[name] = filled(value=spec.min_value, dims=shape, dtype=spec.type)

            elif spec.type != 'bool' and spec.max_value is not None:
                # Only the upper bound is given.
                actions[name] = filled(value=spec.max_value, dims=shape, dtype=spec.type)

            else:
                # No information available: fall back to zeros.
                actions[name] = tf_util.zeros(shape=shape, dtype=spec.type)

        return actions, TensorDict()
コード例 #3
0
    def reference(self, *, states, horizons, internals, auxiliaries, actions,
                  policy):
        """Return an all-zero float reference with one entry per batch element.

        The batch size is the first dimension of ``actions.value()``; all other
        arguments are accepted but not read by this implementation.

        NOTE(review): a deterministic, independent ``policy.act(...)`` call was
        previously present here only as commented-out code; presumably it was
        disabled on purpose -- confirm before re-enabling.
        """
        num_instances = tf.shape(input=actions.value())[0]
        return tf_util.zeros(shape=(num_instances, ), dtype='float')
コード例 #4
0
        def no_sync():
            """Decrement ``self.next_sync`` and return one zero delta per variable.

            ``one`` and ``variables`` come from the enclosing scope. The zero
            tensors are created under a control dependency on the decrement.
            """
            decrement_op = self.next_sync.assign_sub(delta=one, read_value=False)

            with tf.control_dependencies(control_inputs=(decrement_op, )):
                # Float zero tensor matching the shape of every variable.
                return [
                    tf_util.zeros(shape=tf_util.shape(x=variable), dtype='float')
                    for variable in variables
                ]
コード例 #5
0
ファイル: model.py プロジェクト: maratimus/tensorforce
    def independent_act(self, *, states, internals=None, auxiliaries=None):
        """Compute actions via ``self.core_act`` with ``independent=True``.

        Args:
            states: State inputs; the batch size is read from ``states.value()``.
            internals: Internal inputs, may be omitted only if
                ``self.internals_spec`` is empty.
            auxiliaries: Auxiliary inputs, may be omitted only if
                ``self.auxiliaries_spec`` is empty.

        Returns:
            ``OrderedDict`` of the actions, or of a ``TensorDict`` holding both
            ``actions`` and ``internals`` if ``self.internals_spec`` is non-empty.
        """
        if internals is None:
            assert len(self.internals_spec) == 0
            internals = TensorDict()
        if auxiliaries is None:
            assert len(self.auxiliaries_spec) == 0
            auxiliaries = TensorDict()
        true = tf_util.constant(value=True, dtype='bool')
        batch_size = tf_util.cast(x=tf.shape(input=states.value())[0], dtype='int')

        # Input assertions, only created when enabled in the config.
        assertions = list()
        if self.config.create_tf_assertions:
            checked_inputs = (
                (
                    self.states_spec, states,
                    'Agent.independent_act: invalid {issue} for {name} state input.'
                ),
                (
                    self.internals_spec, internals,
                    'Agent.independent_act: invalid {issue} for {name} internal input.'
                ),
                (
                    self.auxiliaries_spec, auxiliaries,
                    'Agent.independent_act: invalid {issue} for {name} input.'
                ),
            )
            for input_spec, value, message in checked_inputs:
                assertions.extend(
                    input_spec.tf_assert(x=value, batch_size=batch_size, message=message)
                )

            # Mask assertions: every int action needs at least one valid choice.
            if self.config.enable_int_action_masking:
                for name, action_spec in self.actions_spec.items():
                    if action_spec.type != 'int':
                        continue
                    any_valid = tf.math.reduce_any(
                        input_tensor=auxiliaries[name]['mask'], axis=(action_spec.rank + 1)
                    )
                    assertions.append(tf.debugging.assert_equal(
                        x=tf.reduce_all(input_tensor=any_valid), y=true,
                        message="Agent.independent_act: at least one action has to be valid."
                    ))

        with tf.control_dependencies(control_inputs=assertions):
            # Core act (action assertions are skipped here).
            parallel = tf_util.zeros(shape=(1,), dtype='int')
            actions, internals = self.core_act(
                states=states, internals=internals, auxiliaries=auxiliaries, parallel=parallel,
                independent=True
            )

            # SavedModel requires flattened output
            if len(self.internals_spec) > 0:
                return OrderedDict(TensorDict(actions=actions, internals=internals))
            return OrderedDict(actions)
コード例 #6
0
ファイル: layer.py プロジェクト: tensorforce/tensorforce
    def iterative_body(self, x, indices, remaining, current_x,
                       current_internals):
        """Advance every sequence by one step of ``self.iterative_apply``.

        Takes and returns the same five values, so it can serve as a loop body.

        Args:
            x: Inputs that ``indices`` index into along the first axis.
            indices: Per-sequence position of the element to process next.
            remaining: Per-sequence number of steps still to be processed.
            current_x: Only used to read the batch size; it is replaced by the
                elements gathered from ``x``.
            current_internals: Internals passed to ``self.iterative_apply``,
                either dict-like or a single tensor.

        Returns:
            Tuple ``(x, indices, remaining, next_x, next_internals)``.
        """
        batch_shape = tf_util.cast(x=tf.shape(input=current_x)[:1], dtype='int')
        zeros = tf_util.zeros(shape=batch_shape, dtype='int')
        ones = tf_util.ones(shape=batch_shape, dtype='int')
        # NOTE(review): this scalar batch size is not referenced again below.
        batch_size = batch_shape[0]

        def keep_if_finished(finished, previous, updated):
            # Add trailing axes to the per-sequence flag until it has the rank of
            # the internal, then keep the previous value where finished.
            mask = finished
            for _ in range(tf_util.rank(x=previous) - 1):
                mask = tf.expand_dims(input=mask, axis=1)
            return tf.where(condition=mask, x=previous, y=updated)

        current_x = tf.gather(params=x, indices=indices)
        next_x, next_internals = self.iterative_apply(
            x=current_x, internals=current_internals)

        with tf.control_dependencies(control_inputs=(current_x, next_x)):
            is_finished = tf.math.equal(x=remaining, y=zeros)
            if isinstance(next_internals, dict):
                for name, current_internal, next_internal in current_internals.zip_items(
                        next_internals):
                    next_internals[name] = keep_if_finished(
                        is_finished, current_internal, next_internal)

            else:
                next_internals = keep_if_finished(
                    is_finished, current_internals, next_internals)

            # Count down unfinished sequences first; the index update below
            # depends on the already decremented value.
            remaining -= tf.where(condition=is_finished, x=zeros, y=ones)
            now_finished = tf.math.equal(x=remaining, y=zeros)
            indices += tf.where(condition=now_finished, x=zeros, y=ones)

        return x, indices, remaining, next_x, next_internals
コード例 #7
0
            def fn_terminal():
                """Reset per-episode bookkeeping and increment the episode counter.

                Uses ``parallel``, ``expanded_parallel`` and ``one`` from the
                enclosing scope. Steps, in order: reset the previous internals at
                ``expanded_parallel`` to their initial values, optionally write
                episode length/reward summaries, zero episode length/reward at
                ``expanded_parallel`` (after the summaries), increment
                ``self.episodes``.

                Returns:
                    A grouped op covering all assignments.
                """
                operations = list()

                # Reset internals
                def as_constant(spec, initial):
                    return tf_util.constant(value=initial, dtype=spec.type)

                initials = self.internals_spec.fmap(
                    function=as_constant, cls=TensorDict, zip_values=self.initial_internals
                )
                for name, previous, initial in self.previous_internals.zip_items(initials):
                    reset_value = tf.tensor_scatter_nd_update(
                        tensor=previous, indices=expanded_parallel,
                        updates=tf.expand_dims(input=initial, axis=0)
                    )
                    operations.append(previous.assign(value=reset_value))

                # Episode length/reward summaries (before episode reward reset / episodes
                # increment)
                dependencies = list()
                if self.summaries == 'all' or 'reward' in self.summaries:
                    with self.summarizer.as_default():
                        for summary_name, tracked in (
                            ('episode-length', self.episode_length),
                            ('episode-reward', self.episode_reward),
                        ):
                            data = tf.gather(params=tracked, indices=parallel)
                            dependencies.append(tf.summary.scalar(
                                name=summary_name, data=data, step=self.episodes
                            ))

                # Reset episode length/reward, only after the summaries were written
                with tf.control_dependencies(control_inputs=dependencies):
                    for tracked, dtype in (
                        (self.episode_length, 'int'),
                        (self.episode_reward, 'float'),
                    ):
                        cleared = tf.tensor_scatter_nd_update(
                            tensor=tracked, indices=expanded_parallel,
                            updates=tf_util.zeros(shape=(1, ), dtype=dtype)
                        )
                        operations.append(tracked.assign(value=cleared))

                # Increment episodes counter
                operations.append(self.episodes.assign_add(delta=one, read_value=False))

                return tf.group(*operations)
コード例 #8
0
    def variable(self,
                 *,
                 name,
                 spec,
                 initializer,
                 is_trainable,
                 is_saved,
                 initialization_scale=None):
        """Validate the arguments and create a ``tf.Variable`` for this module.

        Must be called before the module is initialized.

        Args:
            name: Variable name, a string.
            spec: Fully specified ``TensorSpec`` giving type and shape.
            initializer: Either a concrete value (Python value of
                ``spec.py_type()``, ``np.ndarray`` or ``tf.Tensor`` with matching
                dtype and shape) or one of the names 'constant', 'normal',
                'normal-relu', 'ones', 'orthogonal', 'orthogonal-relu', 'zeros'.
            is_trainable: Whether the variable is trainable; only float
                variables may be trainable.
            is_saved: Stored on the returned variable as ``is_saved``.
            initialization_scale: Only valid with the 'constant', 'orthogonal'
                and 'orthogonal-relu' initializers. It is the fill value for
                'constant' (required there) and a positive multiplier for the
                orthogonal initializers.

        Returns:
            The created ``tf.Variable`` with an additional ``is_saved`` attribute.

        Raises:
            TensorforceError: If an argument has an invalid type or value, or
                the initializer does not fit the spec.
        """
        assert self.is_initialized is False
        # name
        if not isinstance(name, str):
            raise TensorforceError.type(name='variable',
                                        argument='name',
                                        dtype=type(name))
        # spec
        if not isinstance(spec, TensorSpec):
            raise TensorforceError.dtype(name='variable',
                                         argument='spec',
                                         dtype=type(spec))
        if spec.is_underspecified():
            raise TensorforceError.value(name='variable',
                                         argument='spec',
                                         value=spec,
                                         hint='underspecified')
        # initializer
        initializer_names = ('constant', 'normal', 'normal-relu', 'ones',
                             'orthogonal', 'orthogonal-relu', 'zeros')
        if not isinstance(initializer, (spec.py_type(), np.ndarray, tf.Tensor)) and \
                initializer not in initializer_names:
            raise TensorforceError.value(name='variable',
                                         argument='initializer',
                                         value=initializer)
        elif isinstance(initializer,
                        np.ndarray) and initializer.dtype != spec.np_type():
            raise TensorforceError.type(name='variable',
                                        argument='initializer',
                                        dtype=initializer.dtype)
        elif isinstance(
                initializer,
                tf.Tensor) and tf_util.dtype(x=initializer) != spec.tf_type():
            raise TensorforceError.type(name='variable',
                                        argument='initializer',
                                        dtype=tf_util.dtype(x=initializer))
        # initialization_scale
        if initialization_scale is not None:
            # The isinstance check comes first so that array/tensor initializers
            # are never compared against the name tuple.
            if isinstance(initializer, (spec.py_type(), np.ndarray, tf.Tensor)) or \
                    initializer not in ('constant', 'orthogonal', 'orthogonal-relu'):
                raise TensorforceError.invalid(
                    name='variable',
                    argument='initialization_scale',
                    condition='initializer not orthogonal')
            elif not isinstance(initialization_scale, spec.py_type()):
                raise TensorforceError.type(name='variable',
                                            argument='initialization_scale',
                                            dtype=type(initialization_scale),
                                            hint='!= float')
        # is_trainable
        if not isinstance(is_trainable, bool):
            raise TensorforceError.type(name='variable',
                                        argument='is_trainable',
                                        dtype=type(is_trainable))
        elif is_trainable and spec.type != 'float':
            raise TensorforceError.value(name='variable',
                                         argument='is_trainable',
                                         value=is_trainable,
                                         condition='spec.type != float')
        # is_saved
        if not isinstance(is_saved, bool):
            raise TensorforceError.type(name='variable',
                                        argument='is_saved',
                                        dtype=type(is_saved))

        # Variable initializer
        if isinstance(initializer, spec.py_type()):
            # Single Python value: broadcast to the full variable shape.
            initializer = tf_util.constant(value=initializer,
                                           dtype=spec.type,
                                           shape=spec.shape)
        elif isinstance(initializer, np.ndarray):
            if initializer.shape != spec.shape:
                raise TensorforceError.mismatch(name='Module.variable',
                                                value1='shape',
                                                value2='initializer')
            initializer = tf_util.constant(value=initializer, dtype=spec.type)
        elif isinstance(initializer, tf.Tensor):
            # A matching tensor is used as initial value unchanged.
            if tf_util.shape(x=initializer) != spec.shape:
                raise TensorforceError.mismatch(name='Module.variable',
                                                value1='shape',
                                                value2='initializer')
        elif not isinstance(initializer, str):
            raise TensorforceError(
                "Invalid variable initializer: {}".format(initializer))
        elif initializer.startswith('normal'):
            if spec.type != 'float':
                raise TensorforceError(
                    message=
                    "Invalid variable initializer value for non-float variable: {}."
                    .format(initializer))
            # Standard deviation derived from the shape, capped at 0.1; the
            # '-relu' variant only uses the product of the leading dimensions.
            if initializer.endswith('-relu'):
                stddev = min(0.1,
                             np.sqrt(2.0 / util.product(xs=spec.shape[:-1])))
            else:
                stddev = min(
                    0.1,
                    np.sqrt(
                        2.0 /
                        (util.product(xs=spec.shape[:-1]) + spec.shape[-1])))
            initializer = tf.random.normal(shape=spec.shape,
                                           stddev=stddev,
                                           dtype=spec.tf_type())
        elif initializer.startswith('orthogonal'):
            if spec.type != 'float':
                raise TensorforceError(
                    message=
                    "Invalid variable initializer value for non-float variable: {}."
                    .format(initializer))
            if spec.rank < 2:
                raise TensorforceError(
                    message=
                    "Invalid variable initializer value for 0/1-rank variable: {}."
                    .format(initializer))
            # Orthogonalize a random matrix of shape (prod(shape[:-1]), shape[-1])
            # via SVD, then reshape it to the variable shape below.
            normal = np.random.normal(size=(util.product(xs=spec.shape[:-1]),
                                            spec.shape[-1]))
            u, _, v = np.linalg.svd(a=normal, full_matrices=False)
            # Pick whichever factor has the flattened variable shape.
            orthogonal = u if u.shape[1] == spec.shape[-1] else v
            if initializer.endswith('-relu'):
                orthogonal = orthogonal * np.sqrt(2.0)
            if initialization_scale is not None and initialization_scale != 1.0:
                if initialization_scale <= 0.0:
                    raise TensorforceError.value(
                        name='variable',
                        argument='initialization_scale',
                        value=initialization_scale,
                        hint='<= 0.0')
                orthogonal = orthogonal * initialization_scale
            initializer = tf_util.constant(value=orthogonal.reshape(
                spec.shape),
                                           dtype=spec.type)
        elif initializer == 'zeros':
            initializer = tf_util.zeros(shape=spec.shape, dtype=spec.type)
        elif initializer == 'ones':
            initializer = tf_util.ones(shape=spec.shape, dtype=spec.type)
        elif initializer == 'constant':
            # The scale is the fill value here; without it there is nothing to
            # fill with, so fail with a clear message instead of passing None on.
            if initialization_scale is None:
                raise TensorforceError(
                    message=
                    "Invalid variable initializer 'constant' without initialization_scale."
                )
            initializer = tf.fill(dims=spec.shape,
                                  value=tf_util.constant(
                                      value=initialization_scale,
                                      dtype=spec.type))

        # Variable
        variable = tf.Variable(initial_value=initializer,
                               trainable=is_trainable,
                               validate_shape=True,
                               name=name,
                               dtype=spec.tf_type(),
                               shape=spec.shape)
        variable.is_saved = is_saved

        return variable
コード例 #9
0
    def apply(self, *, x, horizons, internals):
        """Apply the layer over the sequence preceding each batch entry.

        Depending on ``self.temporal_processing`` the sequence elements are
        either collected and handed to ``self.cumulative_apply`` at once
        ('cumulative'), or processed step by step through
        ``self.iterative_body`` ('iterative'). If the horizon is constant zero,
        no loop is built.

        Args:
            x: Inputs, indexed along the first axis by the sequence positions.
            horizons: Integer tensor where ``horizons[:, 0]`` is used as
                sequence start index and ``horizons[:, 1]`` as sequence length.
            internals: Internal states, only read for 'iterative' processing.

        Returns:
            For 'cumulative' processing the output tensor; for 'iterative'
            processing a tuple of output tensor and internals.
        """
        zero = tf_util.constant(value=0, dtype='int')
        one = tf_util.constant(value=1, dtype='int')
        batch_size = tf_util.cast(x=tf.shape(input=horizons)[0], dtype='int')
        zeros = tf_util.zeros(shape=(batch_size, ), dtype='int')
        ones = tf_util.ones(shape=(batch_size, ), dtype='int')

        # including 0th step
        horizon = self.horizon.value() + one
        # in case of longer horizon than necessary (e.g. main vs baseline policy)
        starts = horizons[:, 0] + tf.maximum(x=(horizons[:, 1] - horizon),
                                             y=zeros)
        lengths = horizons[:, 1] - tf.maximum(x=(horizons[:, 1] - horizon),
                                              y=zeros)
        # No need to iterate longer than the longest sequence in the batch.
        horizon = tf.minimum(x=horizon,
                             y=tf.math.reduce_max(input_tensor=lengths,
                                                  axis=0))
        output_spec = self.output_spec()

        # Loop bookkeeping only exists if a while-loop is built below; the
        # constant-zero-horizon shortcuts leave both as None, in which case the
        # loop assertions further down are skipped.
        final_indices = None
        final_remaining = None

        if self.temporal_processing == 'cumulative':
            if self.horizon.is_constant(value=0):
                # NOTE(review): this passes cumulative-style keywords (xs, lengths)
                # to iterative_apply, whereas the other call sites in this method
                # pass x/internals to iterative_apply and xs/lengths to
                # cumulative_apply -- confirm whether cumulative_apply was intended.
                x = self.iterative_apply(xs=x, lengths=ones)

            else:

                def body(x, indices, remaining, xs):
                    # Append the current element of every sequence along axis 1.
                    current_x = tf.gather(params=x, indices=indices)
                    current_x = tf.expand_dims(input=current_x, axis=1)
                    xs = tf.concat(values=(xs, current_x), axis=1)
                    # Count down unfinished sequences, then advance the index of
                    # those which still have elements left afterwards.
                    remaining -= tf.where(condition=tf.math.equal(x=remaining,
                                                                  y=zeros),
                                          x=zeros,
                                          y=ones)
                    indices += tf.where(condition=tf.math.equal(x=remaining,
                                                                y=zeros),
                                        x=zeros,
                                        y=ones)
                    return x, indices, remaining, xs

                initial_xs = tf_util.zeros(shape=((batch_size, 0) +
                                                  output_spec.shape),
                                           dtype=output_spec.type)

                # NOTE(review): the iterative branch casts the iteration limit
                # with tf_util.int32 instead of int64 -- confirm which is intended.
                _, final_indices, final_remaining, xs = tf.while_loop(
                    cond=tf_util.always_true,
                    body=body,
                    loop_vars=(x, starts, lengths, initial_xs),
                    maximum_iterations=tf_util.int64(x=horizon))

                x = self.cumulative_apply(xs=xs, lengths=lengths)

        elif self.temporal_processing == 'iterative':
            if self.horizon.is_constant(value=0):
                # NOTE(review): final_internals is not used afterwards, so the
                # input internals are returned unchanged on this path -- confirm
                # that this is intended.
                x, final_internals = self.iterative_apply(x=x,
                                                          internals=internals)

            else:
                initial_x = tf_util.zeros(shape=((batch_size, ) +
                                                 output_spec.shape),
                                          dtype=output_spec.type)

                # Convert internals to positional form for the loop and back.
                signature = self.input_signature(function='iterative_body')
                internals = signature['current_internals'].kwargs_to_args(
                    kwargs=internals)
                _, final_indices, final_remaining, x, final_internals = tf.while_loop(
                    cond=tf_util.always_true,
                    body=self.iterative_body,
                    loop_vars=(x, starts, lengths, initial_x, internals),
                    maximum_iterations=tf_util.int32(x=horizon))
                internals = signature['current_internals'].args_to_kwargs(
                    args=final_internals)

        assertions = list()
        if self.config.create_tf_assertions and final_indices is not None:
            # Every sequence must have been consumed completely by the loop.
            assertions.append(
                tf.debugging.assert_equal(x=final_indices,
                                          y=(tf.math.cumsum(x=lengths) -
                                             ones)))
            assertions.append(
                tf.debugging.assert_equal(
                    x=tf.math.reduce_sum(input_tensor=final_remaining),
                    y=zero))

        with tf.control_dependencies(control_inputs=assertions):
            if self.temporal_processing == 'cumulative':
                return tf_util.identity(input=super().apply(x=x))
            elif self.temporal_processing == 'iterative':
                return tf_util.identity(input=super().apply(x=x)), internals