Code example #1
File: layer.py  Project: tensorforce/tensorforce
    def iterative_body(self, x, indices, remaining, current_x,
                       current_internals):
        # 1-D batch-size tensor, used as a dynamic shape for the masks below
        batch_size = tf_util.cast(x=tf.shape(input=current_x)[:1], dtype='int')
        zeros = tf_util.zeros(shape=batch_size, dtype='int')
        ones = tf_util.ones(shape=batch_size, dtype='int')
        batch_size = batch_size[0]

        # gather this step's inputs according to the per-sequence indices
        current_x = tf.gather(params=x, indices=indices)
        next_x, next_internals = self.iterative_apply(
            x=current_x, internals=current_internals)

        with tf.control_dependencies(control_inputs=(current_x, next_x)):
            # sequences with no remaining steps keep their previous internals
            is_finished = tf.math.equal(x=remaining, y=zeros)
            if isinstance(next_internals, dict):
                for name, current_internal, next_internal in current_internals.zip_items(
                        next_internals):
                    # expand the boolean vector so it broadcasts over the
                    # internal's trailing dimensions
                    condition = is_finished
                    for _ in range(tf_util.rank(x=current_internal) - 1):
                        condition = tf.expand_dims(input=condition, axis=1)
                    next_internals[name] = tf.where(condition=condition,
                                                    x=current_internal,
                                                    y=next_internal)

            else:
                condition = is_finished
                for _ in range(tf_util.rank(x=current_internals) - 1):
                    condition = tf.expand_dims(input=condition, axis=1)
                next_internals = tf.where(condition=condition,
                                          x=current_internals,
                                          y=next_internals)

            # decrement active counters, then advance indices of sequences that
            # still have steps left after this update
            remaining -= tf.where(condition=is_finished, x=zeros, y=ones)
            indices += tf.where(condition=tf.math.equal(x=remaining, y=zeros),
                                x=zeros,
                                y=ones)

        return x, indices, remaining, next_x, next_internals
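
The masking step above is the core trick: a per-sequence boolean is expanded to
the rank of each internal tensor so that tf.where can broadcast it. A minimal
sketch of just that trick in plain TensorFlow, independent of Tensorforce's
tf_util helpers (shapes are illustrative, not from the project):

import tensorflow as tf

# Toy batch of 3 sequences with an internal state of shape (3, 4).
is_finished = tf.constant([True, False, True])
current_internal = tf.zeros(shape=(3, 4))
next_internal = tf.ones(shape=(3, 4))

# Expand the per-sequence flag once per extra rank so tf.where broadcasts.
condition = is_finished
for _ in range(current_internal.shape.rank - 1):
    condition = tf.expand_dims(input=condition, axis=1)

# Finished sequences keep their old internals; active ones take the new values.
kept = tf.where(condition, current_internal, next_internal)
print(kept.numpy())  # rows 0 and 2 stay zeros, row 1 becomes ones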
Code example #2
    def variable(self,
                 *,
                 name,
                 spec,
                 initializer,
                 is_trainable,
                 is_saved,
                 initialization_scale=None):
        assert self.is_initialized is False
        # name
        if not isinstance(name, str):
            raise TensorforceError.type(name='variable',
                                        argument='name',
                                        dtype=type(name))
        # spec
        if not isinstance(spec, TensorSpec):
            raise TensorforceError.type(name='variable',
                                        argument='spec',
                                        dtype=type(spec))
        if spec.is_underspecified():
            raise TensorforceError.value(name='variable',
                                         argument='spec',
                                         value=spec,
                                         hint='underspecified')
        # initializer
        initializer_names = ('constant', 'normal', 'normal-relu', 'ones',
                             'orthogonal', 'orthogonal-relu', 'zeros')
        if not isinstance(initializer, (spec.py_type(), np.ndarray, tf.Tensor)) and \
                initializer not in initializer_names:
            raise TensorforceError.value(name='variable',
                                         argument='initializer',
                                         value=initializer)
        elif isinstance(initializer,
                        np.ndarray) and initializer.dtype != spec.np_type():
            raise TensorforceError.type(name='variable',
                                        argument='initializer',
                                        dtype=initializer.dtype)
        elif isinstance(
                initializer,
                tf.Tensor) and tf_util.dtype(x=initializer) != spec.tf_type():
            raise TensorforceError.type(name='variable',
                                        argument='initializer',
                                        dtype=tf_util.dtype(x=initializer))
        # initialization_scale
        if initialization_scale is not None:
            if isinstance(initializer, (spec.py_type(), np.ndarray, tf.Tensor)) or \
                    initializer not in ('constant', 'orthogonal', 'orthogonal-relu'):
                raise TensorforceError.invalid(
                    name='variable',
                    argument='initialization_scale',
                    condition='initializer not constant or orthogonal')
            elif not isinstance(initialization_scale, spec.py_type()):
                raise TensorforceError.type(name='variable',
                                            argument='initialization_scale',
                                            dtype=type(initialization_scale),
                                            hint='!= float')
        # is_trainable
        if not isinstance(is_trainable, bool):
            raise TensorforceError.type(name='variable',
                                        argument='is_trainable',
                                        dtype=type(is_trainable))
        elif is_trainable and spec.type != 'float':
            raise TensorforceError.value(name='variable',
                                         argument='is_trainable',
                                         value=is_trainable,
                                         condition='spec.type != float')
        # is_saved
        if not isinstance(is_saved, bool):
            raise TensorforceError.type(name='variable',
                                        argument='is_saved',
                                        dtype=type(is_saved))

        # Variable initializer
        if isinstance(initializer, spec.py_type()):
            initializer = tf_util.constant(value=initializer,
                                           dtype=spec.type,
                                           shape=spec.shape)
        elif isinstance(initializer, np.ndarray):
            if initializer.shape != spec.shape:
                raise TensorforceError.mismatch(name='Module.variable',
                                                value1='shape',
                                                value2='initializer')
            initializer = tf_util.constant(value=initializer, dtype=spec.type)
        elif isinstance(initializer, tf.Tensor):
            if tf_util.shape(x=initializer) != spec.shape:
                raise TensorforceError.mismatch(name='Module.variable',
                                                value1='shape',
                                                value2='initializer')
            # a tf.Tensor initializer is used as-is
        elif not isinstance(initializer, str):
            raise TensorforceError(
                "Invalid variable initializer: {}".format(initializer))
        elif initializer.startswith('normal'):
            if spec.type != 'float':
                raise TensorforceError(
                    message=
                    "Invalid variable initializer value for non-float variable: {}."
                    .format(initializer))
            if initializer.endswith('-relu'):
                # He-style scaling based on fan-in, capped at 0.1
                stddev = min(0.1,
                             np.sqrt(2.0 / util.product(xs=spec.shape[:-1])))
            else:
                # Glorot-style scaling based on fan-in plus fan-out, capped at 0.1
                stddev = min(
                    0.1,
                    np.sqrt(
                        2.0 /
                        (util.product(xs=spec.shape[:-1]) + spec.shape[-1])))
            initializer = tf.random.normal(shape=spec.shape,
                                           stddev=stddev,
                                           dtype=spec.tf_type())
        elif initializer.startswith('orthogonal'):
            if spec.type != 'float':
                raise TensorforceError(
                    message=
                    "Invalid variable initializer value for non-float variable: {}."
                    .format(initializer))
            if spec.rank < 2:
                raise TensorforceError(
                    message=
                    "Invalid variable initializer value for 0/1-rank variable: {}."
                    .format(initializer))
            # the SVD of a Gaussian matrix yields an orthogonal factor
            normal = np.random.normal(size=(util.product(xs=spec.shape[:-1]),
                                            spec.shape[-1]))
            u, _, v = np.linalg.svd(a=normal, full_matrices=False)
            orthogonal = u if u.shape[1] == spec.shape[-1] else v
            if initializer.endswith('-relu'):
                # sqrt(2) gain for ReLU activations
                orthogonal = orthogonal * np.sqrt(2.0)
            if initialization_scale is not None and initialization_scale != 1.0:
                if initialization_scale <= 0.0:
                    raise TensorforceError.value(
                        name='variable',
                        argument='initialization_scale',
                        value=initialization_scale,
                        hint='<= 0.0')
                orthogonal = orthogonal * initialization_scale
            initializer = tf_util.constant(value=orthogonal.reshape(
                spec.shape),
                                           dtype=spec.type)
        elif initializer == 'zeros':
            initializer = tf_util.zeros(shape=spec.shape, dtype=spec.type)
        elif initializer == 'ones':
            initializer = tf_util.ones(shape=spec.shape, dtype=spec.type)
        elif initializer == 'constant':
            # 'constant' expects initialization_scale to be set; a None value
            # would fail in tf_util.constant below
            initializer = tf.fill(dims=spec.shape,
                                  value=tf_util.constant(
                                      value=initialization_scale,
                                      dtype=spec.type))

        # Variable
        variable = tf.Variable(initial_value=initializer,
                               trainable=is_trainable,
                               validate_shape=True,
                               name=name,
                               dtype=spec.tf_type(),
                               shape=spec.shape)
        variable.is_saved = is_saved

        return variable
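
The 'orthogonal' branch is a standard SVD-based construction: flatten all but
the last axis, orthogonalize a Gaussian matrix, optionally apply a sqrt(2) ReLU
gain, and reshape back. A minimal NumPy sketch of the same math, with a
concrete stand-in for spec.shape (the shape is illustrative):

import numpy as np

shape = (3, 5, 8)                  # stand-in for spec.shape
fan_in = int(np.prod(shape[:-1]))  # flatten all but the last axis

# The SVD of a Gaussian matrix yields an orthonormal factor of the right shape.
normal = np.random.normal(size=(fan_in, shape[-1]))
u, _, v = np.linalg.svd(normal, full_matrices=False)
orthogonal = u if u.shape[1] == shape[-1] else v

# '-relu' variants scale by sqrt(2), the He-style gain for ReLU activations.
orthogonal = orthogonal * np.sqrt(2.0)

weights = orthogonal.reshape(shape)
# Columns of the flattened matrix are orthonormal up to the sqrt(2) gain:
print(np.allclose(orthogonal.T @ orthogonal, 2.0 * np.eye(shape[-1])))  # True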
Code example #3
    def apply(self, *, x, horizons, internals):
        zero = tf_util.constant(value=0, dtype='int')
        one = tf_util.constant(value=1, dtype='int')
        batch_size = tf_util.cast(x=tf.shape(input=horizons)[0], dtype='int')
        zeros = tf_util.zeros(shape=(batch_size, ), dtype='int')
        ones = tf_util.ones(shape=(batch_size, ), dtype='int')

        # including 0th step
        horizon = self.horizon.value() + one
        # in case of longer horizon than necessary (e.g. main vs baseline policy)
        starts = horizons[:, 0] + tf.maximum(x=(horizons[:, 1] - horizon),
                                             y=zeros)
        lengths = horizons[:, 1] - tf.maximum(x=(horizons[:, 1] - horizon),
                                              y=zeros)
        horizon = tf.minimum(x=horizon,
                             y=tf.math.reduce_max(input_tensor=lengths,
                                                  axis=0))
        output_spec = self.output_spec()

        if self.temporal_processing == 'cumulative':
            if self.horizon.is_constant(value=0):
                # cumulative_apply (not iterative_apply) takes xs/lengths, cf.
                # below; add a singleton time axis for the single-step case
                x = self.cumulative_apply(xs=tf.expand_dims(input=x, axis=1),
                                          lengths=ones)

            else:

                def body(x, indices, remaining, xs):
                    # append this step's inputs along the time axis
                    current_x = tf.gather(params=x, indices=indices)
                    current_x = tf.expand_dims(input=current_x, axis=1)
                    xs = tf.concat(values=(xs, current_x), axis=1)
                    # only decrement counters that are still positive
                    remaining -= tf.where(condition=tf.math.equal(x=remaining,
                                                                  y=zeros),
                                          x=zeros,
                                          y=ones)
                    indices += tf.where(condition=tf.math.equal(x=remaining,
                                                                y=zeros),
                                        x=zeros,
                                        y=ones)
                    return x, indices, remaining, xs

                initial_xs = tf_util.zeros(shape=((batch_size, 0) +
                                                  output_spec.shape),
                                           dtype=output_spec.type)

                _, final_indices, final_remaining, xs = tf.while_loop(
                    cond=tf_util.always_true,
                    body=body,
                    loop_vars=(x, starts, lengths, initial_xs),
                    maximum_iterations=tf_util.int64(x=horizon))

                x = self.cumulative_apply(xs=xs, lengths=lengths)

        elif self.temporal_processing == 'iterative':
            if self.horizon.is_constant(value=0):
                # propagate the updated internals so they are returned below
                x, internals = self.iterative_apply(x=x, internals=internals)

            else:
                initial_x = tf_util.zeros(shape=((batch_size, ) +
                                                 output_spec.shape),
                                          dtype=output_spec.type)

                signature = self.input_signature(function='iterative_body')
                internals = signature['current_internals'].kwargs_to_args(
                    kwargs=internals)
                _, final_indices, final_remaining, x, final_internals = tf.while_loop(
                    cond=tf_util.always_true,
                    body=self.iterative_body,
                    loop_vars=(x, starts, lengths, initial_x, internals),
                    maximum_iterations=tf_util.int32(x=horizon))
                internals = signature['current_internals'].args_to_kwargs(
                    args=final_internals)

        assertions = list()
        # the while-loop outputs only exist for a non-constant, non-zero horizon
        if self.config.create_tf_assertions and not self.horizon.is_constant(value=0):
            assertions.append(
                tf.debugging.assert_equal(x=final_indices,
                                          y=(tf.math.cumsum(x=lengths) -
                                             ones)))
            assertions.append(
                tf.debugging.assert_equal(
                    x=tf.math.reduce_sum(input_tensor=final_remaining),
                    y=zero))

        with tf.control_dependencies(control_inputs=assertions):
            if self.temporal_processing == 'cumulative':
                return tf_util.identity(input=super().apply(x=x))
            elif self.temporal_processing == 'iterative':
                return tf_util.identity(input=super().apply(x=x)), internals
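
The starts/lengths arithmetic at the top of apply clips each window in horizons
to at most horizon steps, dropping the oldest steps first. A small NumPy sketch
with made-up (start, length) rows, not taken from the project:

import numpy as np

horizon = 3 + 1                # self.horizon.value() + 1, including the 0th step
horizons = np.array([[0, 2],   # rows are (start, length) per sequence
                     [2, 6],
                     [8, 1]])

overshoot = np.maximum(horizons[:, 1] - horizon, 0)
starts = horizons[:, 0] + overshoot   # skip steps that fall outside the window
lengths = horizons[:, 1] - overshoot  # remaining steps, capped at horizon

print(starts)   # [0 4 8] -> the 6-step sequence starts 2 steps later
print(lengths)  # [2 4 1] -> and is clipped to 4 steps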