コード例 #1
0
        def apply_sync():
            """Synchronize target `variables` towards `source_variables` and
            return the per-variable deltas that were applied.

            Uses from enclosing scope: `source_variables`, `variables`.
            """
            dependencies = list()
            # Only reschedule the next sync step when the frequency is not the
            # constant 1 (i.e. when we are not syncing on every call).
            if not self.sync_frequency.is_constant(value=1):
                dependencies.append(
                    self.next_sync.assign(value=self.sync_frequency.value(),
                                          read_value=False))

            with tf.control_dependencies(control_inputs=dependencies):
                deltas = list()
                assignments = list()
                if self.update_weight.is_constant(value=1.0):
                    # Full sync: copy each source variable verbatim.
                    for source_var, target_var in zip(source_variables,
                                                      variables):
                        deltas.append(source_var - target_var)
                        assignments.append(
                            target_var.assign(value=source_var,
                                              read_value=False))
                else:
                    # Partial sync: move each target a fraction (update_weight)
                    # of the way towards its source.
                    update_weight = self.update_weight.value()
                    for source_var, target_var in zip(source_variables,
                                                      variables):
                        delta = update_weight * (source_var - target_var)
                        deltas.append(delta)
                        assignments.append(
                            target_var.assign_add(delta=delta,
                                                  read_value=False))

            with tf.control_dependencies(control_inputs=assignments):
                # Trivial operation to enforce control dependency
                return [tf_util.identity(input=delta) for delta in deltas]
コード例 #2
0
    def value(self):
        """Compute the current parameter value and return it, with
        type/shape assertions and a summary attached as control
        dependencies so they are guaranteed to execute.
        """
        # Resolve the step counter driving this parameter, if any.
        step = None if self.unit is None else self.root.units[self.unit]
        parameter = self.parameter_value(step=step)

        # Assert dtype/shape validity of the computed value.
        dependencies = self.spec.tf_assert(
            x=parameter,
            include_type_shape=True,
            message='Parameter.value: invalid {{issue}} for {name} value.'.format(
                name=self.name))

        # Record the value under the matching step counter (default: timesteps).
        summary_step = 'timesteps' if self.unit is None else self.unit
        dependencies.extend(self.summary(
            label='parameters',
            name=('parameters/' + self.name),
            data=parameter,
            step=summary_step))

        with tf.control_dependencies(control_inputs=dependencies):
            return tf_util.identity(input=parameter)
コード例 #3
0
    def step(self, *, arguments, variables, **kwargs):
        """Run the wrapped optimizer's step, then clip the resulting deltas
        according to `self.mode` ('global_norm', 'norm' or 'value') and
        adjust the variables so they end up at the clipped values.

        Returns the list of clipped deltas.
        """
        # Deltas already applied to the variables by the inner optimizer.
        deltas = self.optimizer.step(arguments=arguments, variables=variables, **kwargs)

        with tf.control_dependencies(control_inputs=deltas):
            threshold = self.threshold.value()
            if self.mode == 'global_norm':
                # Here update_norm is the pre-clipping global norm tensor
                # returned by clip_by_global_norm.
                clipped_deltas, update_norm = tf.clip_by_global_norm(
                    t_list=deltas, clip_norm=threshold
                )
            else:
                clipped_deltas = list()
                for delta in deltas:
                    if self.mode == 'norm':
                        clipped_delta = tf.clip_by_norm(t=delta, clip_norm=threshold)
                    elif self.mode == 'value':
                        clipped_delta = tf.clip_by_value(
                            t=delta, clip_value_min=-threshold, clip_value_max=threshold
                        )
                    # NOTE(review): clipped_delta is unbound if self.mode is
                    # neither 'norm' nor 'value' — presumably validated at
                    # construction time; confirm.
                    clipped_deltas.append(clipped_delta)

                # In this branch update_norm is a callable that computes the
                # norm lazily; presumably self.summary accepts both tensors
                # and callables as data — TODO confirm.
                def update_norm():
                    return tf.linalg.global_norm(t_list=deltas)

            dependencies = self.summary(
                label='update-norm', name='unclipped-norm', data=update_norm, step='updates'
            )

            # Apply (clipped - unclipped) so each variable moves from its
            # already-updated value to the clipped one.
            for variable, delta, clipped_delta in zip(variables, deltas, clipped_deltas):
                dependencies.append(
                    variable.assign_add(delta=(clipped_delta - delta), read_value=False)
                )

        with tf.control_dependencies(control_inputs=dependencies):
            return [tf_util.identity(input=delta) for delta in clipped_deltas]
コード例 #4
0
    def apply(self, *, x):
        """Compute per-timestep differences of the input, using the stored
        previous value as the predecessor of the first timestep; the first
        ever delta is zero.
        """
        assertions = list()
        if self.config.create_tf_assertions:
            assertions.append(tf.debugging.assert_less_equal(
                x=tf.shape(input=x)[0], y=1,
                message="Deltafier preprocessor currently not compatible with batched Agent.act."
            ))

        # TODO: hack for empty batch (for self.previous.assign below)
        extended = tf.concat(values=(self.previous, x), axis=0)

        def first_delta():
            # No stored previous value yet: emit a zero delta for the first
            # timestep and mark that a previous value now exists.
            assignment = self.has_previous.assign(
                value=tf_util.constant(value=True, dtype='bool'), read_value=False
            )
            with tf.control_dependencies(control_inputs=(assignment,)):
                return tf.concat(values=(tf.zeros_like(input=x[:1]), x[1:] - x[:-1]), axis=0)

        def later_delta():
            # Difference against the stored previous value prepended to x.
            return x - extended[:-1]

        with tf.control_dependencies(control_inputs=assertions):
            empty_batch = tf.math.equal(x=tf.shape(input=x)[0], y=0)
            pred = tf.math.logical_or(x=self.has_previous, y=empty_batch)
            delta = tf.cond(pred=pred, true_fn=later_delta, false_fn=first_delta)

            # Remember the last timestep for the next call.
            assignment = self.previous.assign(value=extended[-1:], read_value=False)

        with tf.control_dependencies(control_inputs=(assignment,)):
            if self.concatenate is False:
                return tf_util.identity(input=delta)
            else:
                # self.concatenate holds the axis (offset by the batch dim)
                # along which the delta is appended to the input.
                return tf.concat(values=(x, delta), axis=(self.concatenate + 1))
コード例 #5
0
ファイル: evolutionary.py プロジェクト: maratimus/tensorforce
    def step(self, *, arguments, variables, fn_loss, **kwargs):
        """Evolutionary optimizer step.

        Samples `num_samples` random perturbations of the variables, signs
        each one's contribution by whether it reduced the loss, and applies
        the averaged signed perturbation as the final update.

        Returns the per-variable deltas that were applied.
        """
        learning_rate = self.learning_rate.value()

        # Loss at the current (unperturbed) parameters, used as baseline.
        unperturbed_loss = fn_loss(**arguments.to_kwargs())

        deltas = [tf.zeros_like(input=variable) for variable in variables]
        previous_perturbations = [
            tf.zeros_like(input=variable) for variable in variables
        ]

        def body(deltas, previous_perturbations):
            # One sample iteration: draw fresh perturbations and apply only
            # the change relative to the still-applied previous perturbations.
            with tf.control_dependencies(control_inputs=deltas):
                perturbations = [
                    learning_rate *
                    tf.random.normal(shape=tf_util.shape(x=variable),
                                     dtype=tf_util.get_dtype(type='float'))
                    for variable in variables
                ]
                perturbation_deltas = [
                    pert - prev_pert for pert, prev_pert in zip(
                        perturbations, previous_perturbations)
                ]
                assignments = list()
                for variable, delta in zip(variables, perturbation_deltas):
                    assignments.append(
                        variable.assign_add(delta=delta, read_value=False))

            with tf.control_dependencies(control_inputs=assignments):
                # Accumulate +perturbation if it lowered the loss,
                # -perturbation otherwise.
                perturbed_loss = fn_loss(**arguments.to_kwargs())
                direction = tf.math.sign(x=(unperturbed_loss - perturbed_loss))
                deltas = [
                    delta + direction * perturbation
                    for delta, perturbation in zip(deltas, perturbations)
                ]

            return deltas, perturbations

        num_samples = self.num_samples.value()
        deltas, perturbations = tf.while_loop(
            cond=tf_util.always_true,
            body=body,
            loop_vars=(deltas, previous_perturbations),
            maximum_iterations=tf_util.int32(x=num_samples))

        with tf.control_dependencies(control_inputs=deltas):
            # Average the accumulated deltas, then correct the variables from
            # the last sample's perturbation to the averaged delta.
            num_samples = tf_util.cast(x=num_samples, dtype='float')
            deltas = [delta / num_samples for delta in deltas]
            perturbation_deltas = [
                delta - pert for delta, pert in zip(deltas, perturbations)
            ]
            assignments = list()
            for variable, delta in zip(variables, perturbation_deltas):
                assignments.append(
                    variable.assign_add(delta=delta, read_value=False))

        with tf.control_dependencies(control_inputs=assignments):
            # Trivial operation to enforce control dependency
            return [tf_util.identity(input=delta) for delta in deltas]
コード例 #6
0
ファイル: preprocessor.py プロジェクト: maratimus/tensorforce
    def reset(self):
        """Reset all stateful preprocessing layers.

        Returns the first layer's reset operation wrapped in an identity, or
        a constant False when no layer required resetting.
        """
        reset_ops = [
            layer.reset() for layer in self.layers
            if isinstance(layer, PreprocessingLayer)
        ]
        if reset_ops:
            return tf_util.identity(input=reset_ops[0])
        return tf_util.constant(value=False, dtype='bool')
コード例 #7
0
            def not_empty_batch():
                """Advance the stored window of the last `self.length` inputs
                with the current input `x` and return the updated window.

                Uses from enclosing scope: `x`.
                """

                def first_timestep():
                    # No history yet: mark has_previous, then fill the whole
                    # window by tiling the current input `self.length` times
                    # along the window axis.
                    assignment = self.has_previous.assign(
                        value=tf_util.constant(value=True, dtype='bool'), read_value=False
                    )
                    with tf.control_dependencies(control_inputs=(assignment,)):
                        if self.concatenate:
                            current = x
                        else:
                            current = tf.expand_dims(input=x, axis=(self.axis + 1))
                        multiples = tuple(
                            self.length if dims == self.axis + 1 else 1
                            for dims in range(self.output_spec().rank + 1)
                        )
                        return tf.tile(input=current, multiples=multiples)

                def other_timesteps():
                    # Append the current input to the stored window.
                    if self.concatenate:
                        current = x
                    else:
                        current = tf.expand_dims(input=x, axis=(self.axis + 1))
                    return tf.concat(values=(self.previous, current), axis=(self.axis + 1))

                xs = tf.cond(
                    pred=self.has_previous, true_fn=other_timesteps, false_fn=first_timestep
                )

                # Slice off the oldest entry along the window axis so that the
                # stored `self.previous` keeps a constant size.
                if self.concatenate:
                    begin = tuple(
                        self.input_spec.shape[dims - 1] if dims == self.axis + 1 else 0
                        for dims in range(self.output_spec().rank + 1)
                    )
                else:
                    begin = tuple(
                        1 if dims == self.axis + 1 else 0
                        for dims in range(self.output_spec().rank + 1)
                    )
                assignment = self.previous.assign(
                    value=tf.slice(input_=xs, begin=begin, size=self.previous.shape),
                    read_value=False
                )

                with tf.control_dependencies(control_inputs=(assignment,)):
                    return tf_util.identity(input=xs)
コード例 #8
0
        def apply_sync():
            """Move each target variable a fraction (update weight) of the way
            towards its source variable and return the applied deltas.

            Uses from enclosing scope: `source_variables`, `variables`.
            """
            schedule_next = self.next_sync.assign(
                value=self.sync_frequency.value(), read_value=False)

            with tf.control_dependencies(control_inputs=(schedule_next,)):
                weight = self.update_weight.value()
                # Weighted difference for each source/target pair.
                deltas = [
                    weight * (src - tgt)
                    for src, tgt in zip(source_variables, variables)
                ]
                assignments = [
                    tgt.assign_add(delta=d, read_value=False)
                    for tgt, d in zip(variables, deltas)
                ]

            with tf.control_dependencies(control_inputs=assignments):
                # Identity ops enforce the control dependency on assignments.
                return [tf_util.identity(input=d) for d in deltas]
コード例 #9
0
        def apply_step():
            """Scale the precomputed deltas by the Lagrange multiplier, apply
            them to the variables, and return them (optionally together with
            the estimated loss improvement).

            Uses from enclosing scope: `constant`, `learning_rate`, `deltas`,
            `variables`, `loss_gradients`, `return_estimated_improvement`.
            """
            # lambda = sqrt(c' / c)
            lagrange_multiplier = tf.math.sqrt(x=(constant / learning_rate))

            # delta = delta' / lambda  (zero prevented via tf.cond pred below)
            estimated_deltas = deltas.fmap(function=(lambda delta: delta / lagrange_multiplier))

            # Apply natural gradient improvement.
            assignments = list()
            for variable, delta in zip(variables, estimated_deltas.values()):
                assignments.append(variable.assign_add(delta=delta, read_value=False))

            with tf.control_dependencies(control_inputs=assignments):
                if return_estimated_improvement:
                    # improvement = grad(loss) * delta  (= loss_new - loss_old)
                    estimated_improvement = tf.math.add_n(inputs=[
                        tf.math.reduce_sum(input_tensor=(loss_grad * delta))
                        for loss_grad, delta in zip(loss_gradients, estimated_deltas.values())
                    ])

                    return list(estimated_deltas.values()), estimated_improvement
                else:
                    # Trivial operation to enforce control dependency
                    return [tf_util.identity(input=delta) for delta in estimated_deltas.values()]
コード例 #10
0
    def step(self, *, arguments, variables, fn_loss, **kwargs):
        """Evolutionary optimizer step with per-variable dtype handling.

        Fast path (num_samples == 1): apply one random perturbation and keep
        it if it reduced the loss, otherwise negate it. General path: sample
        `num_samples` perturbations in a while-loop, sign each contribution
        by whether it reduced the loss, and apply the averaged delta.

        Returns the per-variable deltas that were applied.
        """
        learning_rate = self.learning_rate.value()

        # Loss at the current (unperturbed) parameters, used as baseline.
        unperturbed_loss = fn_loss(**arguments.to_kwargs())

        if self.num_samples.is_constant(value=1):
            deltas = list()
            for variable in variables:
                delta = tf.random.normal(shape=variable.shape,
                                         dtype=variable.dtype)
                # Cast the learning rate when the variable's dtype differs
                # from the default float dtype.
                if variable.dtype == tf_util.get_dtype(type='float'):
                    deltas.append(learning_rate * delta)
                else:
                    deltas.append(
                        tf.cast(x=learning_rate, dtype=variable.dtype) * delta)

            assignments = list()
            for variable, delta in zip(variables, deltas):
                assignments.append(
                    variable.assign_add(delta=delta, read_value=False))

            with tf.control_dependencies(control_inputs=assignments):
                perturbed_loss = fn_loss(**arguments.to_kwargs())

                def negate_deltas():
                    # Undo the perturbation twice over (-2 * delta), i.e. move
                    # to the mirrored point, and return the negated deltas.
                    neg_two_float = tf_util.constant(value=-2.0, dtype='float')
                    assignments = list()
                    for variable, delta in zip(variables, deltas):
                        if variable.dtype == tf_util.get_dtype(type='float'):
                            assignments.append(
                                variable.assign_add(delta=(neg_two_float *
                                                           delta),
                                                    read_value=False))
                        else:
                            _ng_two_float = tf.constant(value=-2.0,
                                                        dtype=variable.dtype)
                            assignments.append(
                                variable.assign_add(delta=(_ng_two_float *
                                                           delta),
                                                    read_value=False))

                    with tf.control_dependencies(control_inputs=assignments):
                        return [tf.math.negative(x=delta) for delta in deltas]

                # Keep the perturbation only if it improved (lowered) the loss.
                return tf.cond(pred=(perturbed_loss < unperturbed_loss),
                               true_fn=(lambda: deltas),
                               false_fn=negate_deltas)

        else:
            deltas = [tf.zeros_like(input=variable) for variable in variables]
            previous_perturbations = [
                tf.zeros_like(input=variable) for variable in variables
            ]

            def body(deltas, previous_perturbations):
                # One sample iteration: draw fresh perturbations and apply
                # only the change relative to the still-applied previous ones.
                with tf.control_dependencies(control_inputs=deltas):
                    perturbations = list()
                    for variable in variables:
                        perturbation = tf.random.normal(shape=variable.shape,
                                                        dtype=variable.dtype)
                        if variable.dtype == tf_util.get_dtype(type='float'):
                            perturbations.append(learning_rate * perturbation)
                        else:
                            perturbations.append(
                                tf.cast(x=learning_rate, dtype=variable.dtype)
                                * perturbation)

                    perturbation_deltas = [
                        pert - prev_pert for pert, prev_pert in zip(
                            perturbations, previous_perturbations)
                    ]
                    assignments = list()
                    for variable, delta in zip(variables, perturbation_deltas):
                        assignments.append(
                            variable.assign_add(delta=delta, read_value=False))

                with tf.control_dependencies(control_inputs=assignments):
                    perturbed_loss = fn_loss(**arguments.to_kwargs())

                    # direction = +1 if the perturbation lowered the loss,
                    # -1 otherwise.
                    one_float = tf_util.constant(value=1.0, dtype='float')
                    neg_one_float = tf_util.constant(value=-1.0, dtype='float')
                    direction = tf.where(
                        condition=(perturbed_loss < unperturbed_loss),
                        x=one_float,
                        y=neg_one_float)

                    next_deltas = list()
                    for variable, delta, perturbation in zip(
                            variables, deltas, perturbations):
                        if variable.dtype == tf_util.get_dtype(type='float'):
                            next_deltas.append(delta +
                                               direction * perturbation)
                        else:
                            next_deltas.append(
                                delta +
                                tf.cast(x=direction, dtype=variable.dtype) *
                                perturbation)

                return next_deltas, perturbations

            num_samples = self.num_samples.value()
            deltas, perturbations = tf.while_loop(
                cond=tf_util.always_true,
                body=body,
                loop_vars=(deltas, previous_perturbations),
                maximum_iterations=tf_util.int32(x=num_samples))

            with tf.control_dependencies(control_inputs=deltas):
                # Average the accumulated deltas, then correct the variables
                # from the last sample's perturbation to the averaged delta.
                num_samples = tf_util.cast(x=num_samples, dtype='float')
                deltas = [delta / num_samples for delta in deltas]

                perturbation_deltas = [
                    delta - pert for delta, pert in zip(deltas, perturbations)
                ]
                assignments = list()
                for variable, delta in zip(variables, perturbation_deltas):
                    assignments.append(
                        variable.assign_add(delta=delta, read_value=False))

            with tf.control_dependencies(control_inputs=assignments):
                # Trivial operation to enforce control dependency
                return [tf_util.identity(input=delta) for delta in deltas]
コード例 #11
0
    def observe(self, *, terminal, reward, parallel):
        """Record a batch of (terminal, reward) timesteps for one parallel
        environment index.

        Validates the inputs, updates the per-parallel episode length/reward
        accumulators, runs the core observe step, and — if the batch ends in
        a terminal — resets internals and episode accumulators, emits episode
        summaries, and increments the episode counter.

        Returns a tuple (updated, episodes, updates).
        """
        zero = tf_util.constant(value=0, dtype='int')
        one = tf_util.constant(value=1, dtype='int')
        batch_size = tf_util.cast(x=tf.shape(input=terminal)[0], dtype='int')
        # Shape (1, 1) index used for scatter updates into per-parallel buffers.
        expanded_parallel = tf.expand_dims(input=tf.expand_dims(input=parallel,
                                                                axis=0),
                                           axis=1)
        is_terminal = tf.math.greater(x=terminal[-1], y=zero)

        # Input assertions
        assertions = list()
        if self.config.create_tf_assertions:
            assertions.extend(
                self.terminal_spec.tf_assert(
                    x=terminal,
                    batch_size=batch_size,
                    message='Agent.observe: invalid {issue} for terminal input.'
                ))
            # Fixed copy-paste error: this message previously said
            # 'terminal input' although it checks the reward input.
            assertions.extend(
                self.reward_spec.tf_assert(
                    x=reward,
                    batch_size=batch_size,
                    message='Agent.observe: invalid {issue} for reward input.'
                ))
            assertions.extend(
                self.parallel_spec.tf_assert(
                    x=parallel,
                    message='Agent.observe: invalid {issue} for parallel input.'
                ))
            # Assertion: at most one terminal
            num_terms = tf.math.count_nonzero(
                input=terminal, dtype=tf_util.get_dtype(type='int'))
            assertions.append(
                tf.debugging.assert_less_equal(
                    x=num_terms,
                    y=one,
                    message=
                    "Agent.observe: input contains more than one terminal."))
            # Assertion: if terminal, last timestep in batch
            assertions.append(
                tf.debugging.assert_equal(
                    x=tf.math.greater(x=num_terms, y=zero),
                    y=is_terminal,
                    message=
                    "Agent.observe: terminal is not the last input timestep."))

        with tf.control_dependencies(control_inputs=assertions):
            dependencies = list()

            # Reward summary
            if self.summaries == 'all' or 'reward' in self.summaries:
                with self.summarizer.as_default():
                    x = tf.math.reduce_mean(input_tensor=reward)
                    dependencies.append(
                        tf.summary.scalar(name='reward',
                                          data=x,
                                          step=self.timesteps))

            # Update episode length/reward
            updates = tf.expand_dims(input=batch_size, axis=0)
            value = tf.tensor_scatter_nd_add(tensor=self.episode_length,
                                             indices=expanded_parallel,
                                             updates=updates)
            dependencies.append(self.episode_length.assign(value=value))
            # sparse_delta = tf.IndexedSlices(values=batch_size, indices=parallel)
            # dependencies.append(self.episode_length.scatter_add(sparse_delta=sparse_delta))
            sum_reward = tf.math.reduce_sum(input_tensor=reward, keepdims=True)
            value = tf.tensor_scatter_nd_add(tensor=self.episode_reward,
                                             indices=expanded_parallel,
                                             updates=sum_reward)
            dependencies.append(self.episode_reward.assign(value=value))
            # sum_reward = tf.math.reduce_sum(input_tensor=reward)
            # sparse_delta = tf.IndexedSlices(values=sum_reward, indices=parallel)
            # dependencies.append(self.episode_reward.scatter_add(sparse_delta=sparse_delta))

            # Core observe (before terminal handling)
            updated = self.core_observe(terminal=terminal,
                                        reward=reward,
                                        parallel=parallel)
            dependencies.append(updated)

        # Handle terminal (after core observe and episode reward)
        with tf.control_dependencies(control_inputs=dependencies):

            def fn_terminal():
                operations = list()

                # Reset internals
                def function(spec, initial):
                    return tf_util.constant(value=initial, dtype=spec.type)

                initials = self.internals_spec.fmap(
                    function=function,
                    cls=TensorDict,
                    zip_values=self.initial_internals)
                for name, previous, initial in self.previous_internals.zip_items(
                        initials):
                    updates = tf.expand_dims(input=initial, axis=0)
                    value = tf.tensor_scatter_nd_update(
                        tensor=previous,
                        indices=expanded_parallel,
                        updates=updates)
                    operations.append(previous.assign(value=value))
                    # sparse_delta = tf.IndexedSlices(values=initial, indices=parallel)
                    # operations.append(previous.scatter_update(sparse_delta=sparse_delta))

                # Episode length/reward summaries (before episode reward reset / episodes increment)
                dependencies = list()
                if self.summaries == 'all' or 'reward' in self.summaries:
                    with self.summarizer.as_default():
                        x = tf.gather(params=self.episode_length,
                                      indices=parallel)
                        dependencies.append(
                            tf.summary.scalar(name='episode-length',
                                              data=x,
                                              step=self.episodes))
                        x = tf.gather(params=self.episode_reward,
                                      indices=parallel)
                        dependencies.append(
                            tf.summary.scalar(name='episode-reward',
                                              data=x,
                                              step=self.episodes))

                # Reset episode length/reward
                with tf.control_dependencies(control_inputs=dependencies):
                    zeros = tf_util.zeros(shape=(1, ), dtype='int')
                    value = tf.tensor_scatter_nd_update(
                        tensor=self.episode_length,
                        indices=expanded_parallel,
                        updates=zeros)
                    operations.append(self.episode_length.assign(value=value))
                    # sparse_delta = tf.IndexedSlices(values=zero, indices=parallel)
                    # operations.append(self.episode_length.scatter_update(sparse_delta=sparse_delta))
                    zeros = tf_util.zeros(shape=(1, ), dtype='float')
                    value = tf.tensor_scatter_nd_update(
                        tensor=self.episode_reward,
                        indices=expanded_parallel,
                        updates=zeros)
                    operations.append(self.episode_reward.assign(value=value))
                    # zero_float = tf_util.constant(value=0.0, dtype='float')
                    # sparse_delta = tf.IndexedSlices(values=zero_float, indices=parallel)
                    # operations.append(self.episode_reward.scatter_update(sparse_delta=sparse_delta))

                # Increment episodes counter
                operations.append(
                    self.episodes.assign_add(delta=one, read_value=False))

                return tf.group(*operations)

            handle_terminal = tf.cond(pred=is_terminal,
                                      true_fn=fn_terminal,
                                      false_fn=tf.no_op)

        with tf.control_dependencies(control_inputs=(handle_terminal, )):
            episodes = tf_util.identity(input=self.episodes)
            updates = tf_util.identity(input=self.updates)
            return updated, episodes, updates
コード例 #12
0
    def act(self, *, states, auxiliaries, parallel):
        """Act on a batch of states for the given parallel environment
        indices: validates inputs, runs the core act step with the stored
        per-parallel internals, validates the resulting actions, persists the
        new internals, and increments the timestep counter.

        Returns a tuple (actions, timestep).
        """
        batch_size = tf_util.cast(x=tf.shape(input=parallel)[0], dtype='int')

        # Input assertions
        assertions = list()
        if self.config.create_tf_assertions:
            assertions.extend(
                self.states_spec.tf_assert(
                    x=states,
                    batch_size=batch_size,
                    message='Agent.act: invalid {issue} for {name} state input.'
                ))
            assertions.extend(
                self.auxiliaries_spec.tf_assert(
                    x=auxiliaries,
                    batch_size=batch_size,
                    message='Agent.act: invalid {issue} for {name} input.'))
            assertions.extend(
                self.parallel_spec.tf_assert(
                    x=parallel,
                    batch_size=batch_size,
                    message='Agent.act: invalid {issue} for parallel input.'))
            # Mask assertions: every int action needs at least one valid choice.
            if self.config.enable_int_action_masking:
                true = tf_util.constant(value=True, dtype='bool')
                for name, spec in self.actions_spec.items():
                    if spec.type == 'int':
                        # NOTE(review): message says 'independent_act' though
                        # this is act() — possibly copy-paste; confirm wording.
                        assertions.append(
                            tf.debugging.assert_equal(
                                x=tf.reduce_all(
                                    input_tensor=tf.math.reduce_any(
                                        input_tensor=auxiliaries[name]['mask'],
                                        axis=(spec.rank + 1))),
                                y=true,
                                message=
                                "Agent.independent_act: at least one action has to be valid."
                            ))

        with tf.control_dependencies(control_inputs=assertions):
            # Retrieve internals
            internals = self.previous_internals.fmap(
                function=(lambda x: tf.gather(params=x, indices=parallel)),
                cls=TensorDict)

            # Core act
            deterministic = tf_util.constant(value=False, dtype='bool')
            actions, internals = self.core_act(states=states,
                                               internals=internals,
                                               auxiliaries=auxiliaries,
                                               parallel=parallel,
                                               deterministic=deterministic,
                                               independent=False)

        # Action assertions
        assertions = list()
        if self.config.create_tf_assertions:
            assertions.extend(
                self.actions_spec.tf_assert(x=actions, batch_size=batch_size))
            if self.config.enable_int_action_masking:
                for name, spec, action in self.actions_spec.zip_items(actions):
                    if spec.type == 'int':
                        # Every chosen int action must be allowed by its mask.
                        # `true` was defined above under the same config flags.
                        is_valid = tf.reduce_all(input_tensor=tf.gather(
                            params=auxiliaries[name]['mask'],
                            indices=tf.expand_dims(input=action,
                                                   axis=(spec.rank + 1)),
                            batch_dims=(spec.rank + 1)))
                        assertions.append(
                            tf.debugging.assert_equal(
                                x=is_valid,
                                y=true,
                                message="Action mask check."))

        # Remember internals
        dependencies = list()
        for name, previous, internal in self.previous_internals.zip_items(
                internals):
            indices = tf.expand_dims(input=parallel, axis=1)
            value = tf.tensor_scatter_nd_update(tensor=previous,
                                                indices=indices,
                                                updates=internal)
            dependencies.append(previous.assign(value=value))
            # sparse_delta = tf.IndexedSlices(values=internal, indices=parallel)
            # dependencies.append(previous.scatter_update(sparse_delta=sparse_delta))

        # Increment timestep (after core act)
        with tf.control_dependencies(control_inputs=(actions.flatten() +
                                                     internals.flatten())):
            dependencies.append(
                self.timesteps.assign_add(delta=batch_size, read_value=False))

        with tf.control_dependencies(control_inputs=(dependencies +
                                                     assertions)):
            actions = actions.fmap(function=tf_util.identity)
            timestep = tf_util.identity(input=self.timesteps)
            return actions, timestep
コード例 #13
0
 def reset(self):
     """Fetch the current timestep, episode and update counters."""
     counters = (self.timesteps, self.episodes, self.updates)
     timestep, episode, update = (
         tf_util.identity(input=counter) for counter in counters)
     return timestep, episode, update
コード例 #14
0
    def update(self, *, arguments, variables, **kwargs):
        """Perform one optimizer update and record update summaries.

        Args:
            arguments: Optimizer arguments, forwarded to ``self.step``.
            variables: Sequence of floating-point ``tf.Variable`` to update.
            **kwargs: Additional arguments forwarded to ``self.step``.

        Returns:
            A boolean constant tensor (always true) carrying control
            dependencies on the update deltas and summary operations.
        """
        assert self.is_initialized_given_variables
        assert all(variable.dtype.is_floating for variable in variables)

        # step() performs the actual variable update and returns per-variable
        # deltas; depending on the deltas forces the update to run first.
        deltas = self.step(arguments=arguments, variables=variables, **kwargs)
        dependencies = list(deltas)

        # Lazy closure: evaluated by self.summary only if the summary is
        # enabled; computes the global norm of all update deltas.
        def fn_summary():
            return tf.linalg.global_norm(t_list=[
                tf_util.cast(x=delta, dtype='float') for delta in deltas
            ])

        # Assertions also depend on the deltas so they run after the update.
        assertions = list(deltas)
        # if self.config.create_debug_assertions:
        #     if self.__class__.__name__ != 'Synchronization':
        #         for delta, variable in zip(deltas, variables):
        #             if variable.shape.num_elements() <= 4:
        #                 continue
        #             if '/policy/' in variable.name and '_distribution/' in variable.name:
        #                 continue
        #             assertions.append(tf.debugging.assert_equal(
        #                 x=tf.math.reduce_any(
        #                     input_tensor=tf.math.not_equal(x=delta, y=tf.zeros_like(input=delta))
        #                 ), y=tf_util.constant(value=True, dtype='bool'), message=variable.name
        #             ))

        # Summary name derived from the module-name prefix before the first
        # underscore, e.g. 'policy_optimizer' -> 'policy-update/norm'.
        name = self.name[:self.name.index('_')] + '-update/norm'
        dependencies.extend(
            self.summary(label='update-norm',
                         name=name,
                         data=fn_summary,
                         step='updates'))

        with tf.control_dependencies(control_inputs=assertions):

            # Shadows the earlier fn_summary deliberately: this closure
            # computes post-update mean/variance moments per variable.
            def fn_summary():
                xs = list()
                for variable in variables:
                    xs.extend(
                        tf.nn.moments(x=variable,
                                      axes=list(range(
                                          tf_util.rank(x=variable)))))
                return xs

            # Build '<prefix>/<variable-path>-mean'/'-variance' names by
            # stripping the root scope prefix and the trailing ':0'.
            prefix = self.name[:self.name.index('_')] + '-updates/'
            names = list()
            for variable in variables:
                assert variable.name.startswith(
                    self.root.name + '/') and variable.name[-2:] == ':0'
                names.append(prefix +
                             variable.name[len(self.root.name) + 1:-2] +
                             '-mean')
                names.append(prefix +
                             variable.name[len(self.root.name) + 1:-2] +
                             '-variance')
            dependencies.extend(
                self.summary(label='updates',
                             name=names,
                             data=fn_summary,
                             step='updates'))

        # Trivial identity output forces all dependencies to execute.
        with tf.control_dependencies(control_inputs=dependencies):
            return tf_util.identity(
                input=tf_util.constant(value=True, dtype='bool'))
コード例 #15
0
    def apply(self, *, x, horizons, internals):
        """Apply the layer over temporal horizons of the input sequence.

        Args:
            x: Batched input tensor; presumably flat over all timesteps with
               `horizons` indexing episode windows into it — TODO confirm.
            horizons: Int tensor of shape (batch, 2) with (start, length)
               per batch entry, as indexed by horizons[:, 0] / horizons[:, 1].
            internals: Internal-state values (used only for the 'iterative'
               temporal-processing mode).

        Returns:
            The processed output tensor; for 'iterative' mode, a tuple of
            (output, updated internals).
        """
        zero = tf_util.constant(value=0, dtype='int')
        one = tf_util.constant(value=1, dtype='int')
        batch_size = tf_util.cast(x=tf.shape(input=horizons)[0], dtype='int')
        zeros = tf_util.zeros(shape=(batch_size, ), dtype='int')
        ones = tf_util.ones(shape=(batch_size, ), dtype='int')

        # including 0th step
        horizon = self.horizon.value() + one
        # in case of longer horizon than necessary (e.g. main vs baseline policy)
        # Clip each window to at most `horizon` steps, keeping its end fixed.
        starts = horizons[:, 0] + tf.maximum(x=(horizons[:, 1] - horizon),
                                             y=zeros)
        lengths = horizons[:, 1] - tf.maximum(x=(horizons[:, 1] - horizon),
                                              y=zeros)
        # No batch entry needs more iterations than the longest clipped window.
        horizon = tf.minimum(x=horizon,
                             y=tf.math.reduce_max(input_tensor=lengths,
                                                  axis=0))
        output_spec = self.output_spec()

        if self.temporal_processing == 'cumulative':
            if self.horizon.is_constant(value=0):
                # Zero horizon: every window is a single step.
                x = self.iterative_apply(xs=x, lengths=ones)

            else:

                # Loop body: gather the current step per batch entry, append
                # it along the time axis, then advance indices for entries
                # that still have remaining steps (finished entries repeat
                # their last step without advancing).
                def body(x, indices, remaining, xs):
                    current_x = tf.gather(params=x, indices=indices)
                    current_x = tf.expand_dims(input=current_x, axis=1)
                    xs = tf.concat(values=(xs, current_x), axis=1)
                    remaining -= tf.where(condition=tf.math.equal(x=remaining,
                                                                  y=zeros),
                                          x=zeros,
                                          y=ones)
                    indices += tf.where(condition=tf.math.equal(x=remaining,
                                                                y=zeros),
                                        x=zeros,
                                        y=ones)
                    return x, indices, remaining, xs

                # Start with an empty (batch, 0, ...) time axis to concat into.
                initial_xs = tf_util.zeros(shape=((batch_size, 0) +
                                                  output_spec.shape),
                                           dtype=output_spec.type)

                # NOTE(review): maximum_iterations is int64 here but int32 in
                # the 'iterative' branch below — verify this is intentional.
                _, final_indices, final_remaining, xs = tf.while_loop(
                    cond=tf_util.always_true,
                    body=body,
                    loop_vars=(x, starts, lengths, initial_xs),
                    maximum_iterations=tf_util.int64(x=horizon))

                x = self.cumulative_apply(xs=xs, lengths=lengths)

        elif self.temporal_processing == 'iterative':
            if self.horizon.is_constant(value=0):
                # Zero horizon: single iterative step over the batch.
                x, final_internals = self.iterative_apply(x=x,
                                                          internals=internals)

            else:
                initial_x = tf_util.zeros(shape=((batch_size, ) +
                                                 output_spec.shape),
                                          dtype=output_spec.type)

                # Internals must be passed positionally through the while_loop,
                # so convert kwargs -> args and back around it.
                signature = self.input_signature(function='iterative_body')
                internals = signature['current_internals'].kwargs_to_args(
                    kwargs=internals)
                _, final_indices, final_remaining, x, final_internals = tf.while_loop(
                    cond=tf_util.always_true,
                    body=self.iterative_body,
                    loop_vars=(x, starts, lengths, initial_x, internals),
                    maximum_iterations=tf_util.int32(x=horizon))
                internals = signature['current_internals'].args_to_kwargs(
                    args=final_internals)

        assertions = list()
        if self.config.create_tf_assertions:
            # Each entry's final index should be the last step of its window,
            # i.e. cumulative lengths minus one.
            assertions.append(
                tf.debugging.assert_equal(x=final_indices,
                                          y=(tf.math.cumsum(x=lengths) -
                                             ones)))
            # All windows must have been fully consumed.
            assertions.append(
                tf.debugging.assert_equal(
                    x=tf.math.reduce_sum(input_tensor=final_remaining),
                    y=zero))

        with tf.control_dependencies(control_inputs=assertions):
            if self.temporal_processing == 'cumulative':
                return tf_util.identity(input=super().apply(x=x))
            elif self.temporal_processing == 'iterative':
                return tf_util.identity(input=super().apply(x=x)), internals
コード例 #16
0
 def reset(self):
     """Clear the has-previous flag and return the (false) flag value."""
     cleared = tf_util.constant(value=False, dtype='bool')
     clear_op = self.has_previous.assign(value=cleared, read_value=False)
     # Depend on the assignment so resetting actually happens before returning.
     with tf.control_dependencies(control_inputs=(clear_op, )):
         return tf_util.identity(input=cleared)
コード例 #17
0
ファイル: optimizer.py プロジェクト: xkarlx/tensorforce
    def update(self, *, arguments, variables, **kwargs):
        """Perform one optimizer update, with optional debug checks and summaries.

        Args:
            arguments: Optimizer arguments; ``arguments['reward']`` is read by
                the debug assertions below.
            variables: Sequence of floating-point ``tf.Variable`` to update.
            **kwargs: Additional arguments forwarded to ``self.step``.

        Returns:
            A boolean constant tensor (always true) carrying control
            dependencies on the assertions and summary operations.
        """
        assert self.is_initialized_given_variables
        assert all(variable.dtype.is_floating for variable in variables)

        # step() performs the actual variable update and returns the deltas.
        deltas = self.step(arguments=arguments, variables=variables, **kwargs)

        # Assertions depend on the deltas so they run after the update.
        assertions = list(deltas)
        if self.config.create_debug_assertions:
            from tensorforce.core.optimizers import DoublecheckStep, NaturalGradient, \
                Synchronization, UpdateModifier
            # Unwrap the update-modifier chain to find the inner optimizer.
            optimizer = self
            while isinstance(optimizer, UpdateModifier):
                if isinstance(optimizer, DoublecheckStep):
                    break
                optimizer = optimizer.optimizer
            # Skip the zero-delta check for optimizers that may legitimately
            # produce all-zero deltas (doublecheck wrapper, natural gradient
            # without only-positive updates, periodic synchronization).
            if not isinstance(optimizer, DoublecheckStep) and (
                    not isinstance(optimizer, NaturalGradient)
                    or not optimizer.only_positive_updates) and (
                        not isinstance(self, Synchronization)
                        or self.sync_frequency is None):
                for delta, variable in zip(deltas, variables):
                    if '_distribution/mean/linear/' in variable.name:
                        # Gaussian.state_value does not use mean
                        continue
                    # if variable.name.endswith('/bias:0') and isinstance(self, Synchronization) \
                    #         and self.root.updates.numpy() == 0:
                    #     # Initialization values are equivalent for bias
                    #     continue
                    # Assert: either the delta has at least one non-zero entry,
                    # or the whole reward batch is zero (no learning signal).
                    assertions.append(
                        tf.debugging.assert_equal(x=tf.math.logical_or(
                            x=tf.math.reduce_all(input_tensor=tf.math.greater(
                                x=tf.math.count_nonzero(
                                    input=delta,
                                    dtype=tf_util.get_dtype(type='int')),
                                y=tf_util.constant(value=0, dtype='int'))),
                            y=tf.reduce_all(input_tensor=tf.math.equal(
                                x=arguments['reward'],
                                y=tf_util.constant(value=0.0,
                                                   dtype='float')))),
                                                  y=tf_util.constant(
                                                      value=True,
                                                      dtype='bool'),
                                                  message=variable.name))

        with tf.control_dependencies(control_inputs=assertions):
            dependencies = list()

            # Summary: global norm of the update deltas.
            if self.root.summaries == 'all' or 'update-norm' in self.root.summaries:
                with self.root.summarizer.as_default():
                    x = tf.linalg.global_norm(t_list=[
                        tf_util.cast(x=delta, dtype='float')
                        for delta in deltas
                    ])
                    dependencies.append(
                        tf.summary.scalar(name='update-norm',
                                          data=x,
                                          step=self.root.updates))

            # Summary: per-variable mean and variance after the update,
            # named by the variable path without root prefix and ':0'.
            if self.root.summaries == 'all' or 'updates' in self.root.summaries:
                with self.root.summarizer.as_default():
                    for var in variables:
                        assert var.name.startswith(
                            self.root.name + '/') and var.name[-2:] == ':0'
                        mean_name = var.name[len(self.root.name) +
                                             1:-2] + '-mean'
                        var_name = var.name[len(self.root.name) +
                                            1:-2] + '-variance'
                        mean, variance = tf.nn.moments(
                            x=var, axes=list(range(tf_util.rank(x=var))))
                        dependencies.append(
                            tf.summary.scalar(name=mean_name,
                                              data=mean,
                                              step=self.root.updates))
                        dependencies.append(
                            tf.summary.scalar(name=var_name,
                                              data=variance,
                                              step=self.root.updates))

        # Trivial identity output forces all dependencies to execute.
        with tf.control_dependencies(control_inputs=dependencies):
            return tf_util.identity(
                input=tf_util.constant(value=True, dtype='bool'))
コード例 #18
0
    def step(self, *, arguments, variables, fn_loss, **kwargs):
        """Compute gradients of the loss and apply them via the TF optimizer.

        Args:
            arguments: Loss-function arguments; ``fn_loss`` is called with
                ``arguments.to_kwargs()``.
            variables: Sequence of ``tf.Variable`` to differentiate and update.
            fn_loss: Callable returning the scalar loss to minimize.
            **kwargs: Unused here; accepted for interface compatibility.

        Returns:
            List of per-variable deltas (new value minus value before the
            update), in the order of ``variables``.
        """
        # Trivial operation to enforce control dependency
        previous_values = list(
            tf_util.identity(input=variable) for variable in variables)

        # Remember variables before update
        with tf.control_dependencies(control_inputs=previous_values):

            with tf.GradientTape(persistent=False,
                                 watch_accessed_variables=False) as tape:
                for variable in variables:
                    tape.watch(tensor=variable)
                loss = fn_loss(**arguments.to_kwargs())

            gradients = tape.gradient(
                target=loss, sources=variables)  # , output_gradients=initial

            # Drop variables with no gradient (iterating backwards so that
            # pop() does not shift not-yet-visited indices); assert finiteness
            # of the gradients that remain.
            assertions = list()
            gradients = list(gradients)
            grads_and_vars = list(zip(gradients, variables))
            for n in range(len(gradients) - 1, -1, -1):
                if gradients[n] is None:
                    gradients.pop(n)
                    grads_and_vars.pop(n)
                elif self.config.create_tf_assertions:
                    assertions.append(
                        tf.debugging.assert_all_finite(
                            x=gradients[n],
                            message="Invalid gradient: contains inf or nan."))
            assert len(gradients) > 0

        with tf.control_dependencies(control_inputs=assertions):

            dependencies = list()
            if self.gradient_norm_clipping is not None:
                # Clip by global norm and record the pre-clipping norm.
                clip_norm = self.gradient_norm_clipping.value()
                gradients, grads_norm = tf.clip_by_global_norm(
                    t_list=[
                        tf_util.cast(x=g, dtype='float') for g in gradients
                    ],
                    clip_norm=clip_norm)
                dependencies.extend(
                    self.summary(label='update-norm',
                                 name='unclipped-gradient-norm',
                                 data=grads_norm,
                                 step='updates'))
                # Re-pair the clipped gradients with their variables.
                grads_and_vars = [
                    (grad, var)
                    for grad, (_, var) in zip(gradients, grads_and_vars)
                ]

            applied = self.tf_optimizer.apply_gradients(
                grads_and_vars=grads_and_vars)
            dependencies.append(applied)

        # Return deltas after actually having change the variables.
        with tf.control_dependencies(control_inputs=dependencies):
            return [
                variable - previous
                for variable, previous in zip(variables, previous_values)
            ]
コード例 #19
0
ファイル: optimizer.py プロジェクト: tensorforce/tensorforce
    def update(self, *, arguments, variables, **kwargs):
        """Perform one optimizer update with a rolling zero-delta debug check.

        Args:
            arguments: Optimizer arguments; ``arguments['reward']`` is read by
                the debug check below.
            variables: Sequence of floating-point ``tf.Variable`` to update.
            **kwargs: Additional arguments forwarded to ``self.step``.

        Returns:
            A boolean constant tensor (always true) carrying control
            dependencies on the update, debug-check and summary operations.
        """
        assert self.is_initialized_given_variables
        assert all(variable.dtype.is_floating for variable in variables)

        # step() performs the actual variable update and returns the deltas.
        deltas = self.step(arguments=arguments, variables=variables, **kwargs)

        operations = list(deltas)
        if self.config.create_debug_assertions:
            from tensorforce.core.optimizers import DoublecheckStep, NaturalGradient, \
                Synchronization, UpdateModifier
            # Unwrap the update-modifier chain to find the inner optimizer.
            optimizer = self
            while isinstance(optimizer, UpdateModifier):
                if isinstance(optimizer, DoublecheckStep):
                    break
                optimizer = optimizer.optimizer
            # Skip the zero-delta check for optimizers that may legitimately
            # produce all-zero deltas (doublecheck wrapper, natural gradient
            # without only-positive updates, periodic synchronization).
            if not isinstance(optimizer, DoublecheckStep) and (
                    not isinstance(optimizer, NaturalGradient)
                    or not optimizer.only_positive_updates) and (
                        not isinstance(self, Synchronization)
                        or self.sync_frequency is None):
                false = tf_util.constant(value=False, dtype='bool')
                zero = tf_util.constant(value=0, dtype='int')
                one = tf_util.constant(value=1, dtype='int')
                zero_float = tf_util.constant(value=0.0, dtype='float')
                # Only a batch with some non-zero reward is expected to move
                # the variables.
                has_nonzero_reward = tf.reduce_any(
                    input_tensor=tf.math.not_equal(x=arguments['reward'],
                                                   y=zero_float))
                for n, (delta, variable) in enumerate(zip(deltas, variables)):
                    if '_distribution/mean/linear/' in variable.name:
                        # Gaussian.state_value does not use mean
                        continue
                    # Suspicious iff the delta is all-zero despite a non-zero
                    # reward signal.
                    is_zero = tf.math.logical_and(x=tf.math.equal(
                        x=tf.math.count_nonzero(
                            input=delta, dtype=tf_util.get_dtype(type='int')),
                        y=zero),
                                                  y=has_nonzero_reward)
                    # Record the flag at [zero_check_index, n] in the rolling
                    # history of the last updates.
                    variable_index = tf_util.constant(value=n,
                                                      dtype='int',
                                                      shape=(1, ))
                    scatter_index = tf.stack(values=(tf.expand_dims(
                        input=self.zero_check_index, axis=0), variable_index),
                                             axis=1)
                    operations.append(
                        tf.tensor_scatter_nd_update(
                            tensor=self.zero_check_history,
                            indices=scatter_index,
                            updates=tf.expand_dims(input=is_zero, axis=0)))

                # Fail if any variable was flagged in every recorded update.
                operations.append(
                    tf.debugging.assert_equal(x=tf.math.reduce_any(
                        input_tensor=tf.math.reduce_all(
                            input_tensor=self.zero_check_history, axis=1),
                        axis=0),
                                              y=false))
                with tf.control_dependencies(control_inputs=operations):
                    # Advance the circular history index 0 -> 1 -> 2 -> 0.
                    # Fix: the previous code computed mod(one, 3), which is the
                    # constant 1, so the index never cycled and two of the
                    # three history rows were never overwritten.
                    operations = [
                        self.zero_check_index.assign(
                            value=tf.math.mod(x=(self.zero_check_index + one),
                                              y=3))
                    ]

        with tf.control_dependencies(control_inputs=operations):
            dependencies = list()

            # Summary: global norm of the update deltas.
            if self.root.summaries == 'all' or 'update-norm' in self.root.summaries:
                with self.root.summarizer.as_default():
                    x = tf.linalg.global_norm(t_list=[
                        tf_util.cast(x=delta, dtype='float')
                        for delta in deltas
                    ])
                    dependencies.append(
                        tf.summary.scalar(name='update-norm',
                                          data=x,
                                          step=self.root.updates))

            # Summary: per-variable mean and variance after the update, named
            # by the variable path without the root prefix (if present) and
            # the trailing ':0'.
            if self.root.summaries == 'all' or 'updates' in self.root.summaries:
                with self.root.summarizer.as_default():
                    for var in variables:
                        assert var.name[-2] == ':'
                        if var.name.startswith(self.root.name + '/'):
                            mean_name = var.name[len(self.root.name) +
                                                 1:-2] + '-mean'
                            var_name = var.name[len(self.root.name) +
                                                1:-2] + '-variance'
                        else:
                            mean_name = var.name[:-2] + '-mean'
                            var_name = var.name[:-2] + '-variance'
                        mean, variance = tf.nn.moments(
                            x=var, axes=list(range(tf_util.rank(x=var))))
                        dependencies.append(
                            tf.summary.scalar(name=mean_name,
                                              data=mean,
                                              step=self.root.updates))
                        dependencies.append(
                            tf.summary.scalar(name=var_name,
                                              data=variance,
                                              step=self.root.updates))

        # Trivial identity output forces all dependencies to execute.
        with tf.control_dependencies(control_inputs=dependencies):
            return tf_util.identity(
                input=tf_util.constant(value=True, dtype='bool'))