Example #1
    def run_through_graph_fn_with_device_and_scope(self, graph_fn,
                                                   assigned_device):
        """
        Assigns device to the ops generated by a graph_fn.

        Args:
            graph_fn (GraphFunction): GraphFunction to assign device to.
            assigned_device (str): Device identifier.
        """
        if get_backend() == "tf":
            if assigned_device not in self.available_devices:
                self.logger.error(
                    "Assigned device {} for graph_fn {} not in available devices:\n {}"
                    .format(assigned_device, graph_fn, self.available_devices))

            # Assign proper device to all ops created in this context manager.
            with tf.device(assigned_device):
                # Name ops correctly according to our Component hierarchy.
                with tf.name_scope(graph_fn.component.global_scope + (
                        '/' if graph_fn.component.global_scope else "")):
                    self.logger.debug(
                        "Assigning device {} to graph_fn {} (scope {}).".
                        format(assigned_device, graph_fn,
                               graph_fn.component.global_scope))
                    self.run_through_graph_fn(graph_fn)
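
As a minimal standalone illustration of the same nesting (assuming TensorFlow 1.x and hypothetical device/scope values; the real ones come from `self.available_devices` and the Component hierarchy):

    import tensorflow as tf

    assigned_device = "/cpu:0"      # hypothetical device identifier
    global_scope = "agent/policy"   # hypothetical Component global_scope

    with tf.device(assigned_device):
        with tf.name_scope(global_scope + "/"):
            # Every op created here is placed on assigned_device and named
            # under the "agent/policy/" prefix.
            x = tf.add(tf.constant(1.0), tf.constant(2.0), name="sum")

    print(x.device, x.name)  # e.g. /device:CPU:0 agent/policy/sum:0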
Example #2
    def _graph_fn_loss_per_item(self, distribution, actions, rewards,
                                terminals, prev_log_likelihood):
        """
        Args:
            distribution (Distribution): Distribution object which must provide a log likelihood function.
            actions (SingleDataOp): The batch of actions that were actually taken in states s (from a memory).
            rewards (SingleDataOp): The batch of rewards that we received after having taken a in s (from a memory).
            terminals (SingleDataOp): The batch of terminal signals that we received after having taken a in s
                (from a memory).
            prev_log_likelihood (SingleDataOp): Log likelihood to compare to when computing likelihood ratios.
        Returns:
            SingleDataOp: The loss values vector (one single value for each batch item).
        """
        if get_backend() == "tf":
            # Call graph_fn of a sub-Component directly.
            current_log_likelihood = self.distribution._graph_fn_log_prob(
                distribution, values=actions)

            # Likelihood ratio r = pi_new(a|s) / pi_old(a|s) = exp(log pi_new - log pi_old).
            likelihood_ratio = tf.exp(x=(current_log_likelihood -
                                         prev_log_likelihood))
            unclipped_objective = likelihood_ratio * rewards
            clipped_objective = tf.clip_by_value(
                t=likelihood_ratio,
                clip_value_min=(1 - self.clip_ratio),
                clip_value_max=(1 + self.clip_ratio),
            ) * rewards

            surrogate_objective = tf.minimum(x=unclipped_objective,
                                             y=clipped_objective)
            return tf.reduce_mean(input_tensor=-surrogate_objective, axis=0)
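
For reference, the clipped surrogate objective computed above is -mean(min(r * A, clip(r, 1 - eps, 1 + eps) * A)), where the ratio is r = exp(log pi_new - log pi_old) and the rewards tensor plays the role of the advantage A. A small NumPy sketch with hypothetical toy values:

    import numpy as np

    clip_ratio = 0.2  # hypothetical epsilon
    current_log_likelihood = np.array([-0.5, -1.2, -0.3])
    prev_log_likelihood = np.array([-0.6, -1.0, -0.3])
    rewards = np.array([1.0, -0.5, 2.0])  # advantage estimates

    likelihood_ratio = np.exp(current_log_likelihood - prev_log_likelihood)
    unclipped_objective = likelihood_ratio * rewards
    clipped_objective = np.clip(likelihood_ratio, 1 - clip_ratio, 1 + clip_ratio) * rewards
    loss = np.mean(-np.minimum(unclipped_objective, clipped_objective), axis=0)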
Example #3
    def _graph_fn_reshape(self, action_layer_output):
        """
        Only reshapes some NN output according to our action space.

        Args:
            action_layer_output (SingleDataOp): The output of the action_layer of this Component (last, flattened data
                coming from the NN).

        Returns:
            SingleDataOp: The reshaped action_layer_output.
        """
        # Reshape action_output to action shape.
        if isinstance(self.target_space, IntBox):
            shape = list(
                self.target_space.get_shape(with_batch_rank=-1,
                                            with_category_rank=True))
        elif isinstance(self.target_space, FloatBox):
            shape = [-1, 2] + list(
                self.target_space.get_shape(
                    with_batch_rank=False))  # Manually add moments rank
        else:
            raise NotImplementedError

        if get_backend() == "tf":
            action_layer_output_reshaped = tf.reshape(
                tensor=action_layer_output, shape=shape)
            return action_layer_output_reshaped
Example #4
    def _graph_fn_call_loop(self, *inputs):
        """
        Calls the sub-component of this loop the specified number of times and returns the final result.

        Args:
            *inputs (FlattenedDataOp): Parameters for the call component.

        Returns:
            any: Result of the call.
        """

        if get_backend() == "tf":
            # Initial call.
            result = self.graph_fn_to_call(*inputs)

            def body(result, i):
                with tf.control_dependencies(control_inputs=[result]):
                    result = self.graph_fn_to_call(*inputs)
                return result, i + 1

            def cond(result, i):
                return i < self.num_iterations - 1

            result, _ = tf.while_loop(cond=cond,
                                      body=body,
                                      loop_vars=(result, 0))
            return result
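
The control flow above is one initial call followed by `num_iterations - 1` further calls sequenced inside a `tf.while_loop`. A minimal TensorFlow 1.x sketch, with a hypothetical stand-in for `self.graph_fn_to_call` (the real graph_fn presumably has side effects, which is why the repeated, control-dependency-ordered calls matter):

    import tensorflow as tf

    num_iterations = 5
    inputs = tf.constant(0)

    def call(x):
        # Hypothetical stand-in for self.graph_fn_to_call.
        return x + 1

    result = call(inputs)  # initial call

    def body(result, i):
        # The control dependency forces the previous call to finish before the next one.
        with tf.control_dependencies(control_inputs=[result]):
            result = call(inputs)
        return result, i + 1

    def cond(result, i):
        return i < num_iterations - 1

    result, _ = tf.while_loop(cond=cond, body=body, loop_vars=(result, 0))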
Example #5
    def create_variables(self, input_spaces, action_space):
        in_space = input_spaces["input"]

        # Create weights.
        # [0] b/c Space.shape never includes the batch-rank.
        weights_shape = (in_space.shape[0], self.units)
        self.weights_init = Initializer.from_spec(
            shape=weights_shape, specification=self.weights_spec)
        # And maybe biases.
        biases_shape = (self.units, )
        self.biases_init = Initializer.from_spec(
            shape=biases_shape, specification=self.biases_spec)

        # Wrapper for backend.
        if get_backend() == "tf":
            self.layer = tf.layers.Dense(
                units=self.units,
                activation=get_activation_function(self.activation,
                                                   *self.activation_params),
                kernel_initializer=self.weights_init.initializer,
                use_bias=(self.biases_spec is not False),
                bias_initializer=(self.biases_init.initializer
                                  or tf.zeros_initializer()),
            )

            # Now build the layer so that its variables get created.
            self.layer.build(in_space.get_shape(with_batch_rank=True))
            # Register the generated variables with our registry.
            self.register_variables(*self.layer.variables)
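
A minimal sketch of the build-then-inspect pattern, assuming TensorFlow 1.x and hypothetical sizes (8 inputs, 16 units):

    import tensorflow as tf

    layer = tf.layers.Dense(units=16, activation=tf.nn.relu,
                            kernel_initializer=tf.glorot_uniform_initializer(),
                            use_bias=True, bias_initializer=tf.zeros_initializer())

    # Building with an explicit shape creates the variables without needing an input tensor yet.
    layer.build(input_shape=(None, 8))
    print([v.shape.as_list() for v in layer.variables])  # [[8, 16], [16]]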
Example #6
    def _graph_fn_pick(self, do_explore, sample_deterministic,
                       sample_stochastic):
        """
        Exploration for discrete action spaces.
        Either picks a random action (if `do_explore` is True) or returns the non-explorative action.

        Args:
            do_explore (DataOp): The bool coming from the epsilon-exploration component specifying
                whether to use exploration or not.
            sample_deterministic (DataOp): The output from our distribution's "sample_deterministic" Socket.
            sample_stochastic (DataOp): The output from our distribution's "sample_stochastic" Socket.

        Returns:
            DataOp: The DataOp representing the action. This will match the shape of self.action_space.
        """
        if get_backend() == "tf":
            return tf.cond(
                do_explore,
                # (1,) = Adding artificial batch rank.
                true_fn=lambda: tf.random_uniform(
                    shape=(1, ) + self.action_space.shape,
                    maxval=self.action_space.num_categories,
                    dtype=dtype("int")),
                false_fn=lambda: sample_deterministic
                if self.non_explore_behavior == "max-likelihood" else
                sample_stochastic)
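
A standalone sketch of the same `tf.cond` pattern for a hypothetical scalar IntBox with 4 categories (TensorFlow 1.x):

    import tensorflow as tf

    num_categories = 4
    do_explore = tf.placeholder(dtype=tf.bool, shape=())
    # Hypothetical non-explorative (greedy) action, already batched.
    sample_deterministic = tf.constant([2], dtype=tf.int32)

    action = tf.cond(
        do_explore,
        true_fn=lambda: tf.random_uniform(shape=(1,), maxval=num_categories, dtype=tf.int32),
        false_fn=lambda: sample_deterministic)

    with tf.Session() as sess:
        print(sess.run(action, feed_dict={do_explore: True}))   # random action in [0, 4)
        print(sess.run(action, feed_dict={do_explore: False}))  # [2]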
Example #7
 def decay(self, time_steps_in_decay_window):
     if get_backend() == "tf":
         return tf.train.exponential_decay(
             learning_rate=self.from_,
             global_step=time_steps_in_decay_window,
             decay_steps=self.half_life_timesteps,
             decay_rate=0.5)
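
With `decay_rate=0.5` and `decay_steps=self.half_life_timesteps`, this is a half-life schedule: the value halves every `half_life_timesteps` steps (staircase behavior is off by default). A plain-Python equivalent of the formula tf.train.exponential_decay applies:

    def exponential_half_life(from_, half_life_timesteps, t):
        # value(t) = from_ * 0.5 ** (t / half_life_timesteps)
        return from_ * 0.5 ** (t / float(half_life_timesteps))

    # e.g. exponential_half_life(1.0, 100, 200) == 0.25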
Example #8
 def get_tensor_variable(self,
                         name,
                         is_input_feed=False,
                         add_batch_rank=None,
                         **kwargs):
     add_batch_rank = self.has_batch_rank if add_batch_rank is None else add_batch_rank
     batch_rank = () if add_batch_rank is False else (
         None, ) if add_batch_rank is True else (add_batch_rank, )
     shape = tuple(batch_rank + self.shape)
     if get_backend() == "tf":
         import tensorflow as tf
         # TODO: re-evaluate the cutting of a leading '/_?' (tf doesn't like it)
         name = re.sub(r'^/_?', "", name)
         if is_input_feed:
             return tf.placeholder(dtype=dtype(self.dtype),
                                   shape=shape,
                                   name=name)
         else:
             # TODO: what about initializer spec?
             yarl_initializer = Initializer.from_spec(
                 shape=shape, specification=kwargs.pop("initializer", None))
             return tf.get_variable(
                 name,
                 shape=shape,
                 dtype=dtype(self.dtype),
                 initializer=yarl_initializer.initializer,
                 **kwargs)
     else:
         raise YARLError("ERROR: Pytorch not supported yet!")
Example #9
    def _graph_fn_value(self):
        if get_backend() == "tf":
            # Ornstein-Uhlenbeck update: the drift term pulls the state back towards mu,
            # the diffusion term adds Gaussian noise scaled by sigma.
            drift = self.theta * (self.mu - self.ou_state)
            diffusion = self.sigma * tf.random_normal(
                shape=self.action_space.shape, dtype=self.action_space.dtype)

            delta = drift + diffusion
            return tf.assign_add(ref=self.ou_state, value=delta)
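
The same Ornstein-Uhlenbeck update in plain NumPy, with hypothetical parameters, to show what the stateful `assign_add` accumulates over successive calls:

    import numpy as np

    theta, mu, sigma = 0.15, 0.0, 0.3  # hypothetical OU parameters
    ou_state = np.zeros(2)             # hypothetical 2-D action space

    for _ in range(3):
        drift = theta * (mu - ou_state)
        diffusion = sigma * np.random.normal(size=ou_state.shape)
        ou_state += drift + diffusion  # mirrors tf.assign_add(ref=self.ou_state, value=delta)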
Example #10
 def decay(self, time_steps_in_decay_window):
     if get_backend() == "tf":
         return tf.train.polynomial_decay(
             learning_rate=self.from_,
             global_step=time_steps_in_decay_window,
             decay_steps=self.num_timesteps,
             end_learning_rate=self.to_,
             power=self.power)
Example #11
    def _graph_fn_calculate_gradients(self, variables, loss, *inputs):
        if get_backend() == "tf":
            grads_and_vars = DataOpTuple(
                self.optimizer.compute_gradients(
                    loss=loss,
                    var_list=list(variables.values()) if isinstance(
                        variables, dict) else variables))

            return grads_and_vars
Example #12
    def __init__(self, learning_rate, **kwargs):
        super(RMSPropOptimizer,
              self).__init__(learning_rate=learning_rate,
                             scope=kwargs.pop("scope", "rmsprop-optimizer"),
                             **kwargs)

        if get_backend() == "tf":
            self.optimizer = tf.train.RMSPropOptimizer(
                learning_rate=self.learning_rate,
                decay=kwargs.pop("decay", kwargs.pop("rho", 0.95)))
Example #13
    def __init__(self, learning_rate, **kwargs):
        super(GradientDescentOptimizer,
              self).__init__(learning_rate=learning_rate,
                             scope=kwargs.pop("scope",
                                              "gradient-descent-optimizer"),
                             **kwargs)

        if get_backend() == "tf":
            self.optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=self.learning_rate)
Example #14
    def _graph_fn_loss_per_item(self, q_values_s, actions, rewards, terminals, qt_values_sp, q_values_sp=None):
        """
        Args:
            q_values_s (SingleDataOp): The batch of Q-values representing the expected accumulated discounted returns
                when in s and taking different actions a.
            actions (SingleDataOp): The batch of actions that were actually taken in states s (from a memory).
            rewards (SingleDataOp): The batch of rewards that we received after having taken a in s (from a memory).
            terminals (SingleDataOp): The batch of terminal signals that we received after having taken a in s
                (from a memory).
            qt_values_sp (SingleDataOp): The batch of Q-values representing the expected accumulated discounted
                returns (estimated by the target net) when in s' and taking different actions a'.
            q_values_sp (Optional[SingleDataOp]): If `self.double_q` is True: The batch of Q-values representing the
                expected accumulated discounted returns (estimated by the (main) policy net) when in s' and taking
                different actions a'.

        Returns:
            SingleDataOp: The loss values vector (one single value for each batch item).
        """
        if get_backend() == "tf":
            if self.double_q:
                # For double-Q, we no longer use the max(a')Qt(s'a') value.
                # Instead, the a' used to get the Qt(s'a') is given by argmax(a') Q(s',a') <- Q=q-net, not target net!
                a_primes = tf.argmax(input=q_values_sp, axis=-1)

                # Now lookup Q(s'a') with the calculated a'.
                one_hot = tf.one_hot(indices=a_primes, depth=self.action_space.num_categories)
                qt_sp_ap_values = tf.reduce_sum(input_tensor=(qt_values_sp * one_hot), axis=-1)
            else:
                # Qt(s',a') -> Use the max(a') value (from the target network).
                qt_sp_ap_values = tf.reduce_max(input_tensor=qt_values_sp, axis=-1)

            # Make sure the rewards vector (batch) is broadcast correctly.
            for _ in range(get_rank(qt_sp_ap_values) - 1):
                rewards = tf.expand_dims(rewards, axis=1)

            # Ignore Q(s'a') values if s' is a terminal state. Instead use 0.0 as the state-action value for s'a'.
            # Note that in that case, the next_state (s') is not the correct next state and should be disregarded.
            # See Chapter 3.4 in "RL - An Introduction" (2017 draft) by A. Barto and R. Sutton for a detailed analysis.
            qt_sp_ap_values = tf.where(condition=terminals,
                                       x=tf.zeros_like(qt_sp_ap_values),
                                       y=qt_sp_ap_values)

            # Q(s,a) -> Use the Q-value of the action actually taken before.
            one_hot = tf.one_hot(indices=actions, depth=self.action_space.num_categories)
            q_s_a_values = tf.reduce_sum(input_tensor=(q_values_s * one_hot), axis=-1)

            # Calculate the TD-delta (target - current estimate).
            td_delta = (rewards + self.discount * qt_sp_ap_values) - q_s_a_values

            # Reduce over the composite actions, if any.
            if get_rank(td_delta) > 1:
                td_delta = tf.reduce_mean(input_tensor=td_delta, axis=list(range(1, self.ranks_to_reduce + 1)))

            return tf.pow(x=td_delta, y=2)
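
A NumPy sketch of the double-Q target computation for a hypothetical toy batch (2 items, 3 discrete actions), following the same steps as above:

    import numpy as np

    discount = 0.99
    q_values_sp = np.array([[1.0, 2.0, 0.5], [0.2, 0.1, 0.4]])   # Q(s', .) from the main net
    qt_values_sp = np.array([[0.9, 1.5, 0.7], [0.3, 0.2, 0.6]])  # Qt(s', .) from the target net
    q_s_a_values = np.array([1.8, 0.5])                          # Q(s, a) of the taken actions
    rewards = np.array([1.0, 0.0])
    terminals = np.array([False, True])

    a_primes = q_values_sp.argmax(axis=-1)                        # a' picked by the main net
    qt_sp_ap_values = qt_values_sp[np.arange(2), a_primes]        # Qt(s', a')
    qt_sp_ap_values = np.where(terminals, 0.0, qt_sp_ap_values)   # ignore Q(s', a') for terminal s'
    td_delta = (rewards + discount * qt_sp_ap_values) - q_s_a_values
    loss_per_item = td_delta ** 2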
Example #15
    def __init__(self, learning_rate, **kwargs):
        super(AdamOptimizer,
              self).__init__(learning_rate=learning_rate,
                             scope=kwargs.pop("scope", "adam-optimizer"),
                             **kwargs)

        if get_backend() == "tf":
            self.optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate,
                beta1=kwargs.pop("beta_1", kwargs.pop("beta1", 0.9)),
                beta2=kwargs.pop("beta_2", kwargs.pop("beta2", 0.999)))
Example #16
    def __init__(self, learning_rate, **kwargs):
        super(AdagradOptimizer,
              self).__init__(learning_rate=learning_rate,
                             scope=kwargs.pop("scope", "adagrad-optimizer"),
                             **kwargs)

        if get_backend() == "tf":
            self.optimizer = tf.train.AdagradOptimizer(
                learning_rate=self.learning_rate,
                initial_accumulator_value=kwargs.pop(
                    "initial_accumulator_value", 0.1))
Example #17
    def _graph_fn_loss(self, loss_per_item):
        """
        The actual loss function that an optimizer will try to minimize. This is usually the average over a batch.

        Args:
            loss_per_item (SingleDataOp): The output of our loss_per_item graph_fn.

        Returns:
            SingleDataOp: The final loss tensor holding the average loss over the entire batch.
        """
        if get_backend() == "tf":
            return tf.reduce_mean(input_tensor=loss_per_item, axis=0)
Example #18
    def __init__(self, learning_rate, **kwargs):
        super(NadamOptimizer,
              self).__init__(learning_rate=learning_rate,
                             scope=kwargs.pop("scope", "nadam-optimizer"),
                             **kwargs)

        if get_backend() == "tf":
            self.optimizer = tf.keras.optimizers.Nadam(
                lr=self.learning_rate,
                beta_1=kwargs.pop("beta_1", kwargs.pop("beta1", 0.9)),
                beta_2=kwargs.pop("beta_2", kwargs.pop("beta2", 0.999)),
                schedule_decay=kwargs.pop("schedule_decay", 0.004))
Example #19
    def __init__(self, learning_rate, **kwargs):
        super(SGDOptimizer,
              self).__init__(learning_rate=learning_rate,
                             scope=kwargs.pop("scope", "sgd-optimizer"),
                             **kwargs)

        if get_backend() == "tf":
            self.optimizer = tf.keras.optimizers.SGD(
                lr=self.learning_rate,
                momentum=kwargs.pop("momentum", 0.0),
                decay=kwargs.pop("decay", 0.0),
                nesterov=kwargs.pop("nesterov", False))
Example #20
    def _graph_fn_kl_divergence(distribution_a, distribution_b):
        """
        Kullback-Leibler divergence between two distribution objects.

        Args:
            distribution_a (tf.Distribution): A Distribution object.
            distribution_b (tf.Distribution): A distribution object.

        Returns:
            DataOp: (batch-wise) KL-divergence between the two distributions.
        """
        if get_backend() == "tf":
            return tf.no_op()
Example #21
 def _graph_fn_parameterize(self, prob):
     """
     Args:
         prob (DataOp): The p value (probability that distribution returns True).
     """
     # TODO: Move this into some sort of NN-output-cleanup component. This is repetitive stuff.
     ## Clamp raw_input between 0 and 1 to make it interpretable as a probability.
     #p = tf.sigmoid(x=flat_input)
     ## Clamp to avoid 0.0 or 1.0 probabilities (adds numerical stability).
     #p = tf.clip_by_value(p, clip_value_min=SMALL_NUMBER, clip_value_max=(1.0 - SMALL_NUMBER))
     if get_backend() == "tf":
         return tf.distributions.Bernoulli(probs=prob,
                                           dtype=util.dtype("bool"))
Example #22
    def _graph_fn_log_prob(distribution, values):
        """
        Log of the probability density/mass function.

        Args:
            distribution (DataOp): The (already parameterized) backend-specific distribution for which the log
                probabilities should be calculated. This is simply the output of `self._graph_fn_parameterize`.
            values (SingleDataOp): Values for which to compute the log probabilities given `distribution`.

        Returns:
            DataOp: The log probability of the given values.
        """
        if get_backend() == "tf":
            return distribution.log_prob(value=values)
Example #23
    def _graph_fn_generate_parameters(self, action_layer_output_reshaped):
        """
        Creates properties/parameters and logits from some reshaped output.

        Args:
            action_layer_output_reshaped (SingleDataOp): The output of some layer that is already reshaped
                according to our action Space.

        Returns:
            tuple:
                "parameters" (SingleDataOp): The parameters, ready to be passed to a Distribution object's in-Socket
                    "parameters" (usually some probabilities or loc/scale pairs).
                "logits" (SingleDataOp): The log(parameters) values.
        """
        if get_backend() == "tf":
            if isinstance(self.target_space, IntBox):
                # Discrete actions.
                parameters = tf.maximum(
                    x=tf.nn.softmax(logits=action_layer_output_reshaped, axis=-1),
                    y=SMALL_NUMBER)

                # Log probs.
                logits = tf.log(x=parameters)
            elif isinstance(self.target_space, FloatBox):
                # Continuous actions.
                mean, log_sd = tf.split(value=action_layer_output_reshaped,
                                        num_or_size_splits=2,
                                        axis=1)

                # Remove moments rank.
                mean = tf.squeeze(input=mean, axis=1)
                log_sd = tf.squeeze(input=log_sd, axis=1)

                # Clip log_sd. log(SMALL_NUMBER) is negative.
                log_sd = tf.clip_by_value(t=log_sd,
                                          clip_value_min=log(SMALL_NUMBER),
                                          clip_value_max=-log(SMALL_NUMBER))

                # Turn log sd into sd.
                sd = tf.exp(x=log_sd)

                logits = (tf.log(x=mean), log_sd)
                parameters = (mean, sd)
            else:
                raise NotImplementedError

            # Return the log values along with the (already clamped) parameters.
            return logits, parameters
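
For the continuous branch, a NumPy sketch of how the `[-1, 2] + action-shape` output splits into one (mean, sd) pair per sample (hypothetical toy values):

    import numpy as np

    SMALL_NUMBER = 1e-6
    # Hypothetical reshaped output for a 1-D continuous action: shape (batch, 2, 1).
    action_layer_output_reshaped = np.array([[[0.1], [-0.7]], [[0.3], [0.2]]])

    mean, log_sd = np.split(action_layer_output_reshaped, 2, axis=1)
    mean, log_sd = mean.squeeze(axis=1), log_sd.squeeze(axis=1)      # remove the moments rank
    log_sd = np.clip(log_sd, np.log(SMALL_NUMBER), -np.log(SMALL_NUMBER))
    sd = np.exp(log_sd)                                              # parameters = (mean, sd)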
Example #24
    def _graph_fn_apply(self, input_):
        min_value = input_
        max_value = input_

        if get_backend() == "tf":
            import tensorflow as tf
            # Iteratively reduce dimensionality across all axes to get the min/max values for each sample in the batch.
            for axis in self.axes:
                min_value = tf.reduce_min(input_tensor=min_value,
                                          axis=axis,
                                          keep_dims=True)
                max_value = tf.reduce_max(input_tensor=max_value,
                                          axis=axis,
                                          keep_dims=True)

        # Add some small constant to never let the range be zero.
        return (input_ - min_value) / (max_value - min_value + SMALL_NUMBER)
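
A NumPy sketch of the per-sample min-max normalization, assuming a hypothetical (batch, 3) input and `axes=[1]`:

    import numpy as np

    SMALL_NUMBER = 1e-6
    input_ = np.array([[0.0, 5.0, 10.0], [2.0, 4.0, 6.0]])

    min_value = input_.min(axis=1, keepdims=True)   # per-sample minimum
    max_value = input_.max(axis=1, keepdims=True)   # per-sample maximum
    normalized = (input_ - min_value) / (max_value - min_value + SMALL_NUMBER)
    # Each row is now scaled to the [0, 1] range.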
Example #25
    def __init__(self,
                 component,
                 input_spaces=None,
                 action_space=None,
                 seed=10,
                 logging_level=None):
        """
        Args:
            component (Component): The Component to be tested (may contain sub-components).
            input_spaces (Optional[dict]): Dict with component's in-Socket names as keys and Space objects as values.
                Describes the input Spaces for the component. None if the Component to be tested has no
                in-Sockets.
            action_space (Optional[Space]): The action space to pass into the GraphBuilder.
            seed (Optional[int]): The seed to use for random-seeding the Model object.
                If None, do not seed the Graph (things may behave non-deterministically).
            logging_level (Optional[int]): When provided, sets YARL's root_logger's logging level to this value.
        """
        self.seed = seed
        if logging_level is not None:
            root_logger.setLevel(logging_level)

        # Create a GraphBuilder.
        self.graph_builder = GraphBuilder(action_space=action_space)

        # Add the component to test and expose all its Sockets to the core component of our Model.
        self.core = self.graph_builder.get_default_model()
        self.core.add_component(component, connections=CONNECT_ALL)

        # Add the input-spaces to the in-Sockets.
        for in_socket in self.core.input_sockets:
            name = in_socket.name
            assert input_spaces is not None and name in input_spaces,\
                "ERROR: Parameter `input_spaces` in {}'s constructor does not contain Space information for " \
                "in-Socket '{}'!".format(type(self).__name__, name)
            self.core.connect(input_spaces[name], name)

        # Build the model.
        self.graph_executor = GraphExecutor.from_spec(
            get_backend(),
            graph_builder=self.graph_builder,
            execution_spec=dict(seed=self.seed))
        self.graph_executor.build()
Example #26
    def _graph_fn_sync(self, values_):
        """
        Generates the op that syncs this Synchronizable's parent's variable values from another Synchronizable
        Component.

        Args:
            values_ (DataOpDict): The dict of variable values (coming from the "_variables"-Socket of any other
                Component) that need to be assigned to this Component's parent's variables.
                The keys in the dict refer to the names of our parent's variables and must match their names.

        Returns:
            DataOp: The op that executes the syncing.
        """
        # Loop through all incoming vars and our own and collect assign ops.
        syncs = list()
        parents_vars = self.parent_component.get_variables(
            collections=self.collections, custom_scope_separator="-")

        # Sanity checking
        syncs_from, syncs_to = (values_.items(), parents_vars.items())
        if len(syncs_from) != len(syncs_to):
            raise YARLError("ERROR: Number of Variables to sync must match! "
                            "We have {} syncs_from and {} syncs_to.".format(
                                len(syncs_from), len(syncs_to)))
        for (key_from, var_from), (key_to,
                                   var_to) in zip(syncs_from, syncs_to):
            # Sanity checking. TODO: Check the names' ends? Without the global scope?
            #if key_from != key_to:
            #    raise YARLError("ERROR: Variable names for syncing must match in order and name! "
            #                    "Mismatch at from={} and to={}.".format(key_from, key_to))
            if get_shape(var_from) != get_shape(var_to):
                raise YARLError(
                    "ERROR: Variable shapes for syncing must match! "
                    "Shape mismatch between from={} ({}) and to={} ({}).".
                    format(key_from, get_shape(var_from), key_to,
                           get_shape(var_to)))
            syncs.append(self.assign_variable(var_to, var_from))

        # Bundle everything into one "sync"-op.
        if get_backend() == "tf":
            with tf.control_dependencies(syncs):
                return tf.no_op()
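
The bundling at the end follows a common TF 1.x idiom: group all assign ops behind a single no_op via control dependencies. A minimal sketch with two hypothetical variables standing in for the source and target Components' variables:

    import tensorflow as tf

    source = tf.Variable([1.0, 2.0], name="source")
    target = tf.Variable([0.0, 0.0], name="target")

    syncs = [tf.assign(ref=target, value=source)]
    with tf.control_dependencies(syncs):
        sync_op = tf.no_op()  # running this op runs every assign in `syncs` first

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(sync_op)
        print(sess.run(target))  # [1. 2.]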
Example #27
    def _graph_fn_add_noise(self, noise, sample_deterministic,
                            sample_stochastic):
        """
        Noise for continuous action spaces.
        Return the action with added noise.

        Args:
            noise (DataOp): The noise coming from the noise component.
            sample_deterministic (DataOp): The output from our distribution's "sample_deterministic" Socket.
            sample_stochastic (DataOp): The output from our distribution's "sample_stochastic" Socket.

        Returns:
            DataOp: The DataOp representing the action. This will match the shape of self.action_space.

        """
        if get_backend() == "tf":
            if self.non_explore_behavior == 'max-likelihood':
                return sample_deterministic + noise
            else:
                return sample_stochastic + noise
Example #28
    def _graph_fn_draw(self, distribution, max_likelihood):
        """
        Takes a sample from the (already parameterized) distribution. The parameterization also includes a possible
        batch size.

        Args:
            distribution (DataOp): The (already parameterized) backend-specific distribution DataOp to use for
                sampling. This is simply the output of `self._graph_fn_parameterize`.
            max_likelihood (bool): Whether to return the maximum-likelihood result, instead of a random sample.
                Can be used to pick deterministic actions from discrete ("greedy") or continuous (mean-value)
                distributions.

        Returns:
            DataOp: The taken sample(s).
        """
        if get_backend() == "tf":
            return tf.cond(pred=max_likelihood,
                           true_fn=lambda: self._graph_fn_sample_deterministic(
                               distribution),
                           false_fn=lambda: self._graph_fn_sample_stochastic(
                               distribution))
Example #29
    def _graph_fn_value(self, time_step):
        """
        Args:
            time_step (DataOp): The int-type DataOp that holds the current global time_step.

        Returns:
            DataOp: The decay'd value depending on the current time step.
        """
        if get_backend() == "tf":
            return tf.cond(
                pred=(time_step <= self.start_timestep),
                # We are still in pre-decay time.
                true_fn=lambda: self.from_,
                false_fn=lambda: tf.cond(
                    pred=(time_step >= self.start_timestep + self.num_timesteps),
                    # We are in post-decay time.
                    true_fn=lambda: self.to_,
                    # We are inside the decay time window.
                    false_fn=lambda: self.decay(
                        tf.cast(x=time_step - self.start_timestep,
                                dtype=util.dtype("float")))))
Example #30
    def _graph_fn_sample(self, sample_size, inputs):
        """
        Takes a set of input tensors and uniformly samples a subset of the
        specified size from them.

        Args:
            sample_size (SingleDataOp[int]): Subsample size.
            inputs (FlattenedDataOp): Input tensors (in a FlattenedDataOp) to sample from.
                All values (tensors) should be of the same size.

        Returns:
            FlattenedDataOp: The sub-sampled inputs (will be unflattened automatically).
        """
        batch_size = get_batch_size(next(iter(inputs.values())))

        if get_backend() == "tf":
            sample_indices = tf.random_uniform(shape=(sample_size, ),
                                               maxval=batch_size,
                                               dtype=tf.int32)
            sample = FlattenedDataOp()
            for key, tensor in inputs.items():
                sample[key] = tf.gather(params=tensor, indices=sample_indices)
            return sample
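
A standalone sketch of the gather-based subsampling for a hypothetical flat dict of two equally sized tensors (TensorFlow 1.x):

    import tensorflow as tf

    batch_size, sample_size = 32, 8
    inputs = {
        "/states": tf.random_normal(shape=(batch_size, 4)),
        "/rewards": tf.random_normal(shape=(batch_size,)),
    }

    # Indices are drawn with replacement; every tensor is indexed with the same indices.
    sample_indices = tf.random_uniform(shape=(sample_size,), maxval=batch_size, dtype=tf.int32)
    sample = {key: tf.gather(params=tensor, indices=sample_indices)
              for key, tensor in inputs.items()}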