Example #1
0
    def __init__(self, shape, specification=None, **kwargs):
        """
        Creates the backend-specific initializer object for a variable of the given shape.

        Args:
            shape (tuple): The shape of the Variables to initialize. A list is accepted as well.
            specification (any): A spec that determines the nature of this initializer:
                - "truncated_normal": Normal initializer with stddev = 1/sqrt(shape[0])
                  (stddev = 1.0 if `shape` is empty or not a tuple/list).
                - None or False: No initializer object for TF (`self.initializer` stays None so that
                  the backend default is used), Xavier uniform for PyTorch.
                - float/int: A constant value across the variable.
                - list or np.ndarray: Fixed values whose shape must match `shape`.
            **kwargs: Accepted by the signature, but not used by this constructor.

        Raises:
            RLGraphError: If a fixed shape in `specification` does not match `shape` or if
                `specification` is of an unsupported type/value.
        """
        super(Initializer, self).__init__()

        # The shape of the variable to be initialized.
        self.shape = shape
        # The actual underlying initializer object.
        self.initializer = None

        # Truncated Normal.
        # The isinstance guard keeps array specs out of the `==` comparison: comparing a numpy array
        # with a str may produce an element-wise result whose truth value is ambiguous, which would
        # raise before the fixed-values branch below is ever reached.
        if isinstance(specification, str) and specification == "truncated_normal":
            backend = get_backend()
            if backend in ("tf", "pytorch"):
                # Use the first dimension (num_rows or batch rank) to figure out the stddev.
                has_dims = isinstance(shape, (tuple, list)) and len(shape) > 0
                stddev = 1 / math.sqrt(shape[0] if has_dims else 1.0)
                if backend == "tf":
                    self.initializer = tf.truncated_normal_initializer(
                        stddev=stddev)
                else:
                    # NOTE: This is a plain (non-truncated) normal initializer.
                    self.initializer = lambda t: torch.nn.init.normal_(
                        tensor=t, std=stddev)

        # No spec -> Leave initializer as None for TF (will then use default;
        #  e.g. for tf weights: Xavier uniform). For PyTorch, still have to set Xavier.
        # TODO this is None or is False is very unclean because TF and PT have different defaults ->
        # change to clean default values for weights and biases.
        elif specification is None or specification is False:
            if get_backend() == "pytorch":
                self.initializer = torch.nn.init.xavier_uniform_

        # Fixed values spec -> Use them, just do sanity checking.
        else:
            # numpy shapes are tuples: normalize a list-shape so that an otherwise matching
            # spec is not rejected only because of the container type.
            expected_shape = tuple(self.shape) if isinstance(
                self.shape, list) else self.shape

            # Constant value across the variable.
            if isinstance(specification, (float, int)):
                pass
            # A 1D initializer (e.g. for biases).
            elif isinstance(specification, list):
                array = np.asarray(specification,
                                   dtype=convert_dtype("float32", "np"))
                if array.shape != expected_shape:
                    raise RLGraphError(
                        "ERROR: Number/shape of given items ({}) not identical with shape ({})!"
                        .format(array.shape, self.shape))
            # A nD initializer (numpy-array).
            elif isinstance(specification, np.ndarray):
                if specification.shape != expected_shape:
                    raise RLGraphError(
                        "ERROR: Shape of given items ({}) not identical with shape ({})!"
                        .format(specification.shape, self.shape))
            # Unknown type.
            else:
                raise RLGraphError(
                    "ERROR: Bad specification given ({}) for Initializer object!"
                    .format(specification))

            # Create the backend initializer object.
            backend = get_backend()
            if backend == "tf":
                self.initializer = tf.constant_initializer(
                    value=specification, dtype=convert_dtype("float32"))
            elif backend == "pytorch":
                self.initializer = lambda t: torch.nn.init.constant_(
                    tensor=t, val=specification)
Example #2
0
def get_activation_function(activation_function=None, *other_parameters):
    """
    Looks up the activation function (callable) to use in a NN layer for the active backend.

    Args:
        activation_function (Optional[callable,str]): The activation function to lookup. Could be given as:
            - already a callable (handed back unchanged)
            - a lookup key (str)
            - None: Handed back as None (linear activation).

        other_parameters (any): Possible extra parameter(s) used for some of the activation functions.
            Only the first one is read: as `alpha` (negative slope) of the leaky ReLU (default: 0.2).

    Returns:
        callable: The backend-dependent activation function. None for the PyTorch "linear" key and
            for backends other than "tf"/"pytorch".

    Raises:
        RLGraphError: If the lookup key is not known for the active backend.
    """
    backend = get_backend()
    # Only these two backends have lookup tables.
    if backend != "tf" and backend != "pytorch":
        return None

    # Nothing to look up.
    if activation_function is None or callable(activation_function):
        return activation_function

    def leaky_slope():
        # Leaky ReLU: x * [alpha if x < 0 else 1.0]
        return other_parameters[0] if other_parameters else 0.2

    # The tables map each key to a zero-arg factory so that only the requested entry is resolved.
    if backend == "tf":
        def tf_leaky_relu():
            return partial(tf.nn.leaky_relu, alpha=leaky_slope())

        factories = {
            "linear": lambda: tf.identity,
            # Rectifier linear unit (ReLU) : 0 if x < 0 else x
            "relu": lambda: tf.nn.relu,
            # Exponential linear: exp(x) - 1 if x < 0 else x
            "elu": lambda: tf.nn.elu,
            # Sigmoid: 1 / (1 + exp(-x))
            "sigmoid": lambda: tf.sigmoid,
            # Scaled exponential linear unit: scale * [alpha * (exp(x) - 1) if < 0 else x]
            # https://arxiv.org/pdf/1706.02515.pdf
            "selu": lambda: tf.nn.selu,
            # Swish function: x * sigmoid(x)
            # https://arxiv.org/abs/1710.05941
            "swish": lambda: (lambda x: x * tf.sigmoid(x=x)),
            "lrelu": tf_leaky_relu,
            "leaky_relu": tf_leaky_relu,
            # Concatenated ReLU.
            "crelu": lambda: tf.nn.crelu,
            "softmax": lambda: tf.nn.softmax,
            "softplus": lambda: tf.nn.softplus,
            "softsign": lambda: tf.nn.softsign,
            "tanh": lambda: tf.nn.tanh,
        }
        unknown_message = "ERROR: Unknown activation_function '{}' for TensorFlow backend!"
    else:
        # PyTorch: Have to instantiate objects here.
        def torch_leaky_relu():
            return nn.LeakyReLU(negative_slope=leaky_slope())

        factories = {
            # Do nothing.
            "linear": lambda: None,
            # Rectifier linear unit (ReLU) : 0 if x < 0 else x
            "relu": lambda: nn.ReLU(),
            # Exponential linear: exp(x) - 1 if x < 0 else x
            "elu": lambda: nn.ELU(),
            # Sigmoid: 1 / (1 + exp(-x))
            "sigmoid": lambda: nn.Sigmoid(),
            # Scaled exponential linear unit: scale * [alpha * (exp(x) - 1) if < 0 else x]
            # https://arxiv.org/pdf/1706.02515.pdf
            "selu": lambda: nn.SELU(),
            "lrelu": torch_leaky_relu,
            "leaky_relu": torch_leaky_relu,
            "softmax": lambda: nn.Softmax(),
            "softplus": lambda: nn.Softplus(),
            "softsign": lambda: nn.Softsign(),
            "tanh": lambda: nn.Tanh(),
        }
        unknown_message = "ERROR: Unknown activation_function '{}' for PyTorch backend!"

    try:
        factory = factories.get(activation_function)
    except TypeError:
        # An unhashable key cannot match any of the known names.
        factory = None

    if factory is None:
        raise RLGraphError(unknown_message.format(activation_function))
    return factory()
Example #3
0
    def __init__(self,
                 state_space,
                 action_space,
                 discount=0.98,
                 preprocessing_spec=None,
                 network_spec=None,
                 internal_states_space=None,
                 policy_spec=None,
                 value_function_spec=None,
                 exploration_spec=None,
                 execution_spec=None,
                 optimizer_spec=None,
                 value_function_optimizer_spec=None,
                 observe_spec=None,
                 update_spec=None,
                 summary_spec=None,
                 saver_spec=None,
                 auto_build=True,
                 name="agent"):
        """
        Sets up the Agent's spaces, core components (preprocessor, policy, optional value function,
        exploration, optimizers), python-side buffers, and the graph builder/executor.

        Args:
            state_space (Union[dict,Space]): Spec dict for the state Space or a direct Space object.
            action_space (Union[dict,Space]): Spec dict for the action Space or a direct Space object.

            discount (float): The discount factor (gamma).

            preprocessing_spec (Optional[list,PreprocessorStack]): The spec list for the different necessary states
                preprocessing steps or a PreprocessorStack object itself.

            network_spec (Optional[list,NeuralNetwork]): Spec list for a NeuralNetwork Component or the NeuralNetwork
                object itself.

            internal_states_space (Optional[Union[dict,Space]]): Spec dict for the internal-states Space or a direct
                Space object for the Space(s) of the internal (RNN) states.

            policy_spec (Optional[dict]): An optional dict for further kwargs passing into the Policy c'tor.
                The given dict is copied (shallow) and never modified.
            value_function_spec (Optional[list]): Neural network specification for baseline. If None, no value
                function is created.

            exploration_spec (Optional[dict]): The spec-dict to create the Exploration Component.
            execution_spec (Optional[dict,Execution]): The spec-dict specifying execution settings.
            optimizer_spec (Optional[dict,Optimizer]): The spec-dict to create the Optimizer for this Agent.

            value_function_optimizer_spec (Optional[dict]): Optimizer config for value function optimizer.
                If None, the optimizer spec for the policy is used (same learning rate and optimizer type).
                A given dict is copied (shallow) and never modified.

            observe_spec (Optional[dict]): Spec-dict to specify `Agent.observe()` settings.
            update_spec (Optional[dict]): Spec-dict to specify `Agent.update()` settings.
            summary_spec (Optional[dict]): Spec-dict to specify summary settings.
            saver_spec (Optional[dict]): Spec-dict to specify saver settings.

            auto_build (Optional[bool]): If True (default), immediately builds the graph using the agent's
                graph builder. If false, users must separately call agent.build(). Useful for debugging or analyzing
                components before building.

            name (str): Some name for this Agent object.

        Raises:
            ValueError: If `value_function_spec` is given, but neither `value_function_optimizer_spec` nor
                `optimizer_spec` is available to create the value function optimizer from.
        """
        super(Agent, self).__init__()

        self.name = name
        self.auto_build = auto_build
        self.graph_built = False
        self.logger = logging.getLogger(__name__)

        self.state_space = Space.from_spec(state_space).with_batch_rank(False)
        self.flat_state_space = self.state_space.flatten() if isinstance(
            self.state_space, ContainerSpace) else None
        self.logger.info("Parsed state space definition: {}".format(
            self.state_space))
        self.action_space = Space.from_spec(action_space).with_batch_rank(
            False)
        self.flat_action_space = self.action_space.flatten() if isinstance(
            self.action_space, ContainerSpace) else None
        self.logger.info("Parsed action space definition: {}".format(
            self.action_space))

        self.discount = discount

        # The agent's root-Component.
        self.root_component = Component(name=self.name, nesting_level=0)

        # Define the input-Spaces (only the batched state Space is registered here).
        self.input_spaces = dict(states=self.state_space.with_batch_rank(), )

        # Construct the Preprocessor.
        self.preprocessor = PreprocessorStack.from_spec(preprocessing_spec)
        self.preprocessed_state_space = self.preprocessor.get_preprocessed_space(
            self.state_space)
        self.preprocessing_required = preprocessing_spec is not None and len(
            preprocessing_spec) > 0
        if self.preprocessing_required:
            self.logger.info("Preprocessing required.")
            self.logger.info(
                "Parsed preprocessed-state space definition: {}".format(
                    self.preprocessed_state_space))
        else:
            self.logger.info("No preprocessing required.")

        # Construct the Policy network.
        # Work on a shallow copy: keys are added below and the caller's dict must stay untouched
        # (it may be reused to construct other Agents).
        policy_spec = dict(policy_spec) if policy_spec else dict()
        if network_spec is not None:
            policy_spec["network_spec"] = network_spec
        if "action_space" not in policy_spec:
            policy_spec["action_space"] = self.action_space
        self.policy_spec = policy_spec
        # The behavioral policy of the algorithm. Also the one that gets updated.
        self.policy = Policy.from_spec(self.policy_spec)
        # Done by default.
        self.policy.add_components(Synchronizable(), expose_apis="sync")

        # Create non-shared baseline network.
        self.value_function = None
        if value_function_spec is not None:
            self.value_function = ValueFunction(
                network_spec=value_function_spec)
            self.value_function.add_components(Synchronizable(),
                                               expose_apis="sync")
            self.vars_merger = DictMerger("policy",
                                          "vf",
                                          scope="variable-dict-merger")
            self.vars_splitter = ContainerSplitter(
                "policy", "vf", scope="variable-container-splitter")
        else:
            self.vars_merger = DictMerger("policy",
                                          scope="variable-dict-merger")
            self.vars_splitter = ContainerSplitter(
                "policy", scope="variable-container-splitter")

        self.internal_states_space = Space.from_spec(internal_states_space)

        # An object implementing the loss function interface is only strictly needed
        # if automatic device strategies like multi-gpu are enabled. This is because
        # the device strategy needs to know the name of the loss function to infer the appropriate
        # operations.
        self.loss_function = None

        self.exploration = Exploration.from_spec(exploration_spec)
        self.execution_spec = parse_execution_spec(execution_spec)

        # Python-side experience buffer for better performance (may be disabled).
        self.default_env = "env_0"

        def factory_(i):
            # One plain list for fewer than 2 components, otherwise a tuple holding one list per component.
            if i < 2:
                return []
            return tuple([[] for _ in range(i)])

        # All buffers are keyed lazily via defaultdict; only the actions buffer
        # uses the per-flat-component layout produced by `factory_`.
        self.states_buffer = defaultdict(list)
        self.actions_buffer = defaultdict(
            partial(factory_, len(self.flat_action_space or [])))
        self.internals_buffer = defaultdict(list)
        self.rewards_buffer = defaultdict(list)
        self.next_states_buffer = defaultdict(list)
        self.terminals_buffer = defaultdict(list)

        self.observe_spec = parse_observe_spec(observe_spec)

        # Global time step counter.
        self.timesteps = 0

        # Create the Agent's optimizer based on optimizer_spec and execution strategy.
        # Always save the spec (possibly None) in case agent needs to create more optimizers e.g. for
        # baseline; this way, the attribute exists regardless of the given arguments.
        self.optimizer_spec = optimizer_spec
        self.optimizer = None
        if optimizer_spec is not None:
            self.optimizer = Optimizer.from_spec(optimizer_spec)

        self.value_function_optimizer = None
        if self.value_function is not None:
            # Fall back to the policy's optimizer spec if no dedicated one is given.
            if value_function_optimizer_spec is None:
                base_spec = optimizer_spec
            else:
                base_spec = value_function_optimizer_spec
            if base_spec is None:
                raise ValueError(
                    "ERROR: `value_function_spec` given, but neither `value_function_optimizer_spec` nor "
                    "`optimizer_spec` to create the value function optimizer from!")
            if isinstance(base_spec, dict):
                # Set the scope on a copy so that neither the caller's dict nor `self.optimizer_spec`
                # silently picks up the value function's scope.
                vf_optimizer_spec = dict(base_spec,
                                         scope="value-function-optimizer")
            else:
                vf_optimizer_spec = base_spec
                vf_optimizer_spec["scope"] = "value-function-optimizer"
            self.value_function_optimizer = Optimizer.from_spec(
                vf_optimizer_spec)

        # Update-spec dict tells the Agent how to update (e.g. memory batch size).
        self.update_spec = parse_update_spec(update_spec)

        # Create our GraphBuilder and -Executor.
        self.graph_builder = GraphBuilder(action_space=self.action_space,
                                          summary_spec=summary_spec)
        self.graph_executor = GraphExecutor.from_spec(
            get_backend(),
            graph_builder=self.graph_builder,
            execution_spec=self.execution_spec,
            saver_spec=saver_spec)  # type: GraphExecutor
Example #4
0
    def build_auto_api_method(self,
                              stack_api_method_name,
                              component_api_method_name,
                              fold_time_rank=False,
                              unfold_time_rank=False,
                              ok_to_overwrite=False):
        """
        Defines the automatic API-method of this network via the `rlgraph_api` decorator.

        Three cases are distinguished:
        - PyTorch backend in "define_by_run" mode: An API named `stack_api_method_name` that forwards all
          inputs to `self._pytorch_fast_path_exec`.
        - No custom API given (`self.custom_api_given is False`): An `apply` API that passes the inputs
          through all sub-Components one after the other, folding/unfolding the time rank around LSTMLayers.
        - Otherwise: Defers to the parent class' implementation.

        Args:
            stack_api_method_name (str): Name of the API to create. Only used in the PyTorch fast-path
                case and when deferring to the parent class.
            component_api_method_name (str): Only used when deferring to the parent class.
            fold_time_rank (bool): Whether the auto `apply` folds the time rank of its first input
                (via `self.folder`) before calling the sub-Components.
            unfold_time_rank (bool): Whether the auto `apply` unfolds the time rank of the final result.
                If given without `fold_time_rank`, the second input must be the original (unfolded) input.
            ok_to_overwrite (bool): Passed through to `rlgraph_api` (resp. the parent implementation).
        """
        if get_backend(
        ) == "pytorch" and self.execution_mode == "define_by_run":

            @rlgraph_api(name=stack_api_method_name,
                         component=self,
                         ok_to_overwrite=ok_to_overwrite)
            def method(self, nn_input, *nn_inputs, **kwargs):
                # Avoid jumping back between layers and calls at runtime.
                return self._pytorch_fast_path_exec(
                    *([nn_input] + list(nn_inputs)), **kwargs)

        # Auto apply-API -> Handle LSTMs correctly.
        elif self.custom_api_given is False:

            # NOTE: Inside `apply`, `self_` is the first argument handed in by the caller, whereas `self`
            # is the network captured from the enclosing scope (presumably the same object - verify
            # against `rlgraph_api`).
            @rlgraph_api(component=self, ok_to_overwrite=ok_to_overwrite)
            def apply(self_, nn_input, *nn_inputs, **kwargs):
                inputs = [nn_input] + list(nn_inputs)
                original_input = inputs[0]

                # Keep track of the folding status.
                # None means: no RNN in this network -> no folding/unfolding needed so far.
                fold_status = "unfolded" if self.has_rnn() else None
                # Fold time rank? For now only support 1st arg folding/unfolding.
                if fold_time_rank is True:
                    args_ = tuple([self.folder.apply(original_input)] +
                                  list(inputs[1:]))
                    fold_status = "folded"
                else:
                    # TODO: If only unfolding: Assume for now that 2nd input is the original one (so we can infer
                    # TODO: batch/time dims).
                    if unfold_time_rank is True:
                        assert len(inputs) >= 2, \
                            "ERROR: In Stack: If unfolding w/o folding, second arg must be the original input!"
                        # The 2nd input only serves as shape reference and is not passed on to the layers.
                        original_input = inputs[1]
                        args_ = tuple([inputs[0]] + list(inputs[2:]))
                    else:
                        # NOTE: `args_` is a list here (a tuple in all other cases).
                        args_ = inputs
                kwargs_ = kwargs

                # TODO: keep track of LSTMLayers that only return the last time-step (outputs after these Layers
                # TODO: can no longer be folded, their time-rank is gone for the rest of the NN.
                # NOTE: The loop index `i` is not used.
                for i, sub_component in enumerate(
                        self_.sub_components.values()):  # type: Component
                    # The folder/unfolder helper Components are not layers of the network: skip them.
                    if sub_component.scope in [
                            "time-rank-folder_", "time-rank-unfolder_"
                    ]:
                        continue

                    # Unfold before an LSTM.
                    if isinstance(sub_component,
                                  LSTMLayer) and fold_status != "unfolded":
                        args_, kwargs_ = self._unfold(original_input, *args_,
                                                      **kwargs_)
                        fold_status = "unfolded"
                    # Fold before a non-LSTM if not already done so.
                    elif not isinstance(
                            sub_component,
                            LSTMLayer) and fold_status == "unfolded":
                        args_, kwargs_ = self._fold(*args_, **kwargs_)
                        fold_status = "folded"

                    results = sub_component.apply(*args_, **kwargs_)

                    # Recycle args_, kwargs_ for reuse in next sub-Component's API-method call.
                    # A dict result becomes the next call's kwargs, anything else its positional args.
                    if isinstance(results, dict):
                        args_ = ()
                        kwargs_ = results
                    else:
                        args_ = force_tuple(results)
                        kwargs_ = {}

                if unfold_time_rank:
                    args_, kwargs_ = self._unfold(original_input, *args_,
                                                  **kwargs_)
                # Always return a dict: either the last dict result as is or the positional result(s)
                # under the key "output".
                if args_ == ():
                    return kwargs_
                elif len(args_) == 1:
                    return dict(output=args_[0])
                else:
                    return dict(output=args_)

        else:
            # A custom API was given: Use the parent class' auto API-method logic.
            super(NeuralNetwork,
                  self).build_auto_api_method(stack_api_method_name,
                                              component_api_method_name,
                                              fold_time_rank, unfold_time_rank,
                                              ok_to_overwrite)
Example #5
0
    def _graph_fn_apply(self, key, preprocessing_inputs, input_before_time_rank_folding=None):
        """
        Reshapes the input to the specified new shape.

        Depending on this Component's settings, the input is first one-hot encoded (if
        `self.get_num_categories` returns more than 1 category for it) and then either gets its time rank
        unfolded, its time rank folded, or is reshaped to the shape of the preprocessed Space.

        Args:
            key: Forwarded (together with the input's Space) to `self.get_num_categories` to decide on the
                one-hot encoding.
            preprocessing_inputs (SingleDataOp): The input to reshape.
            input_before_time_rank_folding (Optional[SingleDataOp]): The original input (before!) the time-rank had
                been folded (this was done in a different ReShape Component). Serves if `self.unfold_time_rank` is True
                to figure out the exact time-rank dimension to unfold.

        Returns:
            SingleDataOp: The reshaped input.
        """
        assert self.unfold_time_rank is False or input_before_time_rank_folding is not None

        if self.backend == "python" or get_backend() == "python":
            # Create a one-hot axis for the categories at the end?
            num_categories = self.get_num_categories(key, get_space_from_op(preprocessing_inputs))
            if num_categories and num_categories > 1:
                preprocessing_inputs = one_hot(preprocessing_inputs, depth=num_categories)

            if self.unfold_time_rank:
                new_shape = (-1, -1) + preprocessing_inputs.shape[1:]
            elif self.fold_time_rank:
                new_shape = (-1,) + preprocessing_inputs.shape[2:]
            else:
                new_shape = self.get_preprocessed_space(get_space_from_op(preprocessing_inputs)).get_shape(
                    with_batch_rank=-1, with_time_rank=-1
                )

            # Dynamic new shape inference:
            # If both batch and time rank must be left alone OR the time rank must be unfolded from a currently common
            # batch+time 0th rank, get these two dynamically.
            # The guard looks at `new_shape` (same as in the tf branch), not at the rank of the input: a shape
            # with two leading -1 can never be resolved by reshape itself, no matter what the input's rank is.
            if len(new_shape) >= 2 and new_shape[0] == -1 and new_shape[1] == -1:
                # Time rank unfolding. Get the time rank from original input.
                if self.unfold_time_rank is True:
                    original_shape = input_before_time_rank_folding.shape
                    new_shape = (original_shape[0], original_shape[1]) + new_shape[2:]
                # No time-rank unfolding, but we do have both batch- and time-rank.
                else:
                    input_shape = preprocessing_inputs.shape
                    # Batch and time rank stay as is.
                    new_shape = (input_shape[0], input_shape[1]) + new_shape[2:]

            # Shape is passed positionally (the `newshape` keyword is deprecated in newer numpy versions).
            return np.reshape(preprocessing_inputs, new_shape)

        elif get_backend() == "pytorch":
            # Create a one-hot axis for the categories at the end?
            num_categories = self.get_num_categories(key, get_space_from_op(preprocessing_inputs))
            if num_categories and num_categories > 1:
                preprocessing_inputs = pytorch_one_hot(preprocessing_inputs, depth=num_categories)

            if self.unfold_time_rank:
                new_shape = (-1, -1) + preprocessing_inputs.shape[1:]
            elif self.fold_time_rank:
                new_shape = (-1,) + preprocessing_inputs.shape[2:]
            else:
                new_shape = self.get_preprocessed_space(get_space_from_op(preprocessing_inputs)).get_shape(
                    with_batch_rank=-1, with_time_rank=-1
                )

            # Dynamic new shape inference:
            # If both batch and time rank must be left alone OR the time rank must be unfolded from a currently common
            # batch+time 0th rank, get these two dynamically.
            # `>= 2` (same as in the tf branch): Also resolve the shape if it consists of nothing but the two
            # unknown leading dimensions.
            if len(new_shape) >= 2 and new_shape[0] == -1 and new_shape[1] == -1:
                # Time rank unfolding. Get the time rank from original input.
                if self.unfold_time_rank is True:
                    original_shape = input_before_time_rank_folding.shape
                    new_shape = (original_shape[0], original_shape[1]) + new_shape[2:]
                # No time-rank unfolding, but we do have both batch- and time-rank.
                else:
                    input_shape = preprocessing_inputs.shape
                    # Batch and time rank stay as is.
                    new_shape = (input_shape[0], input_shape[1]) + new_shape[2:]

            # The problem here is the following: Input has dim e.g. [4, 256, 1, 1]
            # -> If shape inference in spaces failed, output dim is not correct -> reshape will attempt
            # something like reshaping to [256].
            # NOTE: In this case, `new_shape` is ignored and all size-1 dimensions are squeezed out instead.
            if self.flatten or (preprocessing_inputs.size(0) > 1 and preprocessing_inputs.dim() > 1):
                return preprocessing_inputs.squeeze()
            else:
                return torch.reshape(preprocessing_inputs, new_shape)

        elif get_backend() == "tf":
            # Create a one-hot axis for the categories at the end?
            space = get_space_from_op(preprocessing_inputs)
            num_categories = self.get_num_categories(key, space)
            if num_categories and num_categories > 1:
                preprocessing_inputs_ = tf.one_hot(
                    preprocessing_inputs, depth=num_categories, axis=-1, dtype="float32"
                )
                # Carry the batch/time rank hints over to the new (one-hot) op.
                if hasattr(preprocessing_inputs, "_batch_rank"):
                    preprocessing_inputs_._batch_rank = preprocessing_inputs._batch_rank
                    preprocessing_inputs_._time_rank = preprocessing_inputs._time_rank
                preprocessing_inputs = preprocessing_inputs_

            if self.unfold_time_rank:
                list_shape = preprocessing_inputs.shape.as_list()
                assert len(list_shape) == 1 or list_shape[1] is not None,\
                    "ERROR: Cannot unfold. `preprocessing_inputs` (with shape {}) " \
                    "already seems to be unfolded!".format(list_shape)
                new_shape = (-1, -1) + tuple(list_shape[1:])
            elif self.fold_time_rank:
                new_shape = (-1,) + tuple(preprocessing_inputs.shape.as_list()[2:])
            else:
                new_shape = self.get_preprocessed_space(get_space_from_op(preprocessing_inputs)).get_shape(
                    with_batch_rank=-1, with_time_rank=-1
                )

            # Dynamic new shape inference:
            # If both batch and time rank must be left alone OR the time rank must be unfolded from a currently common
            # batch+time 0th rank, get these two dynamically.
            if len(new_shape) >= 2 and new_shape[0] == -1 and new_shape[1] == -1:
                # Time rank unfolding. Get the time rank from original input.
                if self.unfold_time_rank is True:
                    original_shape = tf.shape(input_before_time_rank_folding)
                    new_shape = (original_shape[0], original_shape[1]) + new_shape[2:]
                # No time-rank unfolding, but we do have both batch- and time-rank.
                else:
                    input_shape = tf.shape(preprocessing_inputs)
                    # Batch and time rank stay as is.
                    new_shape = (input_shape[0], input_shape[1]) + new_shape[2:]

            reshaped = tf.reshape(tensor=preprocessing_inputs, shape=new_shape, name="reshaped")

            # Have to place the time rank back in as unknown (for the auto Space inference).
            # NOTE: The exact type check excludes bools (True/False never take this path).
            if type(self.unfold_time_rank) == int:
                # TODO: replace placeholder with default value by _batch_rank/_time_rank properties.
                return tf.placeholder_with_default(reshaped, shape=(None, None) + new_shape[2:])
            else:
                # TODO: add other cases of reshaping and fix batch/time rank hints.
                if self.fold_time_rank:
                    reshaped._batch_rank = 0
                elif self.unfold_time_rank:
                    reshaped._batch_rank = 1 if self.time_major is True else 0
                    reshaped._time_rank = 0 if self.time_major is True else 1
                else:
                    # Plain reshape: Take the rank hints from the Space of the input (as inferred before
                    # a possible one-hot encoding).
                    if space.has_batch_rank is True:
                        if space.time_major is False:
                            reshaped._batch_rank = 0
                        else:
                            reshaped._time_rank = 0
                            reshaped._batch_rank = 1
                    if space.has_time_rank is True:
                        reshaped._time_rank = 0 if space.time_major is True else 1

                return reshaped
Example #6
0
def get_space_from_op(op):
    """
    Tries to re-create a Space object given some DataOp.
    This is useful for shape inference when passing a Socket's ops through a GraphFunction and
    auto-inferring the resulting shape/Space.

    Containers (dict/tuple) are handled recursively. If any element of a container yields no Space (0),
    the whole container yields 0 as well.

    Args:
        op (DataOp): The op to create a corresponding Space for.

    Returns:
        Union[Space,int]: The inferred Space object, or 0 if no Space can be derived (the op has no `dtype`,
            a tf op has no `get_shape`, the shape is unknown, or a container holds such an element).

    Raises:
        RLGraphError: If `op` is a str, or if it is a tensor whose dtype string contains none of
            "float", "int", "bool" or "string".
    """
    # a Dict
    if isinstance(op, dict):  # DataOpDict
        spec = {}
        add_batch_rank = False
        add_time_rank = False
        for key, value in op.items():
            space = get_space_from_op(value)
            # Same rule as in the tuple branch below: an element without a Space means the
            # container has no Space either (0 has no `has_batch_rank` attribute to inspect).
            if space == 0:
                return 0
            spec[key] = space
            if space.has_batch_rank:
                add_batch_rank = True
            if space.has_time_rank:
                add_time_rank = True
        return Dict(spec,
                    add_batch_rank=add_batch_rank,
                    add_time_rank=add_time_rank)
    # a Tuple
    elif isinstance(op, tuple):  # DataOpTuple
        spec = []
        add_batch_rank = False
        add_time_rank = False
        for i in op:
            space = get_space_from_op(i)
            if space == 0:
                return 0
            spec.append(space)
            if space.has_batch_rank:
                add_batch_rank = True
            if space.has_time_rank:
                add_time_rank = True
        return Tuple(spec,
                     add_batch_rank=add_batch_rank,
                     add_time_rank=add_time_rank)
    # primitive Space -> infer from op dtype and shape
    else:
        # Op itself is a single value, simple python type.
        if isinstance(op, (bool, int, float)):
            return BoxSpace.from_spec(spec=type(op), shape=())
        elif isinstance(op, str):
            raise RLGraphError(
                "Cannot derive Space from non-allowed op ({})!".format(op))
        # A single numpy array.
        elif isinstance(op, np.ndarray):
            return BoxSpace.from_spec(spec=convert_dtype(str(op.dtype), "np"),
                                      shape=op.shape)
        elif isinstance(op, list):
            return try_space_inference_from_list(op)
        # No Space: e.g. the tf.no_op, a distribution (anything that's not a tensor).
        # PyTorch Tensors do not have get_shape so must check backend.
        elif hasattr(op, "dtype") is False or (get_backend() == "tf" and
                                               not hasattr(op, "get_shape")):
            return 0
        # Some tensor: can be converted into a BoxSpace.
        else:
            shape = get_shape(op)
            # Unknown shape (e.g. a cond op).
            if shape is None:
                return 0
            add_batch_rank = False
            add_time_rank = False
            time_major = False
            new_shape = list(shape)

            # New way: Detect via op._batch_rank and op._time_rank properties where these ranks are.
            # Flagged positions are marked with -1 and then filtered out of the value shape.
            if hasattr(op, "_batch_rank") and isinstance(op._batch_rank, int):
                add_batch_rank = True
                new_shape[op._batch_rank] = -1

            # elif get_backend() == "pytorch":
            #     if isinstance(op, torch.Tensor):
            #         if op.dim() > 1 and shape[0] == 1:
            #             add_batch_rank = True
            #             new_shape[0] = 1
            if hasattr(op, "_time_rank") and isinstance(op._time_rank, int):
                add_time_rank = True
                # Time rank at position 0 means time-major layout.
                if op._time_rank == 0:
                    time_major = True
                new_shape[op._time_rank] = -1
            shape = tuple(n for n in new_shape if n != -1)

            # Old way: Detect automatically whether the first rank(s) are batch and/or time rank.
            # Only used if no rank flags were found on the op and the leading dim is unknown (None).
            if add_batch_rank is False and add_time_rank is False and shape != (
            ) and shape[0] is None:
                if len(shape) > 1 and shape[1] is None:
                    #raise RLGraphError(
                    #    "ERROR: Cannot determine time-major flag if both batch- and time-ranks are in an op w/o saying "
                    #    "which rank goes to which position!"
                    #)
                    # Two leading unknown dims: treat them as batch + time (time_major stays False).
                    shape = shape[2:]
                    add_time_rank = True
                else:
                    shape = shape[1:]
                add_batch_rank = True

            base_dtype = op.dtype.base_dtype if hasattr(
                op.dtype, "base_dtype") else op.dtype
            # PyTorch does not have a bool type
            if get_backend() == "pytorch":
                if op.dtype is torch.uint8:
                    base_dtype = bool
            base_dtype_str = str(base_dtype)

            # FloatBox
            if "float" in base_dtype_str:
                return FloatBox(shape=shape,
                                add_batch_rank=add_batch_rank,
                                add_time_rank=add_time_rank,
                                time_major=time_major,
                                dtype=convert_dtype(base_dtype, "np"))
            # IntBox
            elif "int" in base_dtype_str:
                return IntBox(shape=shape,
                              add_batch_rank=add_batch_rank,
                              add_time_rank=add_time_rank,
                              time_major=time_major,
                              dtype=convert_dtype(base_dtype, "np"))
            # a BoolBox
            elif "bool" in base_dtype_str:
                return BoolBox(shape=shape,
                               add_batch_rank=add_batch_rank,
                               add_time_rank=add_time_rank,
                               time_major=time_major)
            # a TextBox
            elif "string" in base_dtype_str:
                return TextBox(shape=shape,
                               add_batch_rank=add_batch_rank,
                               add_time_rank=add_time_rank,
                               time_major=time_major)

    # Only reached for tensors whose dtype matched none of the box types above.
    raise RLGraphError(
        "ERROR: Cannot derive Space from op '{}' (unknown type?)!".format(op))
Example #7
0
    def _graph_fn_call(self, *inputs):
        """
        The actual calculation on one or more input Ops.

        Args:
            inputs (SingleDataOp): The single (non-container) input(s) to the layer.

        Returns:
            The output(s) after having pushed input(s) through the layer.
        """
        # `self.layer` is not given: Only apply the activation function.
        if self.layer is None:
            # No activation function.
            if self.activation is None:
                return tuple(inputs)
            # Pass inputs through activation function.
            else:
                activation_function = get_activation_function(self.activation, self.activation_params)
                output = activation_function(*inputs)
                # TODO: Move into util function.
                # Add batch-/time-rank flags.
                output._batch_rank = 0 if self.time_major is False else 1
                if self.in_space_0 and self.in_space_0.has_time_rank:
                    output._time_rank = 0 if self.in_space_0.time_major is True else 1
                return output
        # `self.layer` already includes activation function details.
        else:
            if get_backend() == "tf":
                output = self.layer.call(*inputs)
                # Add batch-/time-rank flags.
                output._batch_rank = 0 if self.time_major is False else 1
                if self.in_space_0 and self.in_space_0.has_time_rank:
                    output._time_rank = 0 if self.in_space_0.time_major is True else 1
                return output
            elif get_backend() == "pytorch":
                # Strip empty internal states:
                # Ensure inputs are float tensors.
                input_tensors = []
                for value in inputs:
                    if value is not None and hasattr(value, "float"):
                        input_tensors.append(value.float())
                if not input_tensors:
                    return None

                # Common debug print:
                # print("in net work layer: ", self.name)
                # import torch
                # shapes = []
                # for inp in inputs:
                #     if hasattr(inp, "shape"):
                #         shapes.append(inp.shape)
                #     else:
                #         shapes.append(type(inp))
                # print("input shapes = ", shapes)
                # PyTorch layers are called, not `applied`.
                out = self.layer(*input_tensors)
                # print("layer output shape = ", out.shape)
                if self.activation_fn is None:
                    return out
                else:
                    # Apply activation fn.
                    return self.activation_fn(out)
    def _graph_fn_calc_sequence_decays(self, sequence_indices, decay=0.9):
        """
        Computes decays for sequence indices, e.g. for generalized advantage estimation.
        That is, a sequence with terminals is used to compute for each subsequence the decay
        values and the length of the sequence.

        Example:
        decay = 0.5, sequence_indices = [0 0 1 0 1] will return lengths [3, 2] and
        decays [1 0.5 0.25 1 0.5] (decay^0, decay^1, ..decay^k) where k = sequence length for
        each sub-sequence.

        Args:
            sequence_indices (DataOp): Indices denoting sequences, e.g. terminal values. An entry equal to 1
                closes the current sub-sequence.
            decay (float): Base of the decay; the value at position k within a sub-sequence is decay^k.

        Returns:
            tuple:
                - Sequence lengths (int32).
                - Decays (float32).
        """
        if get_backend() == "tf":
            elems = tf.shape(input=sequence_indices)[0]
            sequence_indices = tf.cast(sequence_indices, dtype=tf.int32)

            # TensorArray:
            sequence_lengths = tf.TensorArray(dtype=tf.int32,
                                              infer_shape=False,
                                              size=1,
                                              dynamic_size=True,
                                              clear_after_read=False)
            decays = tf.TensorArray(dtype=tf.float32,
                                    infer_shape=False,
                                    size=1,
                                    dynamic_size=True,
                                    clear_after_read=False)

            def update(write_index, sequence_array, length):
                # Write the finished sub-sequence's length, advance the write index, reset length to 0.
                sequence_array = sequence_array.write(write_index, length)
                return sequence_array, write_index + 1, 0

            def insert_body(index, length, sequence_lengths, write_index,
                            decays):
                # Decay is based on length, so val = decay^length
                decay_val = tf.pow(x=decay,
                                   y=tf.cast(length, dtype=tf.float32))

                # Write decay val into array.
                decays = decays.write(index, decay_val)
                length += 1

                # On a sequence end (index value 1): store the length and start a new sub-sequence.
                sequence_lengths, write_index, length = tf.cond(
                    pred=tf.equal(sequence_indices[index], 1),
                    true_fn=lambda: update(write_index, sequence_lengths,
                                           length),
                    false_fn=lambda: (sequence_lengths, write_index, length))
                return index + 1, length, sequence_lengths, write_index, decays

            def cond(index, length, sequence_lengths, write_index, decays):
                return index < elems

            index, final_length, sequence_lengths, write_index, decays = tf.while_loop(
                cond=cond,
                body=insert_body,
                loop_vars=[0, 0, sequence_lengths, 0, decays],
                back_prop=False)

            # If the final element was terminal -> already included (final_length is 0).
            # Otherwise, the trailing (open) sub-sequence's length still has to be written.
            # Decays need no updating because we just wrote them always.
            sequence_lengths, _, _ = tf.cond(
                pred=tf.greater(final_length, 0),
                true_fn=lambda: update(write_index, sequence_lengths,
                                       final_length),
                false_fn=lambda: (sequence_lengths, write_index, final_length))
            return tf.stop_gradient(
                sequence_lengths.stack()), tf.stop_gradient(decays.stack())
        elif get_backend() == "pytorch":
            sequence_lengths = []
            decays = []

            length = 0
            for index in sequence_indices:
                # Compute decay based on sequence length.
                decays.append(pow(decay, length))
                length += 1
                if index == 1:
                    sequence_lengths.append(length)
                    length = 0

            # Append final sequence.
            if length > 0:
                sequence_lengths.append(length)
            # Decays are fractional powers of `decay`: They must be stored as floats (as in the tf
            # branch); an int dtype would truncate every value below 1 to 0.
            return torch.tensor(sequence_lengths,
                                dtype=torch.int32), torch.tensor(
                                    decays, dtype=torch.float32)
    def _graph_fn_reverse_apply_decays_to_sequence(self, values, sequence_indices, decay=0.9):
        """
        Walks backwards over `values` and accumulates them with a decay factor, restarting the accumulation
        wherever `sequence_indices` marks a sequence end.
        Useful to compute discounted reward estimates across a sequence of estimates.

        Args:
            values (DataOp): Values to apply decays to.
            sequence_indices (DataOp): Indices denoting sequences, e.g. terminal values.
            decay (float): Factor applied to the accumulated value of the following step.

        Returns:
            Decayed sequence values.
        """
        if get_backend() == "tf":
            num_elems = tf.shape(input=sequence_indices)[0]
            result_array = tf.TensorArray(
                dtype=tf.float32, infer_shape=False, size=1, dynamic_size=True, clear_after_read=False
            )
            sequence_indices = tf.cast(sequence_indices, dtype=tf.int32)

            def step(index, carried, out_array):
                # A 1 at this position starts a fresh accumulation.
                # NOTE: The reset uses zeros_like (not a plain 0.0), because values[index] might have a
                # more complex shape and a scalar would violate the loop's shape checks.
                is_sequence_end = tf.equal(
                    sequence_indices[index], tf.ones_like(sequence_indices[index])
                )
                start_v = tf.cond(
                    pred=is_sequence_end,
                    true_fn=lambda: tf.zeros_like(carried),
                    false_fn=lambda: carried
                )

                # Current value plus the decayed accumulation of the steps after it.
                accum_v = values[index] + decay * start_v
                out_array = out_array.write(index, accum_v)

                # Reverse iteration: Move on to the previous position.
                return index - 1, accum_v, out_array

            def keep_going(index, carried, out_array):
                # Scan in reverse until we have passed position 0.
                return index >= 0

            # Loop vars: loop index (starting at the end), previously accumulated value, output array.
            initial_vars = [num_elems - 1, tf.zeros_like(values[-1]), result_array]
            _, _, result_array = tf.while_loop(
                cond=keep_going, body=step, loop_vars=initial_vars, back_prop=False
            )

            return tf.stop_gradient(result_array.stack())

        elif get_backend() == "pytorch":
            # Scan sequences in reverse; results are collected back-to-front.
            collected = []
            last_position = len(values.data) - 1
            running = 0
            for offset, v in enumerate(reversed(values.data)):
                # Arrived at new sequence, start over.
                if sequence_indices[last_position - offset] == 1:
                    running = 0

                # Accumulate prior value.
                running = v + decay * running
                collected.append(running)

            # Bring back into original order, convert, and return.
            return torch.tensor(collected[::-1], dtype=torch.float32)
Example #10
0
    def get_variable(self, name, is_input_feed=False, add_batch_rank=None, add_time_rank=None,
                     time_major=None, is_python=False, local=False, **kwargs):
        """
        Creates a backend-specific variable (or input feed) matching this Space.

        Args:
            name (str): Name for the tf placeholder/variable. A leading "/" or "/_" is stripped.
            is_input_feed (bool): If True, create an input feed: A `tf.placeholder` for the tf backend or,
                on the python path with the pytorch backend, a zero-filled torch tensor.
            add_batch_rank (Optional[bool,int]): None: Use `self.has_batch_rank`. False: No batch rank.
                True: Batch rank of unknown size (None). Any other value: Batch rank of that size.
            add_time_rank (Optional[bool,int]): Same as `add_batch_rank`, but for the time rank
                (None: Use `self.has_time_rank`).
            time_major (Optional[bool]): None: Use `self.time_major`. If False, the shape is built as
                batch-rank + time-rank + `self.shape`, otherwise as time-rank + batch-rank + `self.shape`.
            is_python (bool): If True, use the python path regardless of the backend.
            local (bool): tf variables only: If True, the variable is put into the LOCAL_VARIABLES
                collection instead of GLOBAL_VARIABLES.

        Keyword Args:
            initializer (any): tf variables only: Spec passed on to `Initializer.from_spec`.
            Any other kwargs are forwarded to `tf.get_variable`.

        Returns:
            A (nested) python list, a torch tensor, or a tf placeholder/variable, depending on the
            backend and the given flags.
        """
        add_batch_rank = self.has_batch_rank if add_batch_rank is None else add_batch_rank
        batch_rank = () if add_batch_rank is False else (None,) if add_batch_rank is True else (add_batch_rank,)

        add_time_rank = self.has_time_rank if add_time_rank is None else add_time_rank
        time_rank = () if add_time_rank is False else (None,) if add_time_rank is True else (add_time_rank,)

        time_major = self.time_major if time_major is None else time_major

        if time_major is False:
            shape = batch_rank + time_rank + self.shape
        else:
            shape = time_rank + batch_rank + self.shape

        if is_python is True or get_backend() == "python":
            # Build a (possibly nested) list of zeros; the outer list follows the major rank.
            if isinstance(add_batch_rank, int):
                if isinstance(add_time_rank, int) and add_time_rank > 0:
                    if time_major:
                        var = [[0 for _ in range_(add_batch_rank)] for _ in range_(add_time_rank)]
                    else:
                        var = [[0 for _ in range_(add_time_rank)] for _ in range_(add_batch_rank)]
                else:
                    var = [0 for _ in range_(add_batch_rank)]
            elif isinstance(add_time_rank, int) and add_time_rank > 0:
                var = [0 for _ in range_(add_time_rank)]
            else:
                var = []

            # Un-indent and just directly construct pytorch?
            if get_backend() == "pytorch" and is_input_feed:
                # Convert to PyTorch tensors as a faux placeholder.
                return torch.zeros(shape, dtype=convert_dtype(dtype=self.dtype, to="pytorch"))
            else:
                # TODO also convert?
                return var

        elif get_backend() == "tf":
            # TODO: re-evaluate the cutting of a leading '/_?' (tf doesn't like it)
            name = re.sub(r'^/_?', "", name)
            if is_input_feed:
                variable = tf.placeholder(dtype=convert_dtype(self.dtype), shape=shape, name=name)
            else:
                init_spec = kwargs.pop("initializer", None)
                # Bools should be initializable via 0 or not 0.
                if self.dtype == np.bool_ and isinstance(init_spec, (int, float)):
                    init_spec = (init_spec != 0)

                # Strings with a 0-init spec get no initializer at all.
                if self.dtype == np.str_ and init_spec == 0:
                    initializer = None
                else:
                    initializer = Initializer.from_spec(shape=shape, specification=init_spec).initializer

                variable = tf.get_variable(
                    name, shape=shape, dtype=convert_dtype(self.dtype), initializer=initializer,
                    collections=[tf.GraphKeys.GLOBAL_VARIABLES if local is False else tf.GraphKeys.LOCAL_VARIABLES],
                    **kwargs
                )
            # Add batch/time rank flags (the positions of these ranks) to the op.
            # NOTE(review): The flags follow `self.has_batch_rank`/`self.has_time_rank`/`self.time_major`,
            # not the `add_batch_rank`/`add_time_rank`/`time_major` overrides - confirm this is intended.
            if self.has_batch_rank:
                variable._batch_rank = 0 if self.time_major is False else 1
            if self.has_time_rank:
                variable._time_rank = 1 if self.time_major is False else 0
            return variable
Example #11
0
 def _graph_fn_get_distribution(self, parameters):
     """
     Builds the backend distribution object from the given parameters.

     Args:
         parameters: Indexable holding the location at position 0 and the diagonal scale at position 1.

     Returns:
         A `tfp.distributions.MultivariateNormalDiag` if the backend is tf and
         `self.parameterize_via_diagonal` is set, otherwise None.
     """
     diagonal_tf = get_backend() == "tf" and self.parameterize_via_diagonal
     if not diagonal_tf:
         return None
     loc, scale_diag = parameters[0], parameters[1]
     return tfp.distributions.MultivariateNormalDiag(loc=loc, scale_diag=scale_diag)
Example #12
0
 def tensor_backed_bounds(self):
     """
     Returns the (low, high) bounds of this Space.

     Returns:
         tuple: `self.low` and `self.high`, each wrapped into a torch tensor if the backend is pytorch,
             otherwise returned as they are.
     """
     if get_backend() != "pytorch":
         return self.low, self.high
     low_tensor = torch.tensor(self.low)
     high_tensor = torch.tensor(self.high)
     return low_tensor, high_tensor
    def _graph_fn_loss_per_item(self, parameters, labels, sequence_length=None, time_percentage=None):
        """
        Supervised cross entropy classification loss.

        Args:
            parameters (SingleDataOp): The parameters output by a DistributionAdapter (before sampling from a
                possible distribution). Must carry a `_batch_rank` flag.

            labels (SingleDataOp): The corresponding labels (ideal probabilities) or int categorical labels.
            sequence_length (SingleDataOp[int]): The lengths of each sequence (if applicable) in the given batch.

            time_percentage (SingleDataOp): The time-percentage (0.0 to 1.0) with respect to the max number of
                timesteps. Not used in this function's body.

        Returns:
            SingleDataOp: The loss values vector (one single value for each batch item). None for
                backends other than tf.
        """
        if get_backend() != "tf":
            return None

        # The time rank sits on whichever of the two leading positions the batch rank does not occupy.
        batch_axis = parameters._batch_rank
        time_axis = 1 if batch_axis != 1 else 0

        # TODO: This softmaxing is duplicate computation (waste) as `parameters` are already softmaxed.
        if self.sparse is True:
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=parameters)
        else:
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=parameters)

        # Without sequence lengths: Only sum over the time rank (if `parameters` has one flagged).
        if sequence_length is None:
            if hasattr(parameters, "_time_rank"):
                cross_entropy = tf.reduce_sum(cross_entropy, axis=time_axis)
            return cross_entropy

        # With sequence lengths: Optional KL term, then mask out all time steps past each sequence's length.
        # TODO: Make it possible to customize the time-step decay (or increase?) behavior.
        if self.with_kl_regularizer is True:
            # Uniform distribution over the last rank's categories, in the shape of `parameters`.
            parameters_shape = tf.shape(parameters)
            uniform_probs = tf.fill(parameters_shape, 1.0 / float(parameters.shape.as_list()[-1]))
            # Negated sum over the last rank of: uniform * log(max(parameters, SMALL_NUMBER) / uniform).
            clipped = tf.maximum(parameters, SMALL_NUMBER)
            kl = -tf.reduce_sum(uniform_probs * tf.log(clipped / uniform_probs), axis=-1)
            cross_entropy = cross_entropy + kl

        max_time_steps = tf.cast(tf.shape(labels)[time_axis], dtype=tf.float32)
        # The mask is the only weighting (no additional decay over the time steps).
        weighting = tf.sequence_mask(sequence_length, max_time_steps, dtype=tf.float32)
        cross_entropy = tf.multiply(cross_entropy, weighting)

        # Reduce away the time-rank.
        cross_entropy = tf.reduce_sum(cross_entropy, axis=time_axis)
        # Optionally turn the sum into an average over each sequence's own length.
        if self.average_time_steps is True:
            cross_entropy = tf.divide(cross_entropy, tf.cast(sequence_length, dtype=tf.float32))

        return cross_entropy
Example #14
0
def parse_execution_spec(execution_spec):
    """
    Fills a (possibly partial) execution spec with backend-specific default values.

    Args:
        execution_spec (Optional[dict]): Execution spec dict. Must specify an execution mode
            "single" or "distributed". If mode "distributed", must specify a "distributed_spec"
            containing:
             - a key cluster_spec mapping to a ClusterSpec object,
             - a "job" for the job name,
             - an integer "task_index"

    Returns:
        dict: The sanitized execution_spec dict. For backends other than tf and pytorch, the given
            `execution_spec` is returned as is.
    """
    backend = get_backend()

    # TODO these are tensorflow specific
    if backend == "tf":
        tf_defaults = {
            "mode": "single",
            "distributed_spec": None,
            # Using a monitored session enabling summaries and hooks per default.
            "disable_monitoring": False,
            # Gpu settings.
            "gpu_spec": {
                # Are GPUs allowed to be used if they are detected?
                "gpus_enabled": False,
                # If yes, how many GPUs are to be used?
                "max_usable_gpus": 0,
                # If True, use `max_usable_gpus` fake-GPUs (CPU) iff no GPUs are available.
                "fake_gpus_if_necessary": False,
                # Specify specific CUDA devices to be used, e.g. gpu 0 and 2 = [0, 2].
                # If None, we use CUDA devices [0, max_usable_gpus - 1]
                "cuda_devices": None,
                # Fraction of the overall amount of memory that each visible GPU should be allocated.
                "per_process_gpu_memory_fraction": None,
                # If True, not all memory will be allocated which is relevant on shared resources.
                "allow_memory_growth": False,
            },
            # Device placement settings.
            "device_strategy": "default",
            "default_device": None,
            "device_map": {},
            "session_config": None,
            # Random seed for the tf graph.
            "seed": None,
            # Enabling the tf profiler?
            "enable_profiler": False,
            # With which frequency do we print out profiler information?
            "profiler_frequency": 1000,
            # Enabling a timeline write?
            "enable_timeline": False,
            # With which frequency do we write out a timeline file?
            "timeline_frequency": 1,
        }
        execution_spec = default_dict(execution_spec, tf_defaults)

        # Sub specification: Distributed settings (only looked at in "distributed" mode).
        if execution_spec.get("mode") == "distributed":
            distributed_defaults = {
                "job": "ps",
                "task_index": 0,
                "cluster_spec": {
                    "ps": ["localhost:22222"],
                    "worker": ["localhost:22223"],
                },
                "protocol": None,
            }
            execution_spec["distributed_spec"] = default_dict(
                execution_spec.get("distributed_spec"), distributed_defaults
            )

        # Sub specification: Session config (always completed for tf).
        session_defaults = {
            "type": "monitored-training-session",
            "allow_soft_placement": True,
            "log_device_placement": False,
        }
        execution_spec["session_config"] = default_dict(
            execution_spec.get("session_config"), session_defaults
        )

    elif backend == "pytorch":
        # No session configs, different GPU options.
        torch_defaults = {
            "mode": "single",
            "distributed_spec": None,
            # Using a monitored session enabling summaries and hooks per default.
            "disable_monitoring": False,
            # Gpu settings.
            "gpu_spec": {
                # Are GPUs allowed to be used if they are detected?
                "gpus_enabled": False,
                # If yes, how many GPUs are to be used?
                "max_usable_gpus": 0,
                # Specify specific CUDA devices to be used, e.g. gpu 0 and 2 = [0, 2].
                # If None, we use CUDA devices [0, max_usable_gpus - 1]
                "cuda_devices": None,
            },
            # Device placement settings.
            "device_strategy": "default",
            "default_device": None,
            "device_map": {},
            # TODO potentially set to nproc?
            "torch_num_threads": 1,
            "OMP_NUM_THREADS": 1,
        }
        execution_spec = default_dict(execution_spec, torch_defaults)

    return execution_spec
Example #15
0
    def _graph_fn_apply(self, preprocessing_inputs):
        """
        Gray-scales images of arbitrary rank.
        Normally, the images' rank is 3 (width/height/colors), but can also be: batch/width/height/colors, or any other.
        However, the last rank must be of size: `self.last_rank`.

        The python and pytorch paths convert via `cv2.cvtColor(.., cv2.COLOR_RGB2GRAY)` (image by image for
        rank-4 inputs); the tf path builds a weighted sum over the last rank using `self.weights`.

        Args:
            preprocessing_inputs (tensor): Single image or a batch of images to be gray-scaled (last rank=n colors,
                where n=`self.last_rank`). Lists are converted into numpy arrays first.

        Returns:
            DataOp: The op for processing the images.
        """
        if isinstance(preprocessing_inputs, list):
            preprocessing_inputs = np.asarray(preprocessing_inputs)

        # Sanity check the size of the color rank.
        images_shape = get_shape(preprocessing_inputs)
        assert images_shape[-1] == self.last_rank,\
            "ERROR: Given image's shape ({}) does not match number of weights (last rank must be {})!".\
            format(images_shape, self.last_rank)

        # Python (numpy) path.
        if self.backend == "python" or get_backend() == "python":
            # Anything but rank 4 is converted in one go (sample by sample).
            if preprocessing_inputs.ndim != 4:
                return cv2.cvtColor(preprocessing_inputs, cv2.COLOR_RGB2GRAY)

            # Rank 4: Convert each image along the first rank separately.
            scaled_images = np.asarray(
                [cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) for image in preprocessing_inputs]
            )
            # Keep last dim.
            if self.keep_rank:
                scaled_images = scaled_images[:, :, :, np.newaxis]
            return scaled_images

        # PyTorch path: Same as above, only going through numpy and back into a torch tensor.
        if get_backend() == "pytorch":
            if len(preprocessing_inputs.shape) == 4:
                scaled_images = np.asarray(
                    [cv2.cvtColor(image.numpy(), cv2.COLOR_RGB2GRAY) for image in preprocessing_inputs]
                )
                # Keep last dim.
                if self.keep_rank:
                    scaled_images = scaled_images[:, :, :, np.newaxis]
            else:
                # Sample by sample.
                scaled_images = cv2.cvtColor(preprocessing_inputs.numpy(), cv2.COLOR_RGB2GRAY)
            return torch.tensor(scaled_images)

        # TensorFlow path.
        if get_backend() == "tf":
            # Bring the weights into shape (1, 1, ..., last_rank), matching the inputs' rank.
            leading_ones = (1,) * (get_rank(preprocessing_inputs) - 1)
            weights_reshaped = np.reshape(self.weights, newshape=leading_ones + (self.last_rank,))

            # Do we need to convert?
            # The dangerous thing is that multiplying an int tensor (image) with float weights results in an all
            # 0 tensor).
            is_int_image = "int" in str(dtype_(preprocessing_inputs.dtype))
            if is_int_image:
                weighted = weights_reshaped * tf.cast(preprocessing_inputs, dtype=dtype_("float"))
            else:
                weighted = weights_reshaped * preprocessing_inputs

            # Sum the weighted color channels.
            reduced = tf.reduce_sum(weighted, axis=-1, keepdims=self.keep_rank)

            # Cast back to original dtype.
            if is_int_image:
                reduced = tf.cast(reduced, dtype=preprocessing_inputs.dtype)

            return reduced
Example #16
0
    def _graph_fn_bootstrap_values(self,
                                   rewards,
                                   values,
                                   terminals,
                                   sequence_indices,
                                   discount=0.99):
        """
        Inserts a bootstrap value at the end of each sub-sequence of a given sequence and computes the deltas
        used for generalized advantage estimation.

        Sub-sequences end wherever `sequence_indices` is set. The bootstrap value appended to a sub-sequence is
        0 if that sub-sequence ends with a terminal, otherwise the last observed value of that sub-sequence.
        For each sub-sequence we then compute:

        delta = reward + discount * bootstrapped_values[1:] - bootstrapped_values[:-1]

        Args:
            rewards (DataOp): Rewards for the observed sequences.
            values (DataOp): Value estimates for the observed sequences.
            terminals (DataOp): Terminals in sequences.
            sequence_indices (DataOp): Int indices denoting sequences (which may be non-terminal episode fragments
                from multiple environments).
            discount (float): Discount to apply to delta computation.

        Returns:
            DataOp: Sequence of deltas (None if the backend is neither "tf" nor "pytorch").
        """
        if get_backend() == "tf":
            values = tf.squeeze(input=values)
            num_values = tf.shape(input=values)[0]

            # Again ensure last index is 1 for any sub-sample arriving here, so that the trailing
            # fragment is always processed as a sub-sequence.
            last_sequence = tf.expand_dims(sequence_indices[-1], -1)
            sequence_indices = tf.concat([
                sequence_indices[:-1],
                tf.ones_like(last_sequence, dtype=tf.bool)
            ],
                                         axis=0)

            # One slot per time step; filled sub-sequence by sub-sequence via `scatter` below.
            deltas = tf.TensorArray(dtype=tf.float32,
                                    infer_shape=False,
                                    size=num_values,
                                    dynamic_size=False,
                                    clear_after_read=False,
                                    name="bootstrap-deltas")

            # Boot-strap with 0 only if terminals[i] and sequence_indices[i] are both true.
            boot_strap_zeros = tf.where(
                condition=tf.logical_and(sequence_indices, terminals),
                x=tf.ones_like(sequence_indices),
                y=tf.zeros_like(sequence_indices),
            )

            def write(index, deltas, start_index):
                # First: Take the slice of values representing the current sub-sequence.
                baseline_slice = values[start_index:index + 1]

                # If true terminal, append 0. Otherwise, append boot-strap val -> last observed val.
                bootstrap_value = tf.cond(
                    pred=tf.equal(boot_strap_zeros[index],
                                  tf.ones_like(sequence_indices[index])),
                    true_fn=lambda: tf.zeros_like(tensor=values[index],
                                                  dtype=tf.float32),
                    false_fn=lambda: values[index],
                )

                # Expand so the bootstrap value has a leading dim when we concat it to the slice.
                value = tf.expand_dims(bootstrap_value, 0)
                adjusted_v = tf.concat([baseline_slice, value], axis=0)

                # Compute deltas for this sequence.
                sequence_deltas = rewards[
                    start_index:index +
                    1] + discount * adjusted_v[1:] - adjusted_v[:-1]

                # Write delta to tensor-array.
                write_indices = tf.range(start=start_index, limit=index + 1)
                deltas = deltas.scatter(write_indices, sequence_deltas)

                # Set start-index for the next sub-sequence to index + 1.
                start_index = index + 1
                return deltas, start_index

            def body(index, start_index, deltas):
                # Whenever we encounter a sequence end, we compute deltas for that sequence.
                deltas, start_index = tf.cond(
                    pred=tf.equal(sequence_indices[index],
                                  tf.ones_like(sequence_indices[index])),
                    true_fn=lambda: write(index, deltas, start_index),
                    false_fn=lambda: (deltas, start_index))
                return index + 1, start_index, deltas

            def cond(index, start_index, deltas):
                return index < num_values

            index, start_index, deltas = tf.while_loop(
                cond=cond,
                body=body,
                loop_vars=[0, 0, deltas],
                parallel_iterations=1,
                back_prop=False)

            # Stack the per-step entries into a single tensor and drop size-1 dimensions.
            deltas = deltas.stack()
            return tf.squeeze(deltas)
        elif get_backend() == "pytorch":
            deltas = []
            discount_tensor = torch.tensor(discount)
            start_index = 0
            # Force the last position to be a sequence end (only done when there is more than one value).
            if len(values) > 1:
                last_sequence = torch.unsqueeze(sequence_indices[-1], -1)
                sequence_indices = torch.cat(
                    (sequence_indices[:-1], torch.ones_like(last_sequence)), 0)

            for i in range(len(values)):
                if sequence_indices[i]:
                    # Compute deltas for this sub-sequence.
                    # Cannot do this all at once because we would need the correct offsets for each sub-sequence.
                    baseline_slice = list(values[start_index:i + 1])
                    if terminals[i]:
                        baseline_slice.append(0)
                    else:
                        # Bootstrap with the last observed value of THIS sub-sequence (same as the tf
                        # branch), not with the last value of the entire batch.
                        baseline_slice.append(values[i])
                    adjusted_v = torch.tensor(baseline_slice)

                    # +1 because we want to include i-th value.
                    delta = rewards[
                        start_index:i +
                        1] + discount_tensor * adjusted_v[1:] - adjusted_v[:-1]
                    deltas.extend(delta)
                    start_index = i + 1

            return torch.tensor(deltas)
Example #17
0
    def _graph_fn_calc_v_trace_values(self, logits_actions_pi,
                                      log_probs_actions_mu, actions,
                                      actions_flat, discounts, rewards, values,
                                      bootstrapped_values):
        """
        Computes V-trace targets and policy-gradient advantages from log importance weights.

        Calculation:
        vs = V(xs) + SUM[t=s to s+N-1]( gamma^t-s * ( PROD[i=s to t-1](ci) ) * dt_V )
        with:
            dt_V = rho_t * (rt + gamma V(xt+1) - V(xt))
            rho_t and ci being the clipped IS weights

        Args:
            logits_actions_pi (SingleDataOp): Raw logits of the pi-network (one logit per discrete action).
            log_probs_actions_mu (SingleDataOp): Log-probs of the mu-network (one log-prob per discrete action).
            actions (SingleDataOp): The (int encoded) actions that were actually taken.
            actions_flat (SingleDataOp): One-hot version of the actions that were actually taken.
            discounts (SingleDataOp): DataOp (time x batch x values) with the discounts collected while stepping
                through the environment (timesteps s=t to s=t+N-1).
            rewards (SingleDataOp): DataOp (time x batch x values) with the rewards collected while stepping
                through the environment (timesteps s=t to s=t+N-1).
            values (SingleDataOp): DataOp (time x batch x values) with the value function estimates wrt. the
                learner's policy (pi) (timesteps s=t to s=t+N-1).
            bootstrapped_values (SingleDataOp): DataOp (time(1) x batch x values) with the last (bootstrapped)
                value estimate used as value function estimate after n time steps (V(xs) for s=t+N).

        Returns:
            tuple:
                - v-trace values (vs) in time x batch dimensions used to train the value-function (baseline).
                - PG-advantage values in time x batch dimensions used for training via policy gradient with baseline.
        """
        # Plain numpy variant (simple, not tuned for speed), meant for testing.
        if get_backend() == "python" or self.backend == "python":
            # log(pi / mu) = log(pi) - log(mu), then keep only the entries of the taken actions.
            log_ratio = np.log(softmax(logits_actions_pi, axis=-1)) - log_probs_actions_mu
            importance = np.exp(np.sum(log_ratio * actions_flat, axis=-1, keepdims=True))

            # Clip the IS weights (a clip value of None disables the respective clipping).
            rho_t = importance if self.rho_bar is None else np.minimum(self.rho_bar, importance)
            rho_t_pg = importance if self.rho_bar_pg is None else np.minimum(self.rho_bar_pg, importance)
            c_i = importance if self.c_bar is None else np.minimum(self.c_bar, importance)

            # V(x_t+1): `values` shifted by one step, closed off with the bootstrapped value.
            next_values = np.concatenate((values[1:], bootstrapped_values), axis=0)
            td_terms = rho_t * (rewards + discounts * next_values - values)

            # Backward recursion over time, seeded with zeros shaped like one time slice.
            accumulated = [np.zeros_like(np.squeeze(bootstrapped_values, axis=0))]
            for gamma, clip, td in zip(discounts[::-1], c_i[::-1], td_terms[::-1]):
                accumulated.append(td + gamma * clip * accumulated[-1])

            # Restore forward time order and drop the zero seed (now the last entry).
            vs_minus_v_xs = np.array(accumulated[::-1])[:-1]

            # v_s = (v_s - V(x_s)) + V(x_s).
            vs = vs_minus_v_xs + values

            # Policy-gradient advantages based on the v-trace values of the next step.
            next_vs = np.concatenate([vs[1:], bootstrapped_values], axis=0)
            pg_advantages = rho_t_pg * (rewards + discounts * next_vs - values)

            return vs, pg_advantages

        elif get_backend() == "tf":
            # logIS = log(pi(a|s)) - log(mu(a|s)), restricted to the actions actually taken.
            log_pi_taken = tf.expand_dims(
                -tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_actions_pi, labels=actions),
                axis=-1
            )
            log_mu_taken = tf.reduce_sum(
                input_tensor=log_probs_actions_mu * actions_flat,
                axis=-1,
                keepdims=True,
                name="log-probs-actions-taken-mu"
            )
            log_is_weights = log_pi_taken - log_mu_taken
            is_weights = tf.exp(x=log_is_weights, name="is-weights-from-logs")

            # rho-bar, rho-bar-pg and c-bar clipping; a clip value of None leaves the weights untouched.
            rho_t = is_weights if self.rho_bar is None else \
                tf.minimum(x=self.rho_bar, y=is_weights, name="clip-rho-bar")
            rho_t_pg = is_weights if self.rho_bar_pg is None else \
                tf.minimum(x=self.rho_bar_pg, y=is_weights, name="clip-rho-bar-pg")
            c_i = is_weights if self.c_bar is None else \
                tf.minimum(x=self.c_bar, y=is_weights, name="clip-c-bar")

            # `values` without its first time slice, with the bootstrapped V value (s=t+N) appended.
            values_t_plus_1 = tf.concat(values=[values[1:], bootstrapped_values], axis=0, name="values-t-plus-1")
            # Temporal difference terms (delta-t-V) for each s=t to s=t+N-1.
            dt_vs = rho_t * (rewards + discounts * values_t_plus_1 - values)

            # Recursion (run from the end of the trajectory backwards):
            #    vs = V(xs) + dsV + gamma * cs * (vs+1 - V(s+1))
            # => (vs - V(xs)) = dsV + gamma * cs * (vs+1 - V(s+1))
            # So: compute all [vs - V(xs)] terms first, then add V(xs) back in.
            reversed_inputs = (
                tf.reverse(tensor=discounts, axis=[0], name="revert-discounts"),
                tf.reverse(tensor=c_i, axis=[0], name="revert-c-i"),
                tf.reverse(tensor=dt_vs, axis=[0], name="revert-dt-vs"),
            )

            def backward_step(carry, step_inputs):
                gamma_t, c_t, dt_v = step_inputs
                return dt_v + gamma_t * c_t * carry

            zero_seed = tf.zeros_like(tensor=tf.squeeze(bootstrapped_values, axis=0))
            vs_minus_v_xs = tf.scan(
                fn=backward_step,
                elems=reversed_inputs,
                initializer=zero_seed,
                parallel_iterations=1,
                back_prop=False,
                name="v-trace-scan"
            )
            # Undo the time reversal.
            vs_minus_v_xs = tf.reverse(tensor=vs_minus_v_xs, axis=[0], name="revert-vs-minus-v-xs")

            # v_s = (v_s - V(x_s)) + V(x_s).
            vs = tf.add(x=vs_minus_v_xs, y=values)

            # Advantages for the policy gradient loss term: A = Q - V, with Q built from the v-trace
            # values (qs = rs + gamma * vs+1) and V the approximate value function output.
            vs_t_plus_1 = tf.concat(values=[vs[1:], bootstrapped_values], axis=0)
            pg_advantages = rho_t_pg * (rewards + discounts * vs_t_plus_1 - values)

            # Neither output lets gradients flow back through it.
            return tf.stop_gradient(vs), tf.stop_gradient(pg_advantages)
Example #18
0
    def _graph_fn_calc_sequence_lengths(self, sequence_indices):
        """
        Turns a tensor of sequence markers into the lengths of the sequences it describes.

        Elements are counted one by one; an entry equal to 1 closes the running sequence (that entry is
        included in the count) and the counter restarts at 0. Elements left over after the last marker
        form one additional, final sequence.

        Args:
            sequence_indices (DataOp): Indices denoting sequences, e.g. terminal values.

        Returns:
            Sequence lengths.
        """
        if get_backend() == "tf":
            num_elems = tf.shape(input=sequence_indices)[0]
            # Dynamically growing int32 array holding one entry per finished sequence.
            lengths_array = tf.TensorArray(
                dtype=tf.int32, infer_shape=False, size=1, dynamic_size=True, clear_after_read=False
            )

            def flush(slot, array, run_length):
                # Store the finished length, advance the write slot, restart the counter.
                array = array.write(slot, run_length)
                return array, slot + 1, 0

            def step(pos, run_length, array, slot):
                run_length += 1

                # On a marker: flush the running length; otherwise pass everything through unchanged.
                array, slot, run_length = tf.cond(
                    pred=tf.equal(sequence_indices[pos], tf.ones_like(sequence_indices[pos])),
                    true_fn=lambda: flush(slot, array, run_length),
                    false_fn=lambda: (array, slot, run_length)
                )
                return pos + 1, run_length, array, slot

            def keep_going(pos, run_length, array, slot):
                return pos < num_elems

            _, leftover, lengths_array, next_slot = tf.while_loop(
                cond=keep_going,
                body=step,
                loop_vars=[0, 0, lengths_array, 0],
                back_prop=False
            )
            # A leftover of 0 means the last element was a marker and its sequence is already stored.
            lengths_array, _, _ = tf.cond(
                pred=tf.greater(leftover, 0),
                true_fn=lambda: flush(next_slot, lengths_array, leftover),
                false_fn=lambda: (lengths_array, next_slot, leftover)
            )
            return lengths_array.stack()
        elif get_backend() == "pytorch":
            lengths = []
            run_length = 0
            for marker in sequence_indices:
                run_length += 1
                if marker == 1:
                    lengths.append(run_length)
                    run_length = 0
            # Trailing elements without a closing marker make up the final sequence.
            if run_length > 0:
                lengths.append(run_length)
            return torch.tensor(lengths, dtype=torch.int32)
Example #19
0
    def _graph_fn_get_probabilities_log_probs(self, logits):
        """
        Creates probabilities/parameters and log-probs from some reshaped output.

        For an IntBox action space, the parameters are the softmax of `logits` over the last axis, bounded
        below by a small constant so that taking the log is safe.
        For a FloatBox action space, `logits` is split into two halves along axis 1 (mean and log-sd), the
        log-sd is clipped to [log(SMALL_NUMBER), -log(SMALL_NUMBER)] and exponentiated to get the sd.

        Args:
            logits (SingleDataOp): The output of some layer that is already reshaped
                according to our action Space.

        Returns:
            tuple (2x SingleDataOp):
                parameters (DataOp): The parameters, ready to be passed to a Distribution object's
                    get_distribution API-method (usually some probabilities or loc/scale pairs).
                log_probs (DataOp): log(parameters) for discrete actions; the tuple (log(mean), log_sd) for
                    continuous actions.

        Raises:
            NotImplementedError: If the action space is neither an IntBox nor a FloatBox.
        """
        if get_backend() == "tf":
            if isinstance(self.action_space, IntBox):
                # Discrete actions.
                parameters = tf.maximum(x=tf.nn.softmax(logits=logits,
                                                        axis=-1),
                                        y=SMALL_NUMBER)
                parameters._batch_rank = 0
                # Log probs.
                log_probs = tf.log(x=parameters)
                log_probs._batch_rank = 0
            elif isinstance(self.action_space, FloatBox):
                # Continuous actions: two equally sized parts along axis 1.
                mean, log_sd = tf.split(value=logits,
                                        num_or_size_splits=2,
                                        axis=1)
                # Remove moments rank.
                mean = tf.squeeze(input=mean, axis=1)
                log_sd = tf.squeeze(input=log_sd, axis=1)

                # Clip log_sd. log(SMALL_NUMBER) is negative.
                log_sd = tf.clip_by_value(t=log_sd,
                                          clip_value_min=log(SMALL_NUMBER),
                                          clip_value_max=-log(SMALL_NUMBER))

                # Turn log sd into sd.
                sd = tf.exp(x=log_sd)

                parameters = DataOpTuple(mean, sd)
                log_probs = DataOpTuple(tf.log(x=mean), log_sd)
            else:
                raise NotImplementedError

            return parameters, log_probs

        elif get_backend() == "pytorch":
            if isinstance(self.action_space, IntBox):
                # Discrete actions.
                softmax_logits = torch.softmax(logits, dim=-1)
                parameters = torch.max(softmax_logits, SMALL_NUMBER_TORCH)
                # Log probs.
                log_probs = torch.log(parameters)
            elif isinstance(self.action_space, FloatBox):
                # Continuous actions: two equally sized parts along dim 1, mirroring the tf branch.
                # `torch.split` takes a chunk SIZE (not a number of parts), which yields a single tensor
                # for a size-2 moments dim; `torch.chunk` takes the number of parts.
                mean, log_sd = torch.chunk(logits, chunks=2, dim=1)
                # Remove moments rank.
                mean = torch.squeeze(mean, dim=1)
                log_sd = torch.squeeze(log_sd, dim=1)

                # Clip log_sd. log(SMALL_NUMBER) is negative.
                log_sd = torch.clamp(log_sd,
                                     min=LOG_SMALL_NUMBER,
                                     max=-LOG_SMALL_NUMBER)

                # Turn log sd into sd.
                sd = torch.exp(log_sd)

                parameters = DataOpTuple(mean, sd)
                log_probs = DataOpTuple(torch.log(mean), log_sd)
            else:
                raise NotImplementedError

            return parameters, log_probs
Example #20
0
    def _graph_fn_decayed_value(self, time_step):
        """
        Maps a global time step onto the decayed value.

        The result is `self.from_` while `time_step <= self.start_timestep`, `self.to_` once
        `time_step >= self.start_timestep + self.num_timesteps`, and otherwise whatever
        `self._graph_fn_decay` returns for the (float) number of steps since `self.start_timestep`.

        Args:
            time_step (DataOp): The int-type DataOp that holds the current global time_step.

        Returns:
            DataOp: The decay'd value depending on the current time step.
        """
        if get_backend() == "tf":
            before_start = time_step <= self.start_timestep
            ts_shape = tf.shape(time_step)

            # Rank >= 1: time_step comes in as a time-sequence of time-steps -> element-wise selection.
            if ts_shape.shape[0] > 0:
                # Pre-decay time.
                from_tiled = tf.tile(tf.constant([self.from_]), multiples=ts_shape)
                # Post-decay time.
                after_end = time_step >= self.start_timestep + self.num_timesteps
                to_tiled = tf.tile(tf.constant([self.to_]), multiples=ts_shape)
                # Inside the decay time window.
                in_window = self._graph_fn_decay(
                    tf.cast(x=time_step - self.start_timestep, dtype=util.convert_dtype("float"))
                )
                past_start = tf.where(condition=after_end, x=to_tiled, y=in_window, name="cond-past-end-time")
                return tf.where(
                    condition=before_start, x=from_tiled, y=past_start, name="cond-before-start-time"
                )

            # Single 0D time step -> nested conditionals.
            def past_start_value():
                return tf.cond(
                    pred=(time_step >= self.start_timestep + self.num_timesteps),
                    # Post-decay time.
                    true_fn=lambda: self.to_,
                    # Inside the decay time window.
                    false_fn=lambda: self._graph_fn_decay(
                        tf.cast(x=time_step - self.start_timestep, dtype=util.convert_dtype("float"))
                    ),
                )

            return tf.cond(pred=before_start, true_fn=lambda: self.from_, false_fn=past_start_value)
        elif get_backend() == "pytorch":
            if time_step is None:
                time_step = torch.tensor([0])
            before_start = time_step <= self.start_timestep
            # Give 0D inputs a leading dimension.
            if time_step.dim() == 0:
                time_step = time_step.unsqueeze(-1)
            ts_shape = time_step.shape

            # time_step comes in as a time-sequence of time-steps.
            # TODO tile shape is confusing -> num tiles should be shape[0] not shape?
            if ts_shape[0] > 0:
                after_end = time_step >= self.start_timestep + self.num_timesteps
                # Post-decay time.
                to_tiled = pytorch_tile(torch.tensor([self.to_]), ts_shape)
                # Inside the decay time window.
                in_window = torch.tensor(
                    self._graph_fn_decay(torch.FloatTensor([time_step - self.start_timestep]))
                )
                past_start = torch.where(after_end, to_tiled, in_window)
                # Pre-decay time vs. everything after it.
                return torch.where(before_start, pytorch_tile(torch.tensor([self.from_]), ts_shape), past_start)

            # NOTE(review): 0D inputs were unsqueezed above, so this path looks reachable only when the
            # first dimension is empty - confirm.
            if before_start:
                return self.from_
            if time_step >= self.start_timestep + self.num_timesteps:
                return self.to_
            return self._graph_fn_decay(torch.FloatTensor([time_step - self.start_timestep]))
Example #21
0
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from rlgraph import get_backend
from rlgraph.components.component import Component
from rlgraph.components.layers.nn.lstm_layer import LSTMLayer
from rlgraph.components.neural_networks.stack import Stack
from rlgraph.utils import force_tuple, force_list
from rlgraph.utils.decorators import rlgraph_api

if get_backend() == "pytorch":
    import torch


class NeuralNetwork(Stack):
    """
    A NeuralNetwork is a Stack, in which the `apply` method is defined either by custom-API-method OR by connecting
    through all sub-Components' `apply` methods.
    In both cases, a dict should be returned with at least the `output` key set. Possible further keys could
    be `last_internal_states` for RNN-based NNs and other keys.

    No other API methods other than `apply` should be defined/used.

    TODO: A NeuralNetwork Component correctly handles RNN layers in terms of
    """
    def __init__(self, *layers, **kwargs):
Example #22
0
    def __init__(
            self,
            component,
            input_spaces=None,
            action_space=None,
            seed=10,
            logging_level=None,
            execution_spec=None,
            # TODO: Move all the below into execution_spec just like for Agent class.
            enable_profiler=False,
            disable_monitoring=False,
            device_strategy="default",
            device_map=None,
            backend=None,
            auto_build=True):
        """
        Sets up a GraphBuilder and a GraphExecutor for the given Component and (optionally) builds right away.

        Args:
            component (Component): The Component to be tested (may contain sub-components).
            input_spaces (Optional[dict]): Dict mapping the component's API input-parameter names to Space
                objects or Space specs. None, if the Component to be tested has no API methods with input
                parameters.
            action_space (Optional[Space]): The action space to pass into the GraphBuilder.
            seed (Optional[int]): Seed handed to `np.random.seed` and `random.seed` and put into the default
                execution spec. If None, things may behave non-deterministically.
            logging_level (Optional[int]): When provided, sets RLGraph's root_logger's logging level to this value.
            execution_spec (Optional[dict]): Specification dict for execution settings. When given (and
                non-empty), it is used as is and the default spec built from the arguments below is ignored.
            enable_profiler (bool): Goes into the default execution spec. Default: False.
            disable_monitoring (bool): Goes into the default execution spec. Default: False.
            device_strategy (str): Goes into the default execution spec.
            device_map (Optional[Dict[str,str]]): Goes into the default execution spec.
            backend (Optional[str]): Backend to create the GraphExecutor for; if None, the global backend
                (`get_backend()`) is used.
            auto_build (Optional[bool]): If false, build has to be triggered manually to eval build stats.
        """
        # Seed numpy's and python's random number generators.
        self.seed = seed
        np.random.seed(seed)
        random.seed(seed)

        if logging_level is not None:
            root_logger.setLevel(logging_level)

        # The builder plus the objects under test.
        self.graph_builder = GraphBuilder(action_space=action_space)
        self.component = component
        self.input_spaces = input_spaces

        # Fall back to a spec assembled from the individual arguments if none was passed in.
        if not execution_spec:
            execution_spec = {
                "seed": self.seed,
                "enable_profiler": enable_profiler,
                "profiler_frequency": 1,
                "device_strategy": device_strategy,
                "disable_monitoring": disable_monitoring,
                "device_map": device_map,
            }
        execution_spec = parse_execution_spec(execution_spec)

        # An explicitly passed backend wins over the global setting.
        use_backend = get_backend() if backend is None else backend
        self.graph_executor = GraphExecutor.from_spec(
            use_backend, graph_builder=self.graph_builder, execution_spec=execution_spec
        )

        if not auto_build:
            print(
                "Auto-build false, did not build. Waiting for manual build..")
        else:
            self.build()
Example #23
0
        def call(*args):
            """
            Builds the in-graph op(s) that forward a call of `method_name` through `self.out_pipe`.

            Looks up the expected return Spaces for `method_name` in `self.output_spaces` (a dict or a
            callable), derives dtypes and shapes from them and wraps the pipe round-trip in a `tf.py_func`.

            Args:
                *args: The arguments to send along with the method name.

            Returns:
                The single resulting op/tensor if exactly one return value is expected, otherwise a tuple
                of all results.

            Raises:
                RLGraphError: If no Space information is available for `method_name`.
                NotImplementedError: If the backend is not "tf".
            """
            if not isinstance(self.output_spaces, dict):
                specs = self.output_spaces(method_name)
            else:
                assert method_name in self.output_spaces, "ERROR: Method '{}' not specified in output_spaces: {}!".\
                    format(method_name, self.output_spaces)
                specs = self.output_spaces[method_name]

            if specs is None:
                raise RLGraphError(
                    "No Space information received for method '{}:{}'".format(self.class_.__name__, method_name)
                )

            dtypes, shapes, return_slots = [], [], []
            for slot, space in enumerate(force_list(specs)):
                assert not isinstance(space, ContainerSpace)
                if space == 0:
                    # Expecting an op (space 0).
                    entry = (0, 0)
                elif space is not None:
                    # Expecting a tensor.
                    entry = (dtype(space.dtype), space.shape)
                else:
                    # None -> nothing expected for this slot.
                    continue
                dtypes.append(entry[0])
                shapes.append(entry[1])
                return_slots.append(slot)

            if get_backend() != "tf":
                raise NotImplementedError

            # Sends the method call via the out-pipe and hands back whatever comes back through it, so the
            # results can be used further by other graph ops.
            def py_call(*args_):
                decoded = [item.decode('UTF-8') if isinstance(item, bytes) else item for item in args_]
                try:
                    self.out_pipe.send(decoded)
                    reply = self.out_pipe.recv()

                    # Errors are passed back through the pipe as Exception objects.
                    if isinstance(reply, Exception):
                        raise reply
                    if reply is not None:
                        return reply
                except IOError:
                    raise StopIteration()  # Clean exit.

            results = tf.py_func(py_call, (method_name,) + tuple(args), dtypes, name=method_name)

            # Force known shapes on the returned tensors (ops are marked by shape=0 and are left alone).
            for result, shape in zip(results, shapes):
                if shape != 0:
                    result.set_shape(shape)

            if len(dtypes) == 1:
                return results[0]
            return tuple(results)
Example #24
0
 def _graph_fn_set_episode_reward(root, episode_reward):
     """
     Stores `episode_reward` in `root.episode_reward`.

     Returns:
         The `tf.assign` op for the "tf" backend, None otherwise (for "pytorch", the attribute is
         overwritten directly).
     """
     backend = get_backend()
     if backend == "tf":
         return tf.assign(root.episode_reward, episode_reward)
     if backend == "pytorch":
         root.episode_reward = episode_reward
     return None
Example #25
0
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from functools import partial

from rlgraph import get_backend
from rlgraph.utils.rlgraph_errors import RLGraphError

if get_backend() == "tf":
    import tensorflow as tf
elif get_backend() == "pytorch":
    import torch.nn as nn


def get_activation_function(activation_function=None, *other_parameters):
    """
    Returns an activation function (callable) to use in a NN layer.

    Args:
        activation_function (Optional[callable,str]): The activation function to lookup. Could be given as:
            - already a callable (return just that)
            - a lookup key (str)
            - None: Use linear activation.
Example #26
0
 def _graph_fn_simple_square_loss(self):
     """
     Builds the loss `square(log(self.variable))`.

     Returns:
         any: The squared-log op for the "tf" backend, None for every other backend.
     """
     if get_backend() != "tf":
         return None

     log_of_variable = tf.log(self.variable)
     return tf.square(x=log_of_variable)
Example #27
0
 def _graph_fn_group(root, *ops):
     """
     Bundles the given ops into a single return value.

     Args:
         root (any): Unused by this function.
         *ops (any): The ops to bundle.

     Returns:
         any: `tf.group(*ops)` for the "tf" backend, otherwise the first item of `ops`.
     """
     if get_backend() != "tf":
         # No grouping available: Pass back the first op only.
         return ops[0]
     return tf.group(*ops)
 def _graph_fn_reset(self, *preprocessor_resets):
     """
     Creates a no-op that is built under control dependencies on the given reset ops.

     Args:
         *preprocessor_resets (any): The ops passed to `tf.control_dependencies`.

     Returns:
         any: The `tf.no_op()` for the "tf" backend, None for every other backend.
     """
     if get_backend() != "tf":
         return None

     # The no-op must be created inside the control-dependencies context.
     with tf.control_dependencies(preprocessor_resets):
         reset_op = tf.no_op()
     return reset_op
Example #29
0
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import contextlib

from rlgraph import get_backend

# TF specific scope/device utilities.
if get_backend() == "tf":
    import tensorflow as tf

    @contextlib.contextmanager
    def pin_global_variables(device):
        """Pins global variables to the specified device."""
        def getter(getter, *args, **kwargs):
            """
            Calls the wrapped `getter`, placing the call under `tf.device(device)` when the
            requested collections include `tf.GraphKeys.GLOBAL_VARIABLES`.

            Args:
                getter (callable): The wrapped getter; receives `*args` and `**kwargs` unchanged.

            Returns:
                any: Whatever the wrapped `getter` returns.
            """
            global_key = tf.GraphKeys.GLOBAL_VARIABLES
            requested = kwargs.get('collections')
            # A missing (or None) `collections` entry is treated as [GLOBAL_VARIABLES].
            if requested is None:
                requested = [global_key]

            if global_key not in requested:
                return getter(*args, **kwargs)

            with tf.device(device):
                return getter(*args, **kwargs)
Example #30
0
    def init_execution(self):         \
        # TODO Import guards here are annoying but otherwise breaks if torch is not installed.

        if get_backend() == "torch":
            torch.set_num_threads(self.torch_num_threads)
            os.environ["OMP_NUM_THREADS"] = str(self.omp_num_threads)