Example #1
    def translate_space(space, dtype=None, force_float32=False):
        """
        Translates openAI spaces into RLGraph Space classes.

        Args:
            space (gym.spaces.Space): The openAI Space to be translated.
            dtype (Optional[str]): An optional dtype to override the Box space's own dtype with.
            force_float32 (bool): Whether to force float Box spaces to float32.

        Returns:
            Space: The translated rlgraph Space.
        """
        if isinstance(space, gym.spaces.Discrete):
            return IntBox(space.n)
        elif isinstance(space, gym.spaces.MultiBinary):
            return BoolBox(shape=(space.n, ))
        elif isinstance(space, gym.spaces.MultiDiscrete):
            # `low` is a zero-vector (uint8); the per-dimension upper bounds come from `nvec`.
            return IntBox(low=np.zeros((space.nvec.ndim, ), dtype=np.uint8),
                          high=space.nvec)
        elif isinstance(space, gym.spaces.Box):
            # Decide by dtype:
            box_dtype = str(dtype or space.low.dtype)
            if "int" in box_dtype:
                return IntBox(low=space.low, high=space.high, dtype=box_dtype)
            elif "float" in box_dtype:
                return FloatBox(
                    low=space.low,
                    high=space.high,
                    dtype="float32" if force_float32 is True else box_dtype)
            elif "bool" in box_dtype:
                return BoolBox(shape=space.shape)
        elif isinstance(space, gym.spaces.Tuple):
            return Tuple(*[
                OpenAIGymEnv.translate_space(s, dtype, force_float32)
                for s in space.spaces
            ])
        elif isinstance(space, gym.spaces.Dict):
            return Dict({
                key: OpenAIGymEnv.translate_space(value, dtype, force_float32)
                for key, value in space.spaces.items()
            })

        raise RLGraphError(
            "Unknown openAI gym Space class ({}) for state_space!".format(
                space))
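
A minimal usage sketch for the translation above (hedged: it assumes `gym` and `numpy` are importable and that `translate_space` is a static method on `OpenAIGymEnv`, as the recursive calls in the Tuple/Dict branches suggest):

    import gym
    import numpy as np

    # Discrete(4) -> IntBox with 4 categories.
    int_space = OpenAIGymEnv.translate_space(gym.spaces.Discrete(4))

    # A float64 Box -> FloatBox, forced here to float32.
    float_space = OpenAIGymEnv.translate_space(
        gym.spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float64),
        force_float32=True)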
Example #2
    def reduce(self, start, limit, reduce_op=operator.add):
        """
        Applies an operation to specified segment.

        Args:
            start (int): Start index to apply the reduction to.
            limit (Optional[int]): End index (exclusive) of the reduction. If None, defaults to the
                tree's capacity; negative values are counted from the end.
            reduce_op (Union[operator.add, min, max]): Reduce op to apply.

        Returns:
            Number: Result of the reduce operation.
        """
        if limit is None:
            limit = self.capacity
        if limit < 0:
            limit += self.capacity

        # Init result with neutral element of reduce op.
        # Note that all of these are commutative reduce ops.
        if reduce_op == operator.add:
            result = 0.0
        elif reduce_op == min:
            result = float('inf')
        elif reduce_op == max:
            result = float('-inf')
        else:
            raise RLGraphError(
                "Unsupported reduce OP. Support ops are [add, min, max].")
        start += self.capacity
        limit += self.capacity

        while start < limit:
            if start & 1:
                result = reduce_op(result, self.values[start])
                start += 1
            if limit & 1:
                limit -= 1
                result = reduce_op(result, self.values[limit])
            start = start >> 1
            limit = limit >> 1
        return result
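
The index arithmetic above is the standard iterative segment-tree reduction: leaves live at indices `[capacity, 2 * capacity)` and every parent node `i` aggregates its children `2i` and `2i + 1`. The following self-contained sketch (a hypothetical `SimpleSumTree`, not part of RLGraph) mirrors that traversal for the sum case:

    import operator

    class SimpleSumTree:
        def __init__(self, capacity):
            self.capacity = capacity                    # assumed to be a power of two
            self.values = [0.0] * (2 * capacity)        # internal nodes + leaves

        def insert(self, index, value):
            index += self.capacity                      # leaves live at [capacity, 2 * capacity)
            self.values[index] = value
            index >>= 1
            while index >= 1:                           # re-aggregate all ancestors up to the root
                self.values[index] = self.values[2 * index] + self.values[2 * index + 1]
                index >>= 1

        def reduce(self, start, limit, reduce_op=operator.add):
            # Same traversal as above: walk both boundaries up towards the root.
            result = 0.0
            start += self.capacity
            limit += self.capacity
            while start < limit:
                if start & 1:
                    result = reduce_op(result, self.values[start])
                    start += 1
                if limit & 1:
                    limit -= 1
                    result = reduce_op(result, self.values[limit])
                start >>= 1
                limit >>= 1
            return result

    tree = SimpleSumTree(capacity=4)
    for i, v in enumerate([1.0, 2.0, 3.0, 4.0]):
        tree.insert(i, v)
    assert tree.reduce(0, 2) == 3.0                     # leaves 0 and 1 -> 1.0 + 2.0
    assert tree.reduce(0, 4) == 10.0                    # all four leaves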
Example #3
def _sanity_check_call_parameters(self, params, method, method_type, add_auto_key_as_first_param):
    raw_signature_parameters = inspect.signature(method).parameters
    actual_params = list(raw_signature_parameters.values())
    if add_auto_key_as_first_param is True:
        actual_params = actual_params[1:]
    if len(params) != len(actual_params):
        # Check whether the last arg is var_positional (e.g. *inputs); in that case it's ok if the number of
        # given params is larger than the number of actual graph_fn params, or exactly one smaller.
        if actual_params[-1].kind == inspect.Parameter.VAR_POSITIONAL and (len(params) > len(actual_params) > 0 or
                                                                           len(params) == len(actual_params) - 1):
            pass
        # Some actual params have default values: Number of given params must be at least as large as the number
        # of non-default actual params but maximally as large as the number of actual_parameters.
        elif len(actual_params) >= len(params) >= sum(
                [p.default is inspect.Parameter.empty for p in actual_params]):
            pass
        else:
            raise RLGraphError(
                "ERROR: {} '{}/{}' has {} input-parameters, but {} ({}) were being provided in the "
                "`Component.call` method!".format(method_type, self.name, method.__name__,
                                                  len(actual_params), len(params), params)
            )
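
A standalone illustration of the default-value counting used in the second `elif` branch above (plain `inspect`, no RLGraph types involved):

    import inspect

    def example_fn(a, b, c=1, d=2):
        pass

    params = list(inspect.signature(example_fn).parameters.values())
    # Number of parameters without a default value ("required" params).
    num_required = sum(p.default is inspect.Parameter.empty for p in params)
    assert num_required == 2  # `a` and `b`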
Example #4
    def check_input_spaces(self, input_spaces, action_space=None):
        action_sample_space = input_spaces["actions"]

        if get_backend() == "tf":
            sanity_check_space(action_sample_space, must_have_batch_rank=True)

        assert action_space is not None
        self.action_space = action_space

        if self.epsilon_exploration and self.noise_component:
            # Check again at graph-creation time? This is currently redundant with the check in __init__.
            raise RLGraphError(
                "Cannot use both epsilon exploration and a noise component at the same time."
            )

        if self.epsilon_exploration:
            sanity_check_space(self.action_space,
                               allowed_types=[IntBox],
                               must_have_categories=True,
                               num_categories=(1, None))
        elif self.noise_component:
            sanity_check_space(self.action_space, allowed_types=[FloatBox])
Example #5
    def define_observations(self, observation_spec):
        """
        Creates an RLgraph Space for the given DeepMind Lab observation specifier.

        Args:
            observation_spec (List[str]): A list of the desired observation names, chosen from DeepMind Lab's
                available observations. Each available observation is a dict with the keys: name, shape and dtype.

        Returns:
            Space: The RLgraph equivalent observation Space.
        """
        dict_space = dict()
        space = None
        available_observations = self.level.observation_spec()
        for observation_name in observation_spec:
            # Find the observation_item in the observation_spec of the Env.
            observation_item = [
                o for o in available_observations
                if o["name"] == observation_name
            ][0]
            if "float" in str(observation_item["dtype"]):
                space = FloatBox(shape=observation_item["shape"],
                                 dtype=observation_item["dtype"])
            elif "int" in str(observation_item["dtype"]):
                space = IntBox(shape=observation_item["shape"],
                               dtype=observation_item["dtype"])
            elif "str" in str(observation_item["dtype"]):
                space = TextBox(shape=observation_item["shape"])
            else:
                raise RLGraphError(
                    "Unknown Deepmind Lab Space class for state_space!")

            dict_space[observation_name] = space

        if len(dict_space) == 1:
            return space
        else:
            return Dict(dict_space)
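
A standalone sketch of the name lookup above, using hypothetical data in place of `self.level.observation_spec()` (the real spec comes from the `deepmind_lab` level object):

    available_observations = [
        {"name": "RGB_INTERLEAVED", "shape": (84, 84, 3), "dtype": "uint8"},
        {"name": "DEBUG.POS.TRANS", "shape": (3,), "dtype": "float64"},
    ]
    # Note: the trailing `[0]` raises an IndexError if the requested name is not available.
    item = [o for o in available_observations if o["name"] == "RGB_INTERLEAVED"][0]
    assert item["shape"] == (84, 84, 3)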
Example #6
    def __init__(self,
                 num_iterations,
                 call_component,
                 graph_fn_name,
                 scope="fixed-loop",
                 **kwargs):
        """
        Args:
            num_iterations (int): The number of times to call the given graph_fn within the loop.
            call_component (Component): Component providing graph fn to call within loop.
            graph_fn_name (str): The name of the graph_fn in call_component.
        """
        assert num_iterations > 0

        super(FixedLoop, self).__init__(scope=scope, **kwargs)

        self.num_iterations = num_iterations
        self.graph_fn_to_call = None

        flatten_ops = False
        for graph_fn in call_component.graph_fns:
            if graph_fn.name == graph_fn_name:
                self.graph_fn_to_call = graph_fn.get_method()
                flatten_ops = graph_fn.flatten_ops
                break
        if not self.graph_fn_to_call:
            raise RLGraphError(
                "ERROR: GraphFn '{}' not found in Component '{}'!".format(
                    graph_fn_name, call_component.global_scope))

        # TODO: Do we sum up, append to list, ...?
        #self.define_inputs("inputs")
        # self.define_outputs("fixed_loop_result")

        self.add_components(call_component)
        self.define_api_method(name="call_loop",
                               func=self._graph_fn_call_loop,
                               flatten_ops={"inputs"} if flatten_ops else None)
Example #7
    def connect_to(self, next_op_rec):
        """
        Connects this op-rec to a next one by passing on the `op` and `space` properties
        and correctly setting the `next` and `previous` pointers in both op-recs.

        Args:
            next_op_rec (DataOpRecord): The next DataOpRecord to connect this one to.
        """
        # If already connected, make sure connection is the same as the already existing one.
        if next_op_rec.previous is not None:
            assert next_op_rec.previous is self
        else:
            # Set `previous` pointer.
            next_op_rec.previous = self

        # We do have an op -> Pass it (and its Space) on to the next op-rec.
        if self.op is not None:
            # Push op and Space into next op-record.
            # With op-instructions?
            #if "key-lookup" in next_op_rec.op_instructions:
            if "key-lookup" in self.op_instructions:
                lookup_key = self.op_instructions["key-lookup"]
                if isinstance(lookup_key, str):
                    found_op = None
                    found_space = None
                    if isinstance(self.op, dict):
                        assert isinstance(self.op, DataOpDict)
                        if lookup_key in self.op:
                            found_op = self.op[lookup_key]
                            found_space = self.space[lookup_key]
                        # Lookup-key could also be a flat-key. -> Try to find entry in nested (dict) op.
                        else:
                            found_op = self.op.flat_key_lookup(lookup_key)
                            if found_op is not None:
                                found_space = self.space.flat_key_lookup(lookup_key)

                    # Did we find anything? If not, error for invalid key-lookup.
                    if found_op is None or found_space is None:
                        raise RLGraphError(
                            "Op ({}) is not a dict or does not contain the lookup key '{}'!". \
                            format(self.op, lookup_key)
                        )

                    next_op_rec.op = found_op
                    next_op_rec.space = found_space

                elif isinstance(lookup_key, int) and \
                        (not isinstance(self.op, (list, tuple)) or lookup_key >= len(self.op)):
                    raise RLGraphError(
                        "Op ({}) is not a list/tuple or contains not enough items for lookup "
                        "index '{}'!".format(self.op, lookup_key)
                    )

                else:
                    next_op_rec.op = self.op[lookup_key]
                    next_op_rec.space = self.space[lookup_key]
            # No instructions -> simply pass on.
            else:
                next_op_rec.op = self.op
                next_op_rec.space = self.space

            assert next_op_rec.space is not None
            #next_op_rec.space = get_space_from_op(self.op)

        # Add `next` connection.
        self.next.add(next_op_rec)
Example #8
    def _graph_fn_sync(self, values_):
        """
        Generates the op that syncs this Synchronizable's parent's variable values from another Synchronizable
        Component.

        Args:
            values_ (DataOpDict): The dict of variable values (coming from the "_variables"-Socket of any other
                Component) that need to be assigned to this Component's parent's variables.
                The keys in the dict refer to the names of our parent's variables and must match their names.

        Returns:
            DataOp: The op that executes the syncing.
        """
        # Loop through all incoming vars and our own and collect assign ops.
        syncs = []
        # Sanity checking
        if get_backend() == "tf":
            parents_vars = self.parent_component.get_variables(
                collections=self.collections, custom_scope_separator="-")
            syncs_from, syncs_to = (sorted(values_.items()),
                                    sorted(parents_vars.items()))
            if len(syncs_from) != len(syncs_to):
                raise RLGraphError(
                    "ERROR: Number of Variables to sync must match! "
                    "We have {} syncs_from and {} syncs_to.".format(
                        len(syncs_from), len(syncs_to)))
            for (key_from, var_from), (key_to,
                                       var_to) in zip(syncs_from, syncs_to):
                # Sanity checking. TODO: Check the names' ends? Without the global scope?
                #if key_from != key_to:
                #    raise RLGraphError("ERROR: Variable names for syncing must match in order and name! "
                #                    "Mismatch at from={} and to={}.".format(key_from, key_to))
                if get_shape(var_from) != get_shape(var_to):
                    raise RLGraphError(
                        "ERROR: Variable shapes for syncing must match! "
                        "Shape mismatch between from={} ({}) and to={} ({}).".
                        format(key_from, get_shape(var_from), key_to,
                               get_shape(var_to)))
                syncs.append(self.assign_variable(var_to, var_from))

            # Bundle everything into one "sync"-op.
            with tf.control_dependencies(syncs):
                return tf.no_op(
                    name="sync-to-{}".format(self.parent_component.name))

        elif get_backend() == "pytorch":
            # Get refs(!)
            parents_vars = self.parent_component.get_variables(
                collections=self.collections,
                custom_scope_separator="-",
                get_ref=True)
            syncs_from, sync_to_ref = (sorted(values_.items()),
                                       sorted(parents_vars.items()))

            # Assign parameters of layers.
            for (key_from, var_from), (key_to,
                                       ref_to) in zip(syncs_from, sync_to_ref):
                ref_to.set_value(var_from)
            return None
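
The TF branch above pairs source and target variables purely by sorted key order. A plain-Python sketch of that pairing (the variable names are hypothetical):

    values_ = {"dense/kernel": 1.0, "dense/bias": 2.0}
    parents_vars = {"dense/bias": 20.0, "dense/kernel": 10.0}

    syncs_from, syncs_to = sorted(values_.items()), sorted(parents_vars.items())
    # zip() pairs entries by identical sorted position, so keys line up as long as both
    # dicts contain the same variable names.
    for (key_from, val_from), (key_to, val_to) in zip(syncs_from, syncs_to):
        assert key_from == key_to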
Example #9
        def api_method_wrapper(self, *args, **kwargs):
            api_fn_name = name or re.sub(r'^_graph_fn_', "",
                                         wrapped_func.__name__)
            # Direct evaluation of function.
            if self.execution_mode == "define_by_run":
                type(self).call_count += 1

                start = time.perf_counter()
                # Check with owner if extra args needed.
                if api_fn_name in self.api_methods and self.api_methods[
                        api_fn_name].add_auto_key_as_first_param:
                    output = wrapped_func(self, "", *args, **kwargs)
                else:
                    output = wrapped_func(self, *args, **kwargs)

                # Store runtime for this method.
                type(self).call_times.append(  # Component.call_times
                    (self.name, wrapped_func.__name__,
                     time.perf_counter() - start))
                return output

            api_method_rec = self.api_methods[api_fn_name]

            # Sanity check input args for accidental dict-return values being passed into the next API call as
            # supposed DataOpRecords.
            dict_args = [
                next(iter(a.values())) for a in args if isinstance(a, dict)
            ]
            if len(dict_args) > 0 and isinstance(dict_args[0], DataOpRecord):
                raise RLGraphError(
                    "One of your input args to API-method '{}.{}()' is a dict of DataOpRecords! This is probably "
                    "coming from a previous call to an API-method (returning a dict) and the DataOpRecord should be "
                    "extracted by string-key and passed into '{}' "
                    "directly.".format(api_method_rec.component.global_scope,
                                       api_fn_name, api_fn_name))
            # Create op-record column to call API method with. Ignore None input params. These should not be sent
            # to the API-method.
            in_op_column = DataOpRecordColumnIntoAPIMethod(
                component=self,
                api_method_rec=api_method_rec,
                args=args,
                kwargs=kwargs)
            # Add the column to the API-method record.
            api_method_rec.in_op_columns.append(in_op_column)

            # Check minimum number of passed args.
            minimum_num_call_params = len(in_op_column.api_method_rec.non_args_kwargs) - \
                len(in_op_column.api_method_rec.default_args)
            if len(in_op_column.op_records) < minimum_num_call_params:
                raise RLGraphAPICallParamError(
                    "Number of call params ({}) for call to API-method '{}' is too low. Needs to be at least {} "
                    "params!".format(len(in_op_column.op_records),
                                     api_method_rec.name,
                                     minimum_num_call_params))

            # Link from incoming op_recs into the new column or populate new column with ops/Spaces (this happens
            # if this call was made from within a graph_fn such that ops and Spaces are already known).
            all_args = [(i, a) for i, a in enumerate(args) if a is not None] + \
                       [(k, v) for k, v in sorted(kwargs.items()) if v is not None]
            flex = None
            build_when_done = False
            for i, (key, value) in enumerate(all_args):
                # Named arg/kwarg -> get input_name from that and peel op_rec.
                if isinstance(key, str):
                    param_name = key
                # Positional arg -> get input_name from input_names list.
                else:
                    slot = key if flex is None else flex
                    if slot >= len(api_method_rec.input_names):
                        raise RLGraphAPICallParamError(
                            "Too many input args given in call to API-method '{}'!"
                            .format(api_method_rec.name))
                    param_name = api_method_rec.input_names[slot]

                # Var-positional arg, attach the actual position to input_name string.
                if self.api_method_inputs.get(param_name, "") == "*flex":
                    if flex is None:
                        flex = i
                    param_name += "[{}]".format(i - flex)
                # Actual kwarg (not in list of api_method_inputs).
                elif api_method_rec.kwargs_name is not None and param_name not in self.api_method_inputs:
                    param_name = api_method_rec.kwargs_name + "[{}]".format(
                        param_name)

                # We are already in building phase (params may be coming from inside graph_fn).
                if self.graph_builder is not None and self.graph_builder.phase == "building":
                    # If Space not stored yet, determine it from op.
                    assert in_op_column.op_records[i].op is not None
                    if in_op_column.op_records[i].space is None:
                        in_op_column.op_records[i].space = get_space_from_op(
                            in_op_column.op_records[i].op)
                    self.api_method_inputs[
                        param_name] = in_op_column.op_records[i].space
                    # Check input-completeness of Component (but not strict as we are only calling API, not a graph_fn).
                    if self.input_complete is False:
                        # Build right after this loop in case more Space information comes in through next args/kwargs.
                        build_when_done = True

                # A DataOpRecord from the meta-graph.
                elif isinstance(value, DataOpRecord):
                    # Create entry with unknown Space if it doesn't exist yet.
                    if param_name not in self.api_method_inputs:
                        self.api_method_inputs[param_name] = None

                # Fixed value (instead of op-record): Store the fixed value directly in the op.
                else:
                    if self.api_method_inputs.get(param_name) is None:
                        self.api_method_inputs[
                            param_name] = in_op_column.op_records[i].space

            if build_when_done:
                # Check Spaces and create variables.
                self.graph_builder.build_component_when_input_complete(self)

            # Regular API-method: Call it here.
            api_fn_args, api_fn_kwargs = in_op_column.get_args_and_kwargs()

            if api_method_rec.is_graph_fn_wrapper is False:
                return_values = wrapped_func(self, *api_fn_args,
                                             **api_fn_kwargs)
            # Wrapped graph_fn: Call it through yet another wrapper.
            else:
                return_values = graph_fn_wrapper(
                    self, wrapped_func, returns,
                    dict(
                        flatten_ops=flatten_ops,
                        split_ops=split_ops,
                        add_auto_key_as_first_param=add_auto_key_as_first_param,
                        requires_variable_completeness=
                        requires_variable_completeness), *api_fn_args,
                    **api_fn_kwargs)

            # Process the results (push into a column).
            out_op_column = DataOpRecordColumnFromAPIMethod(
                component=self,
                api_method_name=api_fn_name,
                args=util.force_tuple(return_values)
                if type(return_values) != dict else None,
                kwargs=return_values if type(return_values) == dict else None)

            # If we already have actual op(s) and Space(s), push them already into the
            # DataOpRecordColumnFromAPIMethod's records.
            if self.graph_builder is not None and self.graph_builder.phase == "building":
                # Link the returned ops to that new out-column.
                for i, rec in enumerate(out_op_column.op_records):
                    out_op_column.op_records[i].op = rec.op
                    out_op_column.op_records[i].space = rec.space
            # And append the new out-column to the api-method-rec.
            api_method_rec.out_op_columns.append(out_op_column)

            # Do we need to return the raw ops or the op-recs?
            # Only need to check if False, otherwise, we return ops directly anyway.
            return_ops = False
            stack = inspect.stack()
            f_locals = stack[1][0].f_locals
            # We may be in a list comprehension, try next frame.
            if f_locals.get(".0"):
                f_locals = stack[2][0].f_locals
            # Check whether the caller component is a parent of this one.
            caller_component = f_locals.get(
                "root", f_locals.get("self_", f_locals.get("self")))

            # Potential call from a lambda.
            if caller_component is None and "fn" in stack[2][0].f_locals:
                # This is the component.
                prev_caller_component = TraceContext.PREV_CALLER
                lambda_obj = stack[2][0].f_locals["fn"]
                if "lambda" in inspect.getsource(lambda_obj):
                    # Try to reconstruct caller by using parent of prior caller.
                    caller_component = prev_caller_component.parent_component

            if caller_component is None:
                raise RLGraphError(
                    "API-method '{}' must have as 1st parameter (the component) either `root` or `self`. Other names "
                    "are not allowed!".format(api_method_rec.name))
            # Not directly called by this method itself (auto-helper-component-API-call).
            # AND call is coming from some caller Component, but that component is not this component
            # OR a parent -> Error.
            elif caller_component is not None and \
                    type(caller_component).__name__ != "MetaGraphBuilder" and \
                    caller_component not in [self] + self.get_parents():
                if not (stack[1][3] == "__init__"
                        and re.search(r'op_records\.py$', stack[1][1])):
                    raise RLGraphError(
                        "The component '{}' is not a child (or grand-child) of the caller ({})! Maybe you forgot to "
                        "add it as a sub-component via `add_components()`.".
                        format(self.global_scope,
                               caller_component.global_scope))

            # Update trace context.
            TraceContext.PREV_CALLER = caller_component

            for stack_item in stack[1:]:  # skip current frame
                # If we hit an API-method call -> return op-recs.
                if stack_item[3] == "api_method_wrapper" and re.search(
                        r'decorators\.py$', stack_item[1]):
                    break
                # If we hit a graph_fn call -> return ops.
                elif stack_item[3] == "run_through_graph_fn" and re.search(
                        r'graph_builder\.py$', stack_item[1]):
                    return_ops = True
                    break

            if return_ops is True:
                if type(return_values) == dict:
                    return {
                        key: value.op
                        for key, value in out_op_column.get_args_and_kwargs()
                        [1].items()
                    }
                else:
                    tuple_returns = tuple(
                        map(lambda x: x.op,
                            out_op_column.get_args_and_kwargs()[0]))
                    return tuple_returns[0] if len(
                        tuple_returns) == 1 else tuple_returns
            # Parent caller is non-graph_fn: Return op-recs.
            else:
                if type(return_values) == dict:
                    return return_values
                else:
                    tuple_returns = out_op_column.get_args_and_kwargs()[0]
                    return tuple_returns[0] if len(
                        tuple_returns) == 1 else tuple_returns
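
The wrapper above uses `inspect.stack()` to find out who called the API method: in a frame record, index 0 is the frame object, index 1 the filename, and index 3 the function name. A minimal standalone sketch of that pattern:

    import inspect

    def who_called_me():
        frame_record = inspect.stack()[1]                    # the caller's frame record
        func_name = frame_record[3]                          # caller's function name
        caller_self = frame_record[0].f_locals.get("self")   # caller's `self`, if any
        return func_name, caller_self

    class Foo:
        def bar(self):
            return who_called_me()

    name, caller = Foo().bar()
    assert name == "bar" and isinstance(caller, Foo)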
Example #10
def get_space_from_op(op, num_categories=None):
    """
    Tries to re-create a Space object given some DataOp (e.g. a tf op).
    This is useful for shape inference on returned ops after having run through a graph_fn.

    Args:
        op (DataOp): The op to create a corresponding Space for.
        num_categories (Optional[int]): An optional indicator, what the `num_categories` property for
            an IntBox should be.

    Returns:
        Space: The inferred Space object.
    """
    # a Dict
    if isinstance(op, dict):  # DataOpDict
        spec = {}
        add_batch_rank = False
        add_time_rank = False
        for key, value in op.items():
            # Special case for IntBoxes:
            # If another key exists, with the name: `_num_[key]` -> take num_categories from that key's value.
            if key[:5] == "_num_":
                continue
            num_categories = op.get("_num_{}".format(key))
            spec[key] = get_space_from_op(value, num_categories=num_categories)
            # A sub-space that could not be inferred (indicated by 0) invalidates the whole Dict.
            if spec[key] == 0:
                return 0
            if spec[key].has_batch_rank:
                add_batch_rank = True
            if spec[key].has_time_rank:
                add_time_rank = True
        return Dict(spec,
                    add_batch_rank=add_batch_rank,
                    add_time_rank=add_time_rank)
    # a Tuple
    elif isinstance(op, tuple):  # DataOpTuple
        spec = []
        add_batch_rank = False
        add_time_rank = False
        for i in op:
            space = get_space_from_op(i)
            if space == 0:
                return 0
            spec.append(space)
            if spec[-1].has_batch_rank:
                add_batch_rank = True
            if spec[-1].has_time_rank:
                add_time_rank = True
        return Tuple(spec,
                     add_batch_rank=add_batch_rank,
                     add_time_rank=add_time_rank)
    # primitive Space -> infer from op dtype and shape
    else:
        int_high = {
            "high": num_categories
        } if num_categories is not None else {}
        # Op itself is a single value, simple python type.
        if isinstance(op, (bool, int, float)):
            return BoxSpace.from_spec(spec=type(op), shape=(), **int_high)
        elif isinstance(op, str):
            raise RLGraphError(
                "Cannot derive Space from non-allowed op ({})!".format(op))
        # A single numpy array.
        elif isinstance(op, np.ndarray):
            return BoxSpace.from_spec(spec=convert_dtype(str(op.dtype), "np"),
                                      shape=op.shape,
                                      **int_high)
        elif isinstance(op, list):
            return try_space_inference_from_list(op)
        # No Space: e.g. the tf.no_op, a distribution (anything that's not a tensor).
        # PyTorch Tensors do not have get_shape so must check backend.
        elif hasattr(op, "dtype") is False or (get_backend() == "tf" and
                                               not hasattr(op, "get_shape")):
            return 0
        # Some tensor: can be converted into a BoxSpace.
        else:
            shape = get_shape(op)
            # Unknown shape (e.g. a cond op).
            if shape is None:
                return 0
            add_batch_rank = False
            add_time_rank = False
            time_major = False
            new_shape = list(shape)

            # New way: Detect via op._batch_rank and op._time_rank properties where these ranks are.
            if hasattr(op, "_batch_rank") and isinstance(op._batch_rank, int):
                add_batch_rank = True
                new_shape[op._batch_rank] = -1

            # elif get_backend() == "pytorch":
            #     if isinstance(op, torch.Tensor):
            #         if op.dim() > 1 and shape[0] == 1:
            #             add_batch_rank = True
            #             new_shape[0] = 1
            if hasattr(op, "_time_rank") and isinstance(op._time_rank, int):
                add_time_rank = True
                if op._time_rank == 0:
                    time_major = True
                new_shape[op._time_rank] = -1
            shape = tuple(n for n in new_shape if n != -1)

            # Old way: Detect automatically whether the first rank(s) are batch and/or time rank.
            if add_batch_rank is False and add_time_rank is False and \
                    shape != () and shape[0] is None:
                if len(shape) > 1 and shape[1] is None:
                    #raise RLGraphError(
                    #    "ERROR: Cannot determine time-major flag if both batch- and time-ranks are in an op w/o saying "
                    #    "which rank goes to which position!"
                    #)
                    shape = shape[2:]
                    add_time_rank = True
                else:
                    shape = shape[1:]
                add_batch_rank = True

            # TODO: If op._batch_rank and/or op._time_rank are not set, set them now.

            base_dtype = op.dtype.base_dtype if hasattr(
                op.dtype, "base_dtype") else op.dtype
            # PyTorch does not have a bool type
            if get_backend() == "pytorch":
                if op.dtype is torch.uint8:
                    base_dtype = bool
            base_dtype_str = str(base_dtype)

            # FloatBox
            if "float" in base_dtype_str:
                return FloatBox(shape=shape,
                                add_batch_rank=add_batch_rank,
                                add_time_rank=add_time_rank,
                                time_major=time_major,
                                dtype=convert_dtype(base_dtype, "np"))
            # IntBox
            elif "int" in base_dtype_str:
                high = num_categories or getattr(op, "_num_categories", None)
                return IntBox(high,
                              shape=shape,
                              add_batch_rank=add_batch_rank,
                              add_time_rank=add_time_rank,
                              time_major=time_major,
                              dtype=convert_dtype(base_dtype, "np"))
            # a BoolBox
            elif "bool" in base_dtype_str:
                return BoolBox(shape=shape,
                               add_batch_rank=add_batch_rank,
                               add_time_rank=add_time_rank,
                               time_major=time_major)
            # a TextBox
            elif "string" in base_dtype_str:
                return TextBox(shape=shape,
                               add_batch_rank=add_batch_rank,
                               add_time_rank=add_time_rank,
                               time_major=time_major)

    raise RLGraphError(
        "ERROR: Cannot derive Space from op '{}' (unknown type?)!".format(op))
Example #11
    def __init__(self,
                 epsilon_spec=None,
                 noise_spec=None,
                 scope="exploration",
                 **kwargs):
        """
        Args:
            epsilon_spec (any): The spec or Component object itself to construct an EpsilonExploration Component.
            noise_spec (dict): The specification dict for a noise generator that adds noise to the NN's output.
        """
        super(Exploration, self).__init__(scope=scope, **kwargs)

        self.action_space = None  # The actual action space (may not have batch-rank, just the plain space)
        self.flat_action_space = None

        self.epsilon_exploration = None
        self.noise_component = None

        # For define-by-run sampling.
        self.sample_obj = None

        # Don't allow both epsilon and noise component
        if epsilon_spec and noise_spec:
            raise RLGraphError(
                "Cannot use both epsilon exploration and a noise component at the same time."
            )

        # Add epsilon component.
        if epsilon_spec:
            self.epsilon_exploration = EpsilonExploration.from_spec(
                epsilon_spec)
            self.add_components(self.epsilon_exploration)

            # Define our interface.
            @rlgraph_api(component=self)
            def get_action(self, actions, time_step, use_exploration=True):
                """
                Action depends on time-step (e.g. epsilon-decay).
                """
                epsilon_decisions = self.epsilon_exploration.do_explore(
                    actions, time_step)
                return self._graph_fn_pick(use_exploration, epsilon_decisions,
                                           actions)

        # Add noise component.
        elif noise_spec:
            self.noise_component = NoiseComponent.from_spec(noise_spec)
            self.add_components(self.noise_component)

            @rlgraph_api(component=self)
            def get_action(self, actions, time_step=0, use_exploration=True):
                """
                Noise is added to the sampled action.
                """
                noise = self.noise_component.get_noise()
                return self._graph_fn_add_noise(use_exploration, noise,
                                                actions)

        # Don't explore at all. Simple pass-through.
        else:

            @rlgraph_api(component=self)
            def get_action(self, actions, time_step=0, use_exploration=False):
                """
                Action is returned as is.
                """
                return actions
Example #12
    def setup_session(self, hooks):
        """
        Creates and then enters the session for this model. Also finalizes the graph.

        Args:
            hooks (list): A list of session hooks to use.
        """
        if self.execution_mode == "distributed":
            self.logger.info("Setting up distributed TensorFlow session.")
            if self.server is None:
                raise RLGraphError(
                    "TensorflowGraphExecutor's Server is None! It could be that your DISTRIBUTED_BACKEND (currently "
                    "set to '{}') is not set to 'distributed_tf'. You can do so via the RLGraph config file in your "
                    "home directory or the ENV variable 'RLGRAPH_DISTRIBUTED_BACKEND=distributed_tf'.".
                    format(get_distributed_backend())
                )
            if self.tf_session_type == "monitored-session":
                session_creator = tf.train.ChiefSessionCreator(
                    scaffold=self.scaffold,
                    master=self.server.target,
                    config=self.tf_session_config,
                    checkpoint_dir=None,
                    checkpoint_filename_with_path=None
                )
                self.monitored_session = tf.train.MonitoredSession(
                    #is_chief=self.execution_spec["distributed_spec"]["task_index"] == 0,
                    session_creator=session_creator,
                    hooks=hooks,
                    stop_grace_period_secs=120  # Default value.
                )
            else:
                assert self.tf_session_type == "monitored-training-session",\
                    "ERROR: Invalid session type: {}!".format(self.tf_session_type)
                is_chief = self.execution_spec["distributed_spec"].get(
                    "is_chief", self.execution_spec["distributed_spec"]["task_index"] == 0
                )
                self.monitored_session = tf.train.MonitoredTrainingSession(
                    master=self.server.target,
                    is_chief=is_chief,
                    checkpoint_dir=None,  # TODO: specify?
                    save_checkpoint_secs=600,
                    save_summaries_secs=30,
                    log_step_count_steps=50000,
                    # scaffold=self.scaffold,
                    # Ignore other hooks
                    hooks=[hooks[-1]] if hooks else None,
                    config=self.tf_session_config,
                    stop_grace_period_secs=120  # Default value.
                )
        else:
            # If monitoring is disabled, use a plain tf.Session.
            if self.disable_monitoring:
                self.logger.info("Setting up default session for non-distributed mode.")
                self.monitored_session = tf.Session(config=self.tf_session_config)
            else:
                self.logger.info("Setting up singular monitored session for non-distributed mode.")
                self.monitored_session = tf.train.SingularMonitoredSession(
                    hooks=hooks,
                    scaffold=self.scaffold,
                    master='',  # Default value.
                    config=self.tf_session_config,
                    checkpoint_dir=None
                )

        # Exit the graph-context and finalize the graph.
        if self.graph_default_context is not None:
            self.graph_default_context.__exit__(None, None, None)

        # TODO back in
        # self.graph.finalize()

        if self.disable_monitoring:
            # If no monitoring, both just end up being simple sessions.
            self.session = self.monitored_session
            self.session.run(self.init_op)
        else:
            # Enter the session to be ready for acting/learning.
            self.monitored_session.__enter__()
            self.session = self.monitored_session._tf_sess()

        # Setup the tf Profiler.
        if self.profiling_enabled and not self.disable_monitoring:
            self.profiler = tf.profiler.Profiler(graph=self.session.graph)
Example #13
    def build(self, root_component, input_spaces=None):
        """
        Builds the meta-graph by constructing op-record columns going into and coming out of all API-methods
        and graph_fns.

        Args:
            root_component (Component): Root component of the meta graph to build.
            input_spaces (Optional[Space]): Input spaces for all (exposed) API methods of the root-component.
        """

        # Time the meta-graph build:
        DataOpRecord.reset()
        time_start = time.perf_counter()
        api = {}

        # Sanity check input_spaces dict.
        if input_spaces is not None:
            for input_param_name in input_spaces.keys():
                if input_param_name not in root_component.api_method_inputs:
                    raise RLGraphError(
                        "ERROR: `input_spaces` contains an input-parameter name ('{}') that's not defined in any of "
                        "the root-component's ('{}') API-methods, whose args are '{}'!"
                        .format(input_param_name, root_component.name,
                                root_component.api_method_inputs))
        else:
            input_spaces = {}

        # Call all API methods of the core once and thereby, create empty in-op columns that serve as placeholders
        # and bi-directional links between ops (for the build time).
        for api_method_name, api_method_rec in root_component.api_methods.items():
            self.logger.debug("Building meta-graph of API-method '{}'.".format(
                api_method_name))

            # Create the loose list of in-op-records depending on signature and input-spaces given.
            # If an arg has a default value, its input-space does not have to be provided.
            in_ops_records = []
            use_named = False
            for i, param_name in enumerate(api_method_rec.input_names):
                # Arg has a default of None (flex). If in input_spaces, arg will be provided.
                if root_component.api_method_inputs[param_name] == "flex":
                    if param_name in input_spaces:
                        in_ops_records.append(
                            DataOpRecord(
                                position=i,
                                kwarg=param_name if use_named else None,
                                placeholder=param_name))
                    else:
                        use_named = True
                # Already defined (per default arg value (e.g. bool)).
                elif isinstance(root_component.api_method_inputs[param_name],
                                Space):
                    if param_name in input_spaces:
                        in_ops_records.append(
                            DataOpRecord(
                                position=i,
                                kwarg=param_name if use_named else None,
                                placeholder=param_name))
                    else:
                        use_named = True
                # No default values -> Must be provided in `input_spaces`.
                else:
                    # A var-positional param.
                    if root_component.api_method_inputs[param_name] == "*flex":
                        assert use_named is False
                        if param_name in input_spaces:
                            for j in range(
                                    len(force_list(input_spaces[param_name]))):
                                in_ops_records.append(
                                    DataOpRecord(position=i + j,
                                                 placeholder=param_name +
                                                 "[{}]".format(j)))
                    # A keyword param.
                    elif root_component.api_method_inputs[
                            param_name] == "**flex":
                        if param_name in input_spaces:
                            assert use_named is False
                            for key in sorted(input_spaces[param_name].keys()):
                                in_ops_records.append(
                                    DataOpRecord(kwarg=key,
                                                 placeholder=param_name +
                                                 "[{}]".format(key)))
                        use_named = True
                    else:
                        # TODO: If space not provided in input_spaces -> Try to call this API method later (maybe another API-method).
                        assert param_name in input_spaces, \
                            "ERROR: arg-name '{}' not defined in input_spaces for root component '{}'!".format(
                                param_name, root_component.global_scope
                            )
                        in_ops_records.append(
                            DataOpRecord(
                                position=i,
                                kwarg=param_name if use_named else None,
                                placeholder=param_name))

            # Do the actual core API-method call (thereby assembling the meta-graph).
            args = [
                op_rec for op_rec in in_ops_records if op_rec.kwarg is None
            ]
            kwargs = {
                op_rec.kwarg: op_rec
                for op_rec in in_ops_records if op_rec.kwarg is not None
            }
            getattr(api_method_rec.component, api_method_name)(*args, **kwargs)

            # Register core's interface.
            api[api_method_name] = (
                in_ops_records, api_method_rec.out_op_columns[-1].op_records)

            # Tag very last out-op-records with is_terminal_op=True, so we know in the build process that we are done.
            for op_rec in api_method_rec.out_op_columns[-1].op_records:
                op_rec.is_terminal_op = True

        time_build = time.perf_counter() - time_start
        self.logger.info(
            "Meta-graph build completed in {} s.".format(time_build))

        # Get some stats on the graph and report.
        num_meta_ops = DataOpRecord._ID + 1
        self.logger.info(
            "Meta-graph op-records generated: {}".format(num_meta_ops))

        return MetaGraph(root_component=root_component,
                         api=api,
                         num_ops=num_meta_ops,
                         build_status=True)
Example #14
    def from_spec(cls, spec=None, **kwargs):
        """
        Uses the given spec to create an object.
        If `spec` is a dict, an optional "type" key can be used as a "constructor hint" to specify a certain class
        of the object.
        If `spec` is not a dict, `spec`'s value is used directly as the "constructor hint".

        The rest of `spec` (if it's a dict) will be used as kwargs for the (to-be-determined) constructor.
        Additional keys in **kwargs will always have precedence (overwrite keys in `spec` (if a dict)).
        Also, if the spec-dict or **kwargs contains the special key "_args", it will be popped from the dict
        and used as *args list to be passed separately to the constructor.

        The following constructor hints are valid:
        - None: Use `cls` as constructor.
        - An already instantiated object: Will be returned as is; no constructor call.
        - A string or an object that is a key in `cls`'s `__lookup_classes__` dict: The value in `__lookup_classes__`
            for that key will be used as the constructor.
        - A python callable: Use that as constructor.
        - A string: Either a json (or yaml) filename or the name of a python module+class
            (e.g. "rlgraph.components.Component") that will be imported and used as the constructor.

        Args:
            spec (Optional[dict]): The specification dict.

        Keyword Args:
            kwargs (any): Optional possibility to pass the c'tor arguments in here and use spec as the type-only info.
                Then we can call this like: from_spec([type]?, [**kwargs for ctor])
                If `spec` is already a dict, then `kwargs` will be merged with spec (overwriting keys in `spec`) after
                "type" has been popped out of `spec`.
                If a constructor of a Specifiable needs an *args list of items, the special key `_args` can be passed
                inside `kwargs` with a list type value (e.g. kwargs={"_args": [arg1, arg2, arg3]}).

        Returns:
            The object generated from the spec.
        """
        # specifiable_type is already a created object of this class -> Take it as is.
        if isinstance(spec, cls):
            return spec

        # `specifiable_type`: Indicator for the Specifiable's constructor.
        # `ctor_args`: *args arguments for the constructor.
        # `ctor_kwargs`: **kwargs arguments for the constructor.
        # Copy so caller can reuse safely.
        spec = deepcopy(spec)
        if isinstance(spec, dict):
            if "type" in spec:
                specifiable_type = spec.pop("type", None)
            else:
                specifiable_type = None
            ctor_kwargs = spec
            ctor_kwargs.update(kwargs)  # give kwargs priority
        else:
            specifiable_type = spec
            ctor_kwargs = kwargs
        # Special `_args` field in kwargs for *args-utilizing constructors.
        ctor_args = ctor_kwargs.pop("_args", [])

        # Figure out the actual constructor (class) from `type_`.
        # None: Try __default__object (if no args/kwargs), only then constructor of cls (using args/kwargs).
        if specifiable_type is None:
            # We have a default constructor that was defined directly by cls (not by its children).
            if cls.__default_constructor__ is not None and ctor_args == [] and \
                    (not hasattr(cls.__bases__[0], "__default_constructor__") or
                     cls.__bases__[0].__default_constructor__ is None or
                     cls.__bases__[0].__default_constructor__ is not cls.__default_constructor__
                    ):
                constructor = cls.__default_constructor__
                # Default partial's keywords into ctor_kwargs.
                if isinstance(constructor, partial):
                    kwargs = default_dict(ctor_kwargs, constructor.keywords)
                    constructor = partial(constructor.func, **kwargs)
                    ctor_kwargs = {} # erase to avoid duplicate kwarg error
            # Try our luck with this class itself.
            else:
                constructor = cls
        # Try the __lookup_classes__ of this class.
        else:
            constructor = cls.lookup_class(specifiable_type)

            # Found in cls.__lookup_classes__.
            if constructor is not None:
                pass
            # Python callable.
            elif callable(specifiable_type):
                constructor = specifiable_type
            # A string: Filename or a python module+class.
            elif isinstance(specifiable_type, str):
                if re.search(r'\.(yaml|yml|json)$', specifiable_type):
                    return cls.from_file(specifiable_type, *ctor_args, **ctor_kwargs)
                elif specifiable_type.find('.') != -1:
                    module_name, function_name = specifiable_type.rsplit(".", 1)
                    module = importlib.import_module(module_name)
                    constructor = getattr(module, function_name)
                else:
                    raise RLGraphError(
                        "ERROR: String specifier ({}) in from_spec must be a filename, a module+class, or a key "
                        "into {}.__lookup_classes__!".format(specifiable_type, cls.__name__)
                    )

        if not constructor:
            raise RLGraphError("Invalid type: {}".format(specifiable_type))

        # Create object with inferred constructor.
        specifiable_object = constructor(*ctor_args, **ctor_kwargs)
        assert isinstance(specifiable_object, constructor.func if isinstance(constructor, partial) else constructor)

        return specifiable_object
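
A hedged usage sketch of the spec conventions described in the docstring; `MyOptimizer` and the lookup key "adam" are hypothetical placeholders, so the calls are shown commented out:

    # Dict spec: "type" selects the constructor via __lookup_classes__, the remaining keys are ctor kwargs.
    # optimizer = MyOptimizer.from_spec({"type": "adam", "learning_rate": 0.001})

    # Same result with the type as a plain string and the kwargs passed separately.
    # optimizer = MyOptimizer.from_spec("adam", learning_rate=0.001)

    # Positional ctor args go in via the special "_args" key.
    # optimizer = MyOptimizer.from_spec("adam", _args=[0.001])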
Example #15
def get_space_from_op(op,
                      read_key_hints=False,
                      dtype=None,
                      low=None,
                      high=None):
    """
    Tries to re-create a Space object given some DataOp (e.g. a tf op).
    This is useful for shape inference on returned ops after having run through a graph_fn.

    Args:
        op (DataOp): The op to create a corresponding Space for.

        read_key_hints (bool): If True, tries to read type- and low/high-hints from the pattern of the Dict keys (str).
            - Preceding "I_": IntBox, "F_": FloatBox, "B_": BoolBox.
            - Succeeding "_low=0.0": Low value.
            - Succeeding "_high=1.0": High value.
            E.g. Dict key "F_somekey_low=0.0_high=2.0" indicates a FloatBox with low=0.0 and high=2.0.
                 Dict key "I_somekey" indicates an intbox with no limits.
                 Dict key "I_somekey_high=5" indicates an intbox with high=5 (values 0-4).

            Default: False.

        dtype (Optional[str]): An optional indicator, what the `dtype` of a BoxSpace should be.
        low (Optional[int,float]): An optional indicator, what the `low` property for a BoxSpace should be.
        high (Optional[int,float]): An optional indicator, what the `high` property for a BoxSpace should be.

    Returns:
        Space: The inferred Space object.
    """
    # a Dict
    if isinstance(op, dict):  # DataOpDict
        spec = {}
        add_batch_rank = False
        add_time_rank = False
        for key, value in op.items():
            # Try to infer hints from the key.
            if read_key_hints is True:
                dtype, low, high = get_space_hints_from_dict_key(key)
            spec[key] = get_space_from_op(value,
                                          dtype=dtype,
                                          low=low,
                                          high=high)
            # A sub-space that could not be inferred (indicated by 0) invalidates the whole Dict.
            if spec[key] == 0:
                return 0
            if spec[key].has_batch_rank:
                add_batch_rank = True
            if spec[key].has_time_rank:
                add_time_rank = True
        return Dict(spec,
                    add_batch_rank=add_batch_rank,
                    add_time_rank=add_time_rank)
    # a Tuple
    elif isinstance(op, tuple):  # DataOpTuple
        spec = []
        add_batch_rank = False
        add_time_rank = False
        for i in op:
            space = get_space_from_op(i)
            if space == 0:
                return 0
            spec.append(space)
            if spec[-1].has_batch_rank:
                add_batch_rank = True
            if spec[-1].has_time_rank:
                add_time_rank = True
        return Tuple(spec,
                     add_batch_rank=add_batch_rank,
                     add_time_rank=add_time_rank)

    # primitive Space -> infer from op dtype and shape
    else:
        low_high = {}
        if high is not None:
            low_high["high"] = high
        if low is not None:
            low_high["low"] = low
        # Op itself is a single value, simple python type.
        if isinstance(op, (bool, int, float)):
            return BoxSpace.from_spec(spec=(dtype or type(op)),
                                      shape=(),
                                      **low_high)
        elif isinstance(op, str):
            raise RLGraphError(
                "Cannot derive Space from non-allowed op ({})!".format(op))
        # A single numpy array.
        elif isinstance(op, np.ndarray):
            return BoxSpace.from_spec(spec=convert_dtype(str(op.dtype), "np"),
                                      shape=op.shape,
                                      **low_high)
        elif isinstance(op, list):
            return try_space_inference_from_list(op, dtype=dtype, **low_high)
        # No Space: e.g. the tf.no_op, a distribution (anything that's not a tensor).
        # PyTorch Tensors do not have get_shape so must check backend.
        elif hasattr(op, "dtype") is False or (get_backend() == "tf" and
                                               not hasattr(op, "get_shape")):
            return 0
        # Some tensor: can be converted into a BoxSpace.
        else:
            shape = get_shape(op)
            # Unknown shape (e.g. a cond op).
            if shape is None:
                return 0
            add_batch_rank = False
            add_time_rank = False
            time_major = False
            new_shape = list(shape)

            # New way: Detect via op._batch_rank and op._time_rank properties where these ranks are.
            if hasattr(op, "_batch_rank") and isinstance(op._batch_rank, int):
                add_batch_rank = True
                new_shape[op._batch_rank] = -1

            # elif get_backend() == "pytorch":
            #     if isinstance(op, torch.Tensor):
            #         if op.dim() > 1 and shape[0] == 1:
            #             add_batch_rank = True
            #             new_shape[0] = 1
            if hasattr(op, "_time_rank") and isinstance(op._time_rank, int):
                add_time_rank = True
                if op._time_rank == 0:
                    time_major = True
                new_shape[op._time_rank] = -1
            shape = tuple(n for n in new_shape if n != -1)

            # Old way: Detect automatically whether the first rank(s) are batch and/or time rank.
            if add_batch_rank is False and add_time_rank is False and \
                    shape != () and shape[0] is None:
                if len(shape) > 1 and shape[1] is None:
                    #raise RLGraphError(
                    #    "ERROR: Cannot determine time-major flag if both batch- and time-ranks are in an op w/o saying "
                    #    "which rank goes to which position!"
                    #)
                    shape = shape[2:]
                    add_time_rank = True
                else:
                    shape = shape[1:]
                add_batch_rank = True

            # TODO: If op._batch_rank and/or op._time_rank are not set, set them now.

            base_dtype = op.dtype.base_dtype if hasattr(op.dtype, "base_dtype") else op.dtype
            # PyTorch does not have a bool type
            if get_backend() == "pytorch":
                if op.dtype is torch.uint8:
                    base_dtype = bool
            base_dtype_str = str(base_dtype)

            # FloatBox
            if "float" in base_dtype_str:
                return FloatBox(shape=shape,
                                add_batch_rank=add_batch_rank,
                                add_time_rank=add_time_rank,
                                time_major=time_major,
                                dtype=convert_dtype(base_dtype, "np"))
            # IntBox
            elif "int" in base_dtype_str:
                high_ = high or getattr(op, "_num_categories", None)
                return IntBox(high_,
                              shape=shape,
                              add_batch_rank=add_batch_rank,
                              add_time_rank=add_time_rank,
                              time_major=time_major,
                              dtype=convert_dtype(base_dtype, "np"))
            # a BoolBox
            elif "bool" in base_dtype_str:
                return BoolBox(shape=shape,
                               add_batch_rank=add_batch_rank,
                               add_time_rank=add_time_rank,
                               time_major=time_major)
            # a TextBox
            elif "string" in base_dtype_str:
                return TextBox(shape=shape,
                               add_batch_rank=add_batch_rank,
                               add_time_rank=add_time_rank,
                               time_major=time_major)

    raise RLGraphError(
        "ERROR: Cannot derive Space from op '{}' (unknown type?)!".format(op))
Beispiel #16
0
    def __init__(self,
                 network_spec,
                 action_space=None,
                 action_adapter_spec=None,
                 max_likelihood=True,
                 scope="policy",
                 **kwargs):
        """
        Args:
            network_spec (Union[NeuralNetwork,dict]): The NeuralNetwork Component or a specification dict to build
                one.

            action_space (Space): The action Space within which this Component will create actions.

            action_adapter_spec (Optional[dict]): A spec-dict to create an ActionAdapter. Use None for the default
                ActionAdapter object.

            max_likelihood (bool): Whether to pick actions according to the max-likelihood value or via sampling.
                Default: True.
        """
        super(Policy, self).__init__(scope=scope, **kwargs)

        self.neural_network = NeuralNetwork.from_spec(network_spec)
        if action_space is None:
            self.action_adapter = ActionAdapter.from_spec(action_adapter_spec)
            action_space = self.action_adapter.action_space
        else:
            self.action_adapter = ActionAdapter.from_spec(
                action_adapter_spec, action_space=action_space)
        self.action_space = action_space
        self.max_likelihood = max_likelihood

        # TODO: Hacky trick to implement IMPALA post-LSTM256 time-rank folding and unfolding.
        # TODO: Replace entirely via sonnet-like BatchApply Component.
        is_impala = "IMPALANetwork" in type(self.neural_network).__name__

        # Add API-method to get baseline output (if we use an extra value function baseline node).
        if isinstance(self.action_adapter, BaselineActionAdapter):
            # TODO: IMPALA attempt to speed up final pass after LSTM.
            if is_impala:
                self.time_rank_folder = ReShape(fold_time_rank=True,
                                                scope="time-rank-fold")
                self.time_rank_unfolder_v = ReShape(unfold_time_rank=True,
                                                    time_major=True,
                                                    scope="time-rank-unfold-v")
                self.time_rank_unfolder_a_probs = ReShape(
                    unfold_time_rank=True,
                    time_major=True,
                    scope="time-rank-unfold-a-probs")
                self.time_rank_unfolder_logits = ReShape(
                    unfold_time_rank=True,
                    time_major=True,
                    scope="time-rank-unfold-logits")
                self.time_rank_unfolder_log_probs = ReShape(
                    unfold_time_rank=True,
                    time_major=True,
                    scope="time-rank-unfold-log-probs")
                self.add_components(self.time_rank_folder,
                                    self.time_rank_unfolder_v,
                                    self.time_rank_unfolder_a_probs,
                                    self.time_rank_unfolder_log_probs,
                                    self.time_rank_unfolder_logits)

            @rlgraph_api(component=self)
            def get_state_values_logits_probabilities_log_probs(
                    self, nn_input, internal_states=None):
                nn_output = self.neural_network.apply(nn_input,
                                                      internal_states)
                last_internal_states = nn_output.get("last_internal_states")
                nn_output = nn_output["output"]

                # TODO: IMPALA attempt to speed up final pass after LSTM.
                if is_impala:
                    nn_output = self.time_rank_folder.apply(nn_output)

                out = self.action_adapter.get_logits_probabilities_log_probs(
                    nn_output)

                # TODO: IMPALA attempt to speed up final pass after LSTM.
                if is_impala:
                    state_values = self.time_rank_unfolder_v.apply(
                        out["state_values"], nn_output)
                    logits = self.time_rank_unfolder_logits.apply(
                        out["logits"], nn_output)
                    probs = self.time_rank_unfolder_a_probs.apply(
                        out["probabilities"], nn_output)
                    log_probs = self.time_rank_unfolder_log_probs.apply(
                        out["log_probs"], nn_output)
                else:
                    state_values = out["state_values"]
                    logits = out["logits"]
                    probs = out["probabilities"]
                    log_probs = out["log_probs"]

                return dict(state_values=state_values,
                            logits=logits,
                            probabilities=probs,
                            log_probs=log_probs,
                            last_internal_states=last_internal_states)

        # Figure out our Distribution.
        if isinstance(action_space, IntBox):
            self.distribution = Categorical()
        # Continuous action space -> Normal distribution (each action needs mean and variance from network).
        elif isinstance(action_space, FloatBox):
            self.distribution = Normal()
        else:
            raise RLGraphError(
                "ERROR: `action_space` is of type {} and not allowed in {} Component!"
                .format(type(action_space).__name__, self.name))

        self.add_components(self.neural_network, self.action_adapter,
                            self.distribution)

        if is_impala:
            self.add_components(self.time_rank_folder,
                                self.time_rank_unfolder_v,
                                self.time_rank_unfolder_a_probs,
                                self.time_rank_unfolder_log_probs,
                                self.time_rank_unfolder_logits)
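For orientation, constructing such a Policy only needs a network spec and an action Space. The following usage sketch is illustrative only: the layer-spec dicts and import paths are assumptions and may differ between RLGraph versions.

# Assumed imports (module paths vary across RLGraph versions):
# from rlgraph.spaces import IntBox
# from rlgraph.components import Policy

network_spec = [
    {"type": "dense", "units": 256, "activation": "relu"},
    {"type": "dense", "units": 256, "activation": "relu"},
]
action_space = IntBox(4)  # e.g. 4 discrete actions -> Categorical distribution

policy = Policy(
    network_spec=network_spec,
    action_space=action_space,
    max_likelihood=True  # pick argmax actions instead of sampling
)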
Beispiel #17
0
def get_activation_function(activation_function=None, *other_parameters):
    """
    Returns an activation function (callable) to use in a NN layer.

    Args:
        activation_function (Optional[callable,str]): The activation function to lookup. Could be given as:
            - already a callable (return just that)
            - a lookup key (str)
            - None: Use linear activation.

        other_parameters (any): Possible extra parameter(s) used for some of the activation functions.

    Returns:
        callable: The backend-dependent activation function.
    """
    if get_backend() == "tf":
        if activation_function is None or callable(activation_function):
            return activation_function
        elif activation_function == "linear":
            return tf.identity
        # Rectified linear unit (ReLU): 0 if x < 0 else x
        elif activation_function == "relu":
            return tf.nn.relu
        # Exponential linear: exp(x) - 1 if x < 0 else x
        elif activation_function == "elu":
            return tf.nn.elu
        # Sigmoid: 1 / (1 + exp(-x))
        elif activation_function == "sigmoid":
            return tf.sigmoid
        # Scaled exponential linear unit: scale * [alpha * (exp(x) - 1) if x < 0 else x]
        # https://arxiv.org/pdf/1706.02515.pdf
        elif activation_function == "selu":
            return tf.nn.selu
        # Swish function: x * sigmoid(x)
        # https://arxiv.org/abs/1710.05941
        elif activation_function == "swish":
            return lambda x: x * tf.sigmoid(x=x)
        # Leaky ReLU: x * [alpha if x < 0 else 1.0]
        elif activation_function in ["lrelu", "leaky_relu"]:
            alpha = other_parameters[0] if len(other_parameters) > 0 else 0.2
            return partial(tf.nn.leaky_relu, alpha=alpha)
        # Concatenated ReLU:
        elif activation_function == "crelu":
            return tf.nn.crelu
        # Softmax function:
        elif activation_function == "softmax":
            return tf.nn.softmax
        # Softplus function:
        elif activation_function == "softplus":
            return tf.nn.softplus
        # Softsign function:
        elif activation_function == "softsign":
            return tf.nn.softsign
        # tanh activation function:
        elif activation_function == "tanh":
            return tf.nn.tanh
        else:
            raise RLGraphError(
                "ERROR: Unknown activation_function '{}' for TensorFlow backend!"
                .format(activation_function))
    elif get_backend() == "pytorch":
        # Have to instantiate objects here.
        if activation_function is None or callable(activation_function):
            return activation_function
        elif activation_function == "linear":
            # Do nothing.
            return None
        # Rectified linear unit (ReLU): 0 if x < 0 else x
        elif activation_function == "relu":
            return nn.ReLU()
        # Exponential linear: exp(x) - 1 if x < 0 else x
        elif activation_function == "elu":
            return nn.ELU()
        # Sigmoid: 1 / (1 + exp(-x))
        elif activation_function == "sigmoid":
            return nn.Sigmoid()
        # Scaled exponential linear unit: scale * [alpha * (exp(x) - 1) if x < 0 else x]
        # https://arxiv.org/pdf/1706.02515.pdf
        elif activation_function == "selu":
            return nn.SELU()
        # Leaky ReLU: x * [alpha if x < 0 else 1.0]
        elif activation_function in ["lrelu", "leaky_relu"]:
            alpha = other_parameters[0] if len(other_parameters) > 0 else 0.2
            return nn.LeakyReLU(negative_slope=alpha)
        # Softmax function:
        elif activation_function == "softmax":
            return nn.Softmax()
        # Softplus function:
        elif activation_function == "softplus":
            return nn.Softplus()
        # Softsign function:
        elif activation_function == "softsign":
            return nn.Softsign()
        # tanh activation function:
        elif activation_function == "tanh":
            return nn.Tanh()
        else:
            raise RLGraphError(
                "ERROR: Unknown activation_function '{}' for PyTorch backend!".
                format(activation_function))
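A quick usage sketch for the lookup above (assuming the TensorFlow backend is active and `get_activation_function` is importable; import path omitted):

# Plain ReLU: returns tf.nn.relu directly.
relu_fn = get_activation_function("relu")

# Leaky ReLU with a custom negative slope passed as the extra parameter.
lrelu_fn = get_activation_function("lrelu", 0.1)

# None or "linear" yields an identity (no-op) activation.
linear_fn = get_activation_function("linear")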
def get_graph_markup(component, level=0, draw_graph_fns=False):
    """
    Returns graph markup to be used for RLGraph metagraph plotting.

    Uses the [mermaid](https://github.com/knsv/mermaid) markup language.

    Args:
        component (Component): Component to generate meta-graph markup for.
        level (int): Indentation level. If >= 1, return this component as sub-component.
        draw_graph_fns (bool): Include graph fns in plot.

    Returns:
        str: Meta-graph markup string.
    """

    # Print (sub)graph declaration
    if level >= 1:
        markup = " " * 4 * level + "subgraph {}\n".format(component.name)
    elif level == 0:
        markup = "graph TD\n"
        markup += "classDef input_socket fill:#9ff,stroke:#333,stroke-width:2px;\n"
        markup += "classDef output_socket fill:#f9f,stroke:#333,stroke-width:2px;\n"
        markup += "classDef space fill:#999,stroke:#333,stroke-width:2px;\n"
        markup += "classDef graph_fn fill:#ff9,stroke:#333,stroke-width:2px;\n"
        markup += "\n"
    else:
        raise RLGraphError(
            "Invalid component indentation level {}".format(level))

    all_sockets = list()
    all_graph_fns = list()

    # Add input socket nodes with the following markup: socket_HASH("INPUT SOCKET NAME")
    markup_input_sockets = list()
    for input_socket in component.input_sockets:
        markup += " " * 4 * (level + 1) + "socket_{hash}(\"{name}\")\n".format(
            hash=hash(input_socket), name=input_socket.name)
        markup_input_sockets.append(
            "socket_{hash}".format(hash=hash(input_socket)))
        all_sockets.append(input_socket)

    # Add output socket nodes with the following markup: socket_HASH("OUTPUT SOCKET NAME")
    markup_output_sockets = list()
    for output_socket in component.output_sockets:
        markup += " " * 4 * (level + 1) + "socket_{hash}(\"{name}\")\n".format(
            hash=hash(output_socket), name=output_socket.name)
        markup_output_sockets.append(
            "socket_{hash}".format(hash=hash(output_socket)))
        all_sockets.append(output_socket)

    markup += "\n"

    # Add graph function nodes with the following markup: graphfn_HASH(GRAPH FN NAME)
    markup_graph_fns = list()
    for graph_fn in component.graph_fns:
        markup += " " * 4 * (level +
                             1) + "graphfn_{hash}(\"{name}\")\n".format(
                                 hash=hash(graph_fn), name=graph_fn.name)
        markup_graph_fns.append("graphfn_{hash}".format(hash=hash(graph_fn)))
        all_graph_fns.append(graph_fn)

    # Collect connections by looping through all incoming connections.
    # All outgoing connections should be incoming connections of another socket, so we don't need to loop through them.
    connections = list()
    markup_spaces = list()
    for socket in all_sockets:
        for incoming_connection in socket.incoming_connections:
            if isinstance(incoming_connection, Socket):
                connections.append(
                    ("socket_{}".format(hash(incoming_connection)),
                     "socket_{}".format(hash(socket)), None))
            elif isinstance(incoming_connection, Space):
                # Add spaces to markup (we only know about them because of their connections).
                markup += " " * 4 * (level +
                                     1) + "space_{hash}(\"{name}\")\n".format(
                                         hash=hash(incoming_connection),
                                         name=str(incoming_connection))
                markup_spaces.append(
                    "space_{hash}".format(hash=hash(incoming_connection)))
                connections.append(
                    ("space_{}".format(hash(incoming_connection)),
                     "socket_{}".format(hash(socket)), None))
            elif isinstance(incoming_connection, GraphFunction):
                connections.append(
                    ("graphfn_{}".format(hash(incoming_connection)),
                     "socket_{}".format(hash(socket)), None))

    # Collect graph fn connections by looping through all input sockets of the graph fns.
    # All output sockets should have been covered by the above collection of incoming connections to the sockets.
    for graph_fn in all_graph_fns:
        for input_socket_name, input_socket_dict in graph_fn.input_sockets.items():
            input_socket = input_socket_dict['socket']
            if isinstance(input_socket, Socket):
                connections.append(("socket_{}".format(hash(input_socket)),
                                    "graphfn_{}".format(hash(graph_fn)), None))
            else:
                raise ValueError("Not a valid input socket: {} ({})".format(
                    input_socket, type(input_socket)))

    # Add style class `input_socket` to the input sockets
    if markup_input_sockets:
        markup += " " * 4 * (level + 1) + "class {} input_socket;\n".format(
            ','.join(markup_input_sockets))

    # Add style class `output_socket` to the output sockets
    if markup_output_sockets:
        markup += " " * 4 * (level + 1) + "class {} output_socket;\n".format(
            ','.join(markup_output_sockets))

    # Add style class `space` to the spaces
    if markup_spaces:
        markup += " " * 4 * (level + 1) + "class {} space;\n".format(
            ','.join(markup_spaces))

    # Add style class `graph_fn` to the graph fns
    if markup_graph_fns:
        markup += " " * 4 * (level + 1) + "class {} graph_fn;\n".format(
            ','.join(markup_graph_fns))

    markup += "\n"

    # Add sub-components.
    for sub_component_name, sub_component in component.sub_components.items():
        markup += get_graph_markup(sub_component,
                                   level=level + 1,
                                   draw_graph_fns=draw_graph_fns)

    # Subgraphs (level >= 1) require an end statement.
    if level >= 1:
        markup += " " * 4 * level + "end\n"

    markup += "\n"

    # Connections are inserted after the graph definition.
    for connection in connections:
        if connection[2]:
            # Labeled connection
            markup += " " * 4 * level + "{}--{}-->{}\n".format(
                connection[0], connection[2], connection[1])
        else:
            # Unlabeled connection
            markup += " " * 4 * level + "{}-->{}\n".format(
                connection[0], connection[1])

    markup += "\n"

    return markup
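To make the output format concrete, calling the function on a small built component produces mermaid markup roughly like the commented block below. Node names and hashes are placeholders for illustration; the actual output depends on the component's sockets and graph_fns.

markup = get_graph_markup(my_component)  # `my_component`: any component with sockets/graph_fns
print(markup)
# Rough shape of the generated markup:
#   graph TD
#   classDef input_socket fill:#9ff,stroke:#333,stroke-width:2px;
#   ...
#   socket_111("states")
#   socket_222("actions")
#   graphfn_333("_graph_fn_pick")
#   class socket_111 input_socket;
#   class socket_222 output_socket;
#   class graphfn_333 graph_fn;
#
#   socket_111-->graphfn_333
#   graphfn_333-->socket_222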
Beispiel #19
0
    def _execute(self,
                 num_timesteps=None,
                 num_episodes=None,
                 max_timesteps_per_episode=None,
                 use_exploration=True,
                 update_spec=None,
                 frameskip=None,
                 reset=True):
        """
        Actual implementation underlying `execute_timesteps` and `execute_episodes`.

        Args:
            num_timesteps (Optional[int]): The maximum number of timesteps to run. At least one of `num_timesteps` or
                `num_episodes` must be provided.
            num_episodes (Optional[int]): The maximum number of episodes to run. At least one of `num_timesteps` or
                `num_episodes` must be provided.
            use_exploration (Optional[bool]): Indicates whether to utilize exploration (epsilon or noise based)
                when picking actions. Default: True.
            max_timesteps_per_episode (Optional[int]): Can be used to limit the number of timesteps per episode.
                Use None or 0 for no limit. Default: None.
            update_spec (Optional[dict]): Update parameters. If None, the worker only performs rollouts.
                Matches the structure of an Agent's update_spec dict and will be "defaulted" by that dict.
                See `input_parsing/parse_update_spec.py` for more details.
            frameskip (Optional[int]): How often actions are repeated after retrieving them from the agent.
                Rewards are accumulated over the number of skips. Use None for the Worker's default value.
            reset (bool): Whether to reset the environment and all the Worker's internal counters.
                Default: True.

        Returns:
            dict: Execution statistics.
        """
        assert num_timesteps is not None or num_episodes is not None,\
            "ERROR: One of `num_timesteps` or `num_episodes` must be provided!"
        # Are we updating or just acting/observing?
        update_spec = default_dict(update_spec, self.agent.update_spec)
        self.set_update_schedule(update_spec)

        num_timesteps = num_timesteps or 0
        num_episodes = num_episodes or 0
        max_timesteps_per_episode = [
            max_timesteps_per_episode or 0
            for _ in range_(self.num_environments)
        ]
        frameskip = frameskip or self.frameskip

        # Stats.
        timesteps_executed = 0
        episodes_executed = 0

        start = time.perf_counter()
        episode_terminals = self.episode_terminals
        if reset is True:
            self.env_frames = 0
            self.episodes_since_update = 0
            self.finished_episode_rewards = [
                [] for _ in range_(self.num_environments)
            ]
            self.finished_episode_durations = [
                [] for _ in range_(self.num_environments)
            ]
            self.finished_episode_timesteps = [
                [] for _ in range_(self.num_environments)
            ]

            for i, env_id in enumerate(self.env_ids):
                self.episode_returns[i] = 0
                self.episode_timesteps[i] = 0
                self.episode_terminals[i] = False
                self.episode_starts[i] = time.perf_counter()
                if self.worker_executes_preprocessing:
                    self.state_is_preprocessed[env_id] = False

            self.env_states = self.vector_env.reset_all()
            self.agent.reset()
        elif self.env_states[0] is None:
            raise RLGraphError(
                "Runner must be reset at the very beginning. Environment is in invalid state."
            )

        # Only run everything for at most num_timesteps (if defined).
        env_states = self.env_states
        while not (0 < num_timesteps <= timesteps_executed):
            if self.render:
                self.vector_env.render()

            if self.worker_executes_preprocessing:
                for i, env_id in enumerate(self.env_ids):
                    state = self.agent.state_space.force_batch(env_states[i])
                    if self.preprocessors[env_id] is not None:
                        if self.state_is_preprocessed[env_id] is False:
                            self.preprocessed_states_buffer[
                                i] = self.preprocessors[env_id].preprocess(
                                    state)
                            self.state_is_preprocessed[env_id] = True
                    else:
                        self.preprocessed_states_buffer[i] = env_states[i]
                # TODO extra returns when worker is not applying preprocessing.
                actions = self.agent.get_action(
                    states=self.preprocessed_states_buffer,
                    use_exploration=use_exploration,
                    apply_preprocessing=self.apply_preprocessing)
                preprocessed_states = np.array(self.preprocessed_states_buffer)
            else:
                actions, preprocessed_states = self.agent.get_action(
                    states=np.array(env_states),
                    use_exploration=use_exploration,
                    apply_preprocessing=True,
                    extra_returns="preprocessed_states")

            # Accumulate the reward over n env-steps (equals one action pick). n=self.frameskip.
            env_rewards = [0 for _ in range_(self.num_environments)]
            next_states = None

            # For Dict action spaces, we have to treat each key as an array with batch-rank at index 0.
            # The action-dict is then translated into a list of dicts where each dict contains the original data
            # but without the batch-rank.
            # E.g. {'A': array([0, 1]), 'B': array([2, 3])} -> [{'A': 0, 'B': 2}, {'A': 1, 'B': 3}]
            if isinstance(self.agent.action_space, Dict):
                some_key = next(iter(actions))
                assert isinstance(actions, dict) and isinstance(actions[some_key], np.ndarray),\
                    "ERROR: Cannot flip Dict-action batch with dict keys if returned value is not a dict OR " \
                    "values of returned value are not np.ndarrays!"
                # TODO: What if actions come as nested dicts (more than one level deep)?
                # TODO: Use DataOpDict/Tuple's new `map` method.
                if hasattr(actions[some_key], "__len__"):
                    env_actions = [{
                        key: value[i]
                        for key, value in actions.items()
                    } for i in range(len(actions[some_key]))]
                else:
                    # Action was not array type.
                    env_actions = [{
                        key: value
                        for key, value in actions.items()
                    }]
            # Tuple action Spaces:
            # E.g. Tuple(array([0, 1]), array([2, 3])) -> [(0, 2), (1, 3)]
            elif isinstance(self.agent.action_space, Tuple):
                assert isinstance(actions, tuple) and isinstance(actions[0], np.ndarray),\
                    "ERROR: Cannot flip tuple-action batch if returned value is not a tuple OR " \
                    "values of returned value are not np.ndarrays!"
                # TODO: Use DataOpDict/Tuple's new `map` method.
                env_actions = [
                    tuple(value[i] for _, value in enumerate(actions))
                    for i in range(len(actions[0]))
                ]
            # No container batch-flipping necessary.
            else:
                env_actions = actions
                if self.num_environments == 1 and env_actions.shape == ():
                    env_actions = [env_actions]

            for _ in range_(frameskip):
                next_states, step_rewards, episode_terminals, _ = self.vector_env.step(
                    actions=env_actions)

                self.env_frames += self.num_environments
                for i, step_reward in enumerate(step_rewards):
                    env_rewards[i] += step_reward
                if np.any(episode_terminals):
                    break

            # Only render once per action.
            #if self.render:
            #    self.vector_env.environments[0].render()

            for i, env_id in enumerate(self.env_ids):
                self.episode_returns[i] += env_rewards[i]
                self.episode_timesteps[i] += 1

                if 0 < max_timesteps_per_episode[i] <= self.episode_timesteps[
                        i]:
                    episode_terminals[i] = True
                if self.worker_executes_preprocessing:
                    self.state_is_preprocessed[env_id] = False
                # Do accounting for finished episodes.
                if episode_terminals[i]:
                    episodes_executed += 1
                    self.episodes_since_update += 1
                    episode_duration = time.perf_counter() - self.episode_starts[i]
                    self.finished_episode_rewards[i].append(
                        self.episode_returns[i])
                    self.finished_episode_durations[i].append(episode_duration)
                    self.finished_episode_timesteps[i].append(
                        self.episode_timesteps[i])

                    self.log_finished_episode(
                        episode_return=self.episode_returns[i],
                        duration=episode_duration,
                        timesteps=self.episode_timesteps[i],
                        env_num=i)

                    # Reset this environment and its preprocessor stack.
                    env_states[i] = self.vector_env.reset(i)
                    if self.worker_executes_preprocessing and self.preprocessors[
                            env_id] is not None:
                        self.preprocessors[env_id].reset()
                        # This re-fills the sequence with the reset state.
                        state = self.agent.state_space.force_batch(
                            env_states[i])
                        # Pre-process and add to the buffer.
                        self.preprocessed_states_buffer[i] = np.array(
                            self.preprocessors[env_id].preprocess(state))
                        self.state_is_preprocessed[env_id] = True

                    self.episode_returns[i] = 0
                    self.episode_timesteps[i] = 0
                    self.episode_starts[i] = time.perf_counter()
                else:
                    # Otherwise, carry over the next state as the new current state.
                    env_states[i] = next_states[i]

                if self.worker_executes_preprocessing and self.preprocessors[
                        env_id] is not None:
                    #next_state = self.agent.state_space.force_batch(env_states[i])
                    next_states[i] = np.array(
                        self.preprocessors[env_id].preprocess(
                            env_states[i]))  # next_state
                self._observe(self.env_ids[i], preprocessed_states[i],
                              env_actions[i], env_rewards[i], next_states[i],
                              episode_terminals[i])
            self.update_if_necessary()
            timesteps_executed += self.num_environments
            num_timesteps_reached = (0 < num_timesteps <= timesteps_executed)

            if 0 < num_episodes <= episodes_executed or num_timesteps_reached:
                break

        total_time = (time.perf_counter() - start) or 1e-10

        # Return values for the current episode(s) if none have been completed yet.
        if episodes_executed == 0:
            mean_episode_runtime = 0
            mean_episode_reward = np.mean(self.episode_returns)
            max_episode_reward = np.max(self.episode_returns)
            final_episode_reward = self.episode_returns[0]
        else:
            all_finished_durations = []
            all_finished_rewards = []
            for i in range_(self.num_environments):
                all_finished_rewards.extend(self.finished_episode_rewards[i])
                all_finished_durations.extend(
                    self.finished_episode_durations[i])
            mean_episode_runtime = np.mean(all_finished_durations)
            mean_episode_reward = np.mean(all_finished_rewards)
            max_episode_reward = np.max(all_finished_rewards)
            final_episode_reward = all_finished_rewards[-1]

        self.episode_terminals = episode_terminals
        self.env_states = env_states
        results = dict(
            runtime=total_time,
            # Agent act/observe throughput.
            timesteps_executed=timesteps_executed,
            ops_per_second=(timesteps_executed / total_time),
            # Env frames including action repeats.
            env_frames=self.env_frames,
            env_frames_per_second=(self.env_frames / total_time),
            episodes_executed=episodes_executed,
            episodes_per_minute=(episodes_executed / (total_time / 60)),
            mean_episode_runtime=mean_episode_runtime,
            mean_episode_reward=mean_episode_reward,
            max_episode_reward=max_episode_reward,
            final_episode_reward=final_episode_reward)

        # Total time of run.
        self.logger.info("Finished execution in {} s".format(total_time))
        # Total (RL) timesteps (actions) done (and timesteps/sec).
        self.logger.info("Time steps (actions) executed: {} ({} ops/s)".format(
            results['timesteps_executed'], results['ops_per_second']))
        # Total env-timesteps done (including action repeats) (and env-timesteps/sec).
        self.logger.info(
            "Env frames executed (incl. action repeats): {} ({} frames/s)".
            format(results['env_frames'], results['env_frames_per_second']))
        # Total episodes done (and episodes/min).
        self.logger.info("Episodes finished: {} ({} episodes/min)".format(
            results['episodes_executed'], results['episodes_per_minute']))
        self.logger.info("Mean episode runtime: {}s".format(
            results['mean_episode_runtime']))
        self.logger.info("Mean episode reward: {}".format(
            results['mean_episode_reward']))
        self.logger.info("Max. episode reward: {}".format(
            results['max_episode_reward']))
        self.logger.info("Final episode reward: {}".format(
            results['final_episode_reward']))

        return results
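The Dict "batch flipping" performed above can be shown in isolation. The following standalone sketch uses plain numpy arrays instead of actual agent output and mirrors the dict-comprehension used in the loop above:

import numpy as np

# A batched Dict action as returned by the agent: batch rank at index 0 per key.
actions = {"A": np.array([0, 1]), "B": np.array([2, 3])}

# Flip into one dict per environment (batch item), dropping the batch rank.
some_key = next(iter(actions))
env_actions = [
    {key: value[i] for key, value in actions.items()}
    for i in range(len(actions[some_key]))
]
print(env_actions)  # -> [{'A': 0, 'B': 2}, {'A': 1, 'B': 3}]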
Beispiel #20
0
    def __init__(self, shape, specification=None, **kwargs):
        """
        Args:
            shape (tuple): The shape of the Variables to initialize.
            specification (any): A spec that determines the nature of this initializer.

        Raises:
            RLGraphError: If a fixed shape in `specification` does not match `shape`.
        """
        super(Initializer, self).__init__()

        # The shape of the variable to be initialized.
        self.shape = shape
        # The actual underlying initializer object.
        self.initializer = None

        # Truncated Normal.
        if specification == "truncated_normal":
            if get_backend() == "tf":
                # Use the first dimension (num_rows or batch rank) to figure out the stddev.
                stddev = 1 / math.sqrt(shape[0] if isinstance(
                    shape, (tuple, list)) and len(shape) > 0 else 1.0)
                self.initializer = tf.truncated_normal_initializer(
                    stddev=stddev)
            elif get_backend() == "pytorch":
                stddev = 1 / math.sqrt(shape[0] if isinstance(
                    shape, (tuple, list)) and len(shape) > 0 else 1.0)
                self.initializer = lambda t: torch.nn.init.normal_(tensor=t,
                                                                   std=stddev)

        # No spec -> Leave initializer as None for TF (will then use default;
        #  e.g. for tf weights: Xavier uniform). For PyTorch, still have to set Xavier.
        # TODO: The `is None or is False` check is unclean because TF and PyTorch have different defaults ->
        # change to clean default values for weights and biases.
        elif specification is None or specification is False:
            if get_backend() == "tf":
                pass
            elif get_backend() == "pytorch":
                self.initializer = torch.nn.init.xavier_uniform_

        # Fixed values spec -> Use them, just do sanity checking.
        else:
            # Constant value across the variable.
            if isinstance(specification, (float, int)):
                pass
            # A 1D initializer (e.g. for biases).
            elif isinstance(specification, list):
                array = np.asarray(specification,
                                   dtype=convert_dtype("float32", "np"))
                if array.shape != self.shape:
                    raise RLGraphError(
                        "ERROR: Number/shape of given items ({}) not identical with shape ({})!"
                        .format(array.shape, self.shape))
            # A nD initializer (numpy-array).
            elif isinstance(specification, np.ndarray):
                if specification.shape != self.shape:
                    raise RLGraphError(
                        "ERROR: Shape of given items ({}) not identical with shape ({})!"
                        .format(specification.shape, self.shape))
            # Unknown type.
            else:
                raise RLGraphError(
                    "ERROR: Bad specification given ({}) for Initializer object!"
                    .format(specification))

            # Create the backend initializer object.
            if get_backend() == "tf":
                self.initializer = tf.constant_initializer(
                    value=specification, dtype=convert_dtype("float32"))
            elif get_backend() == "pytorch":
                self.initializer = lambda t: torch.nn.init.constant_(
                    tensor=t, val=specification)
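A short usage sketch of the specification types handled above (shapes and values are illustrative; the Initializer import path is omitted):

# Truncated-normal init for a (64, 32) weight matrix (stddev = 1 / sqrt(64)).
weights_init = Initializer(shape=(64, 32), specification="truncated_normal")

# Constant init from a list; the list's shape must match `shape` exactly.
bias_init = Initializer(shape=(32,), specification=[0.1] * 32)

# No spec: TF keeps `initializer` as None (backend default, e.g. Xavier uniform),
# while PyTorch falls back to an explicit xavier_uniform_.
default_init = Initializer(shape=(64, 32), specification=None)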
Beispiel #21
0
    def __init__(self,
                 network_spec,
                 action_space=None,
                 action_adapter_spec=None,
                 deterministic=True,
                 scope="policy",
                 **kwargs):
        """
        Args:
            network_spec (Union[NeuralNetwork,dict]): The NeuralNetwork Component or a specification dict to build
                one.

            action_space (Space): The action Space within which this Component will create actions.

            action_adapter_spec (Optional[dict]): A spec-dict to create an ActionAdapter. Use None for the default
                ActionAdapter object.

            deterministic (bool): Whether to pick actions according to the max-likelihood value or via sampling.
                Default: True.

            batch_apply (bool): Whether to wrap both the NN and the ActionAdapter with a BatchApply Component in order
                to fold time rank into batch rank before a forward pass.
        """
        super(Policy, self).__init__(scope=scope, **kwargs)

        self.neural_network = NeuralNetwork.from_spec(
            network_spec)  # type: NeuralNetwork

        # Create the necessary action adapters for this Policy. One for each action space component.
        self.action_adapters = dict()
        if action_space is None:
            self.action_adapters[""] = ActionAdapter.from_spec(
                action_adapter_spec)
            self.action_space = self.action_adapters[""].action_space
            # Assert single component action space.
            assert len(self.action_space.flatten()) == 1,\
                "ERROR: Action space must not be ContainerSpace if no `action_space` is given in Policy c'tor!"
        else:
            self.action_space = Space.from_spec(action_space)
            for i, (flat_key, action_component) in enumerate(
                    self.action_space.flatten().items()):
                if action_adapter_spec is not None:
                    aa_spec = action_adapter_spec.get(flat_key,
                                                      action_adapter_spec)
                    aa_spec["action_space"] = action_component
                else:
                    aa_spec = dict(action_space=action_component)
                self.action_adapters[flat_key] = ActionAdapter.from_spec(
                    aa_spec, scope="action-adapter-{}".format(i))

        self.deterministic = deterministic

        # Figure out our Distributions.
        self.distributions = dict()
        for i, (flat_key, action_component) in enumerate(
                self.action_space.flatten().items()):
            if isinstance(action_component, IntBox):
                self.distributions[flat_key] = Categorical(
                    scope="categorical-{}".format(i))
            # Continuous action space -> Normal distribution (each action needs mean and variance from network).
            elif isinstance(action_component, FloatBox):
                self.distributions[flat_key] = Normal(
                    scope="normal-{}".format(i))
            else:
                raise RLGraphError(
                    "ERROR: `action_component` is of type {} and not allowed in {} Component!"
                    .format(type(action_component).__name__, self.name))

        self.add_components(*[self.neural_network] +
                            list(self.action_adapters.values()) +
                            list(self.distributions.values()))
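To visualize the flat-key loop above: a container action Space is flattened into one leaf Space per key, and one ActionAdapter/Distribution pair is created per leaf. The sketch below is illustrative; the exact flat-key strings produced by `flatten()` are an assumption.

# Hypothetical container action space.
action_space = Dict({
    "steering": FloatBox(low=-1.0, high=1.0, shape=(1,)),
    "gear": IntBox(3),
})

# Flattening yields one entry per leaf component, keyed by its flat path, e.g.:
#   {"/steering": FloatBox(...), "/gear": IntBox(3)}
# so the Policy constructed above ends up with something like:
#   action_adapters = {"/steering": ActionAdapter(...), "/gear": ActionAdapter(...)}
#   distributions   = {"/steering": Normal(...),        "/gear": Categorical(...)}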
    def _execute(self,
                 num_timesteps=None,
                 num_episodes=None,
                 max_timesteps_per_episode=None,
                 use_exploration=True,
                 update_spec=None,
                 frameskip=None,
                 reset=True):
        """
        Actual implementation underlying `execute_timesteps` and `execute_episodes`.

        Args:
            num_timesteps (Optional[int]): The maximum number of timesteps to run. At least one of `num_timesteps` or
                `num_episodes` must be provided.
            num_episodes (Optional[int]): The maximum number of episodes to run. At least one of `num_timesteps` or
                `num_episodes` must be provided.
            use_exploration (Optional[bool]): Indicates whether to utilize exploration (epsilon or noise based)
                when picking actions. Default: True.
            max_timesteps_per_episode (Optional[int]): Can be used to limit the number of timesteps per episode.
                Use None or 0 for no limit. Default: None.
            update_spec (Optional[dict]): Update parameters. If None, the worker only performs rollouts.
                Matches the structure of an Agent's update_spec dict and will be "defaulted" by that dict.
                See `input_parsing/parse_update_spec.py` for more details.
            frameskip (Optional[int]): How often actions are repeated after retrieving them from the agent.
                Rewards are accumulated over the number of skips. Use None for the Worker's default value.
            reset (bool): Whether to reset the environment and all the Worker's internal counters.
                Default: True.

        Returns:
            dict: Execution statistics.
        """
        assert num_timesteps is not None or num_episodes is not None,\
            "ERROR: One of `num_timesteps` or `num_episodes` must be provided!"
        # Are we updating or just acting/observing?
        update_spec = default_dict(update_spec, self.agent.update_spec)
        self.set_update_schedule(update_spec)

        num_timesteps = num_timesteps or 0
        num_episodes = num_episodes or 0
        max_timesteps_per_episode = [
            max_timesteps_per_episode or 0
            for _ in range_(self.num_environments)
        ]
        frameskip = frameskip or self.frameskip

        # Stats.
        timesteps_executed = 0
        episodes_executed = 0

        start = time.perf_counter()
        episode_terminals = self.episode_terminals
        if reset is True:
            self.env_frames = 0
            self.finished_episode_rewards = [
                [] for _ in range_(self.num_environments)
            ]
            self.finished_episode_durations = [
                [] for _ in range_(self.num_environments)
            ]
            self.finished_episode_timesteps = [
                [] for _ in range_(self.num_environments)
            ]

            for i, env_id in enumerate(self.env_ids):
                self.episode_returns[i] = 0
                self.episode_timesteps[i] = 0
                self.episode_terminals[i] = False
                self.episode_starts[i] = time.perf_counter()
                if self.worker_executes_preprocessing:
                    self.state_is_preprocessed[env_id] = False

            self.env_states = self.vector_env.reset_all()
            self.agent.reset()
        elif self.env_states[0] is None:
            raise RLGraphError(
                "Runner must be reset at the very beginning. Environment is in invalid state."
            )

        # Only run everything for at most num_timesteps (if defined).
        env_states = self.env_states
        while not (0 < num_timesteps <= timesteps_executed):
            if self.render:
                # This renders the first underlying environment.
                self.vector_env.render()

            if self.worker_executes_preprocessing:
                for i, env_id in enumerate(self.env_ids):
                    state = self.agent.state_space.force_batch(env_states[i])
                    if self.preprocessors[env_id] is not None:
                        if self.state_is_preprocessed[env_id] is False:
                            self.preprocessed_states_buffer[
                                i] = self.preprocessors[env_id].preprocess(
                                    state)
                            self.state_is_preprocessed[env_id] = True
                    else:
                        self.preprocessed_states_buffer[i] = env_states[i]
                # TODO extra returns when worker is not applying preprocessing.
                actions = self.agent.get_action(
                    states=self.preprocessed_states_buffer,
                    use_exploration=use_exploration,
                    apply_preprocessing=self.apply_preprocessing)
                preprocessed_states = np.array(self.preprocessed_states_buffer)
            else:
                preprocessed_states, actions = self.agent.get_action(
                    states=np.array(env_states),
                    use_exploration=use_exploration,
                    apply_preprocessing=True,
                    extra_returns="preprocessed_states")

            # Accumulate the reward over n env-steps (equals one action pick). n=self.frameskip.
            env_rewards = [0 for _ in range_(self.num_environments)]
            next_states = None
            for _ in range_(frameskip):
                next_states, step_rewards, episode_terminals, infos = self.vector_env.step(
                    actions=actions)

                self.env_frames += self.num_environments
                for i, step_reward in enumerate(step_rewards):
                    env_rewards[i] += step_reward
                if np.any(episode_terminals):
                    break

            # Only render once per action.
            if self.render:
                self.vector_env.environments[0].render()

            for i, env_id in enumerate(self.env_ids):
                self.episode_returns[i] += env_rewards[i]
                self.episode_timesteps[i] += 1

                if 0 < max_timesteps_per_episode[i] <= self.episode_timesteps[
                        i]:
                    episode_terminals[i] = True
                if self.worker_executes_preprocessing:
                    self.state_is_preprocessed[env_id] = False
                # Do accounting for finished episodes.
                if episode_terminals[i]:
                    episodes_executed += 1
                    episode_duration = time.perf_counter() - self.episode_starts[i]
                    self.finished_episode_rewards[i].append(
                        self.episode_returns[i])
                    self.finished_episode_durations[i].append(episode_duration)
                    self.finished_episode_timesteps[i].append(
                        self.episode_timesteps[i])

                    self.log_finished_episode(
                        reward=self.episode_returns[i],
                        duration=episode_duration,
                        timesteps=self.episode_timesteps[i],
                        env_num=i)

                    # Reset this environment and its preprocessor stack.
                    env_states[i] = self.vector_env.reset(i)
                    if self.worker_executes_preprocessing and self.preprocessors[
                            env_id] is not None:
                        self.preprocessors[env_id].reset()
                        # This re-fills the sequence with the reset state.
                        state = self.agent.state_space.force_batch(
                            env_states[i])
                        # Pre-process and add to the buffer.
                        self.preprocessed_states_buffer[i] = np.array(
                            self.preprocessors[env_id].preprocess(state))
                        self.state_is_preprocessed[env_id] = True

                    self.episode_returns[i] = 0
                    self.episode_timesteps[i] = 0
                    self.episode_starts[i] = time.perf_counter()
                else:
                    # Otherwise, carry over the next state as the new current state.
                    env_states[i] = next_states[i]

                if self.worker_executes_preprocessing and self.preprocessors[
                        env_id] is not None:
                    next_state = self.agent.state_space.force_batch(
                        env_states[i])
                    next_states[i] = np.array(
                        self.preprocessors[env_id].preprocess(next_state))
                # TODO: If worker does not execute preprocessing, next state is not preprocessed here.
                # Observe per environment.
                self.agent.observe(preprocessed_states=preprocessed_states[i],
                                   actions=actions[i],
                                   internals=[],
                                   rewards=env_rewards[i],
                                   next_states=next_states[i],
                                   terminals=episode_terminals[i],
                                   env_id=self.env_ids[i])
            self.update_if_necessary()
            timesteps_executed += self.num_environments
            num_timesteps_reached = (0 < num_timesteps <= timesteps_executed)

            if 0 < num_episodes <= episodes_executed or num_timesteps_reached:
                break

        total_time = (time.perf_counter() - start) or 1e-10

        # Return values for the current episode(s) if none have been completed yet.
        if episodes_executed == 0:
            mean_episode_runtime = 0
            mean_episode_reward = np.mean(self.episode_returns)
            max_episode_reward = np.max(self.episode_returns)
            final_episode_reward = self.episode_returns[0]
        else:
            all_finished_durations = []
            all_finished_rewards = []
            for i in range_(self.num_environments):
                all_finished_rewards.extend(self.finished_episode_rewards[i])
                all_finished_durations.extend(
                    self.finished_episode_durations[i])
            mean_episode_runtime = np.mean(all_finished_durations)
            mean_episode_reward = np.mean(all_finished_rewards)
            max_episode_reward = np.max(all_finished_rewards)
            final_episode_reward = all_finished_rewards[-1]

        self.episode_terminals = episode_terminals
        self.env_states = env_states
        results = dict(
            runtime=total_time,
            # Agent act/observe throughput.
            timesteps_executed=timesteps_executed,
            ops_per_second=(timesteps_executed / total_time),
            # Env frames including action repeats.
            env_frames=self.env_frames,
            env_frames_per_second=(self.env_frames / total_time),
            episodes_executed=episodes_executed,
            episodes_per_minute=(episodes_executed / (total_time / 60)),
            mean_episode_runtime=mean_episode_runtime,
            mean_episode_reward=mean_episode_reward,
            max_episode_reward=max_episode_reward,
            final_episode_reward=final_episode_reward)

        # Total time of run.
        self.logger.info("Finished execution in {} s".format(total_time))
        # Total (RL) timesteps (actions) done (and timesteps/sec).
        self.logger.info("Time steps (actions) executed: {} ({} ops/s)".format(
            results['timesteps_executed'], results['ops_per_second']))
        # Total env-timesteps done (including action repeats) (and env-timesteps/sec).
        self.logger.info(
            "Env frames executed (incl. action repeats): {} ({} frames/s)".
            format(results['env_frames'], results['env_frames_per_second']))
        # Total episodes done (and episodes/min).
        self.logger.info("Episodes finished: {} ({} episodes/min)".format(
            results['episodes_executed'], results['episodes_per_minute']))
        self.logger.info("Mean episode runtime: {}s".format(
            results['mean_episode_runtime']))
        self.logger.info("Mean episode reward: {}".format(
            results['mean_episode_reward']))
        self.logger.info("Max. episode reward: {}".format(
            results['max_episode_reward']))
        self.logger.info("Final episode reward: {}".format(
            results['final_episode_reward']))

        return results
Beispiel #23
0
        def call(*args):
            if isinstance(self.output_spaces, dict):
                assert method_name in self.output_spaces, "ERROR: Method '{}' not specified in output_spaces: {}!".\
                    format(method_name, self.output_spaces)
                specs = self.output_spaces[method_name]
            else:
                specs = self.output_spaces(method_name)

            if specs is None:
                raise RLGraphError(
                    "No Space information received for method '{}:{}'".format(
                        self.specifiable_class.__name__, method_name))

            dtypes = []
            shapes = []
            return_slots = []
            for i, space in enumerate(force_list(specs)):
                assert not isinstance(space, ContainerSpace)
                # Expecting an op (space 0).
                if space == 0:
                    dtypes.append(0)
                    shapes.append(0)
                    return_slots.append(i)
                # Expecting a tensor.
                elif space is not None:
                    dtypes.append(convert_dtype(space.dtype))
                    shapes.append(space.shape)
                    return_slots.append(i)

            if get_backend() == "tf":
                # This function will send the method call via the out-pipe to the remote (server) Specifiable
                # object - all in-graph - and return the results to be used further by other graph ops.
                def py_call(*call_args):
                    call_args = [
                        arg.decode('UTF-8') if isinstance(arg, bytes) else arg
                        for arg in call_args
                    ]
                    try:
                        self.out_pipe.send(call_args)
                        received_results = self.out_pipe.recv()

                        # If an error occurred, it'll be passed back through the pipe.
                        if isinstance(received_results, Exception):
                            raise received_results
                        elif received_results is not None:
                            return received_results

                    except Exception as e:
                        if isinstance(e, IOError):
                            raise StopIteration()  # Clean exit.
                        else:
                            print("ERROR: Sent={} Exception={}".format(
                                call_args, e))
                            raise

                results = tf.py_func(py_call, (method_name, ) + tuple(args),
                                     dtypes,
                                     name=method_name)

                # Force known shapes on the returned tensors.
                for i, (result, shape) in enumerate(zip(results, shapes)):
                    # Not an op (which have shape=0).
                    if shape != 0:
                        result.set_shape(shape)
            else:
                raise NotImplementedError

            return results[0] if len(dtypes) == 1 else tuple(results)
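The snippet above uses the TF1-style tf.py_func to bridge an out-of-graph Python call into the graph and then re-attaches the known static shapes. Below is a minimal, standalone sketch of that pattern using the TF2 equivalent tf.py_function; the "remote" call is faked with a local function (fake_remote_step is an assumption), so the pipe machinery of the original is not reproduced.

import numpy as np
import tensorflow as tf

def fake_remote_step(action):
    # Stand-in for the out-of-graph call (the original sends the args through a pipe).
    action = int(action.numpy())
    next_state = np.zeros(4, dtype=np.float32) + action
    reward = np.float32(1.0)
    return next_state, reward

action = tf.constant(1)
# Declare the dtypes of the returned tensors, analogous to the `dtypes` list above.
next_state, reward = tf.py_function(fake_remote_step, inp=[action], Tout=[tf.float32, tf.float32])
# Force the known shapes back onto the results, analogous to `result.set_shape(shape)` above.
next_state.set_shape((4,))
reward.set_shape(())
print(next_state, reward)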
Example #24
    def get_id():
        DataOpRecord._ID += 1
        if DataOpRecord._ID >= DataOpRecord.MAX_ID:
            raise RLGraphError("Maximum number of op-rec IDs reached! Simply hard-increase `DataOpRecord.MAX_ID`.")
        return DataOpRecord._ID
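For context, here is a self-contained sketch of the same capped-counter idea. The class name OpRecord and the limit value are hypothetical, and RLGraphError is replaced by a plain RuntimeError so the snippet runs without RLGraph installed.

class OpRecord:
    _ID = -1        # Last handed-out ID; the first call returns 0.
    MAX_ID = 1000   # Hypothetical cap; the real limit lives in DataOpRecord.MAX_ID.

    @staticmethod
    def get_id():
        OpRecord._ID += 1
        if OpRecord._ID >= OpRecord.MAX_ID:
            raise RuntimeError("Maximum number of op-rec IDs reached!")
        return OpRecord._ID

print(OpRecord.get_id(), OpRecord.get_id())  # -> 0 1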
Example #25
    def split_flattened_input_ops(self, *ops, **kwarg_ops):
        """
        Splits any FlattenedDataOp in *ops and **kwarg_ops into its SingleDataOps and collects them to be passed
        one by one through some graph_fn. If more than one FlattenedDataOp exists in *ops and **kwarg_ops,
        these must have the exact same keys.
        If `add_auto_key_as_first_param` is True: Add auto-key as very first parameter in each
        returned parameter tuple.

        Args:
            *ops (op): The primitive ops to split.
            **kwarg_ops (op): More primitive ops to split (but by named key).

        Returns:
            Union[FlattenedDataOp,Tuple[DataOp]]: The sorted parameter tuples (by flat-key) to use as api_methods in the
                calls to the graph_fn.
                If no FlattenedDataOp is in ops, returns ops as-is.

        Raises:
            RLGraphError: If there is more than one flattened op in `ops` and their keys don't match exactly.
        """
        assert all(op is not None for op in ops)  # just make sure

        # Collect FlattenedDataOp for checking their keys (must match).
        flattened = []
        for op in ops:
            if isinstance(op, dict) and (len(op) > 1 or "" not in op):
                flattened.append(op)

        # If there is more than one, make sure their keys match; if they don't, raise an error.
        if len(flattened) > 1:
            # Loop through the non-first ones and make sure all keys match vs the first one.
            lead_arg_dict = flattened[0]
            for other in flattened[1:]:
                other_arg_iter = iter(other)
                for key in lead_arg_dict.keys():
                    k_other = next(other_arg_iter)
                    if key != k_other:  # or get_shape(v_other) != get_shape(value):
                        raise RLGraphError("ERROR: Flattened ops have a key mismatch ({} vs {})!".format(key, k_other))

        # We have one or many (matching) ContainerDataOps: Split the calls.
        if len(flattened) > 0:
            # The first op that is a FlattenedDataOp (it guides the split).
            guide_op = flattened[0]
            # Collect the per-flat-key call parameters.
            collected_call_params = FlattenedDataOp()
            # Do the single split calls to our computation func.
            for key in guide_op.keys():
                # Prep input params for a single call.
                params = [key] if self.add_auto_key_as_first_param is True else []
                kwargs = {}
                for op in ops:
                    # Check for dict first; do not try to look up the key in a tensor (not key-indexable):
                    if isinstance(op, dict):
                        params.append(op[key] if key in op else op[""])
                    else:
                        # E.g. tuple args.
                        params.append(op)

                # Add kwarg_ops.
                for kwarg_key, kwarg_op in kwarg_ops.items():
                    kwargs[kwarg_key] = kwarg_op[key] if key in kwarg_op else kwarg_op[""]
                # Now do the single call.
                collected_call_params[key] = (params, kwargs)
            return collected_call_params
        # We don't have any container ops: No splitting possible. Return args and kwargs as is.
        else:
            params = [""] if self.add_auto_key_as_first_param is True else []
            params += [op[""] if isinstance(op, dict) else op for op in ops]
            return tuple(params), {key: value[""] for key, value in kwarg_ops.items()}
Example #26
def define_api_method(component, api_method_record, copy_record=True):
    """
    Registers an API-method with a Component instance.

    Args:
        component (Component): The Component object to register the API method with.
        api_method_record (APIMethodRecord): The APIMethodRecord describing the to-be-registered API-method.
        copy_record (bool): Whether to deepcopy the APIMethodRecord prior to handing it to the Component for storing.
    """
    # Deep-copy the record (in case it was registered the normal way via decorating a class method).
    if copy_record:
        api_method_record = copy.deepcopy(api_method_record)
    api_method_record.component = component

    # Raise an error if `name` is already taken in this Component.
    if not api_method_record.ok_to_overwrite:
        # There already is an API-method with that name.
        if api_method_record.name in component.api_methods:
            raise RLGraphError(
                "API-method with name '{}' already defined!".format(
                    api_method_record.name))
        # There already is another object property with that name (avoid accidental overriding).
        elif not api_method_record.is_class_method and getattr(
                component, api_method_record.name, None) is not None:
            raise RLGraphError(
                "Component '{}' already has a property called '{}'. Cannot define an API-method with "
                "the same name!".format(component.name,
                                        api_method_record.name))

    # Do not build this API-method if it was switched off via ctor instructions.
    if api_method_record.name in component.switched_off_apis:
        return

    component.synthetic_methods.add(api_method_record.name)
    setattr(
        component, api_method_record.name,
        api_method_record.wrapper_func.__get__(component, component.__class__))
    setattr(api_method_record.wrapper_func, "__name__", api_method_record.name)

    component.api_methods[api_method_record.name] = api_method_record

    # Direct callable for eager/define by run.
    component.api_fn_by_name[
        api_method_record.name] = api_method_record.wrapper_func

    # Update the api_method_inputs dict (with empty Spaces if not defined yet).
    skip_args = 1  # self
    skip_args += (api_method_record.is_graph_fn_wrapper
                  and api_method_record.add_auto_key_as_first_param)
    param_list = list(
        inspect.signature(
            api_method_record.func).parameters.values())[skip_args:]

    for param in param_list:
        component.api_methods[api_method_record.name].input_names.append(
            param.name)
        if param.name not in component.api_method_inputs:
            # This param has a default value.
            if param.default != inspect.Parameter.empty:
                # Default is None. Set to "flex" (to signal that this Space is not needed for input-completeness)
                # and wait for first call using this parameter (only then set it to that Space).
                if param.default is None:
                    component.api_method_inputs[param.name] = "flex"
                # Default is some python value (e.g. a bool). Use that as the assigned Space.
                else:
                    space = get_space_from_op(param.default)
                    component.api_method_inputs[param.name] = space
            # This param is an *args param. Store as "*flex". Then with upcoming API calls, we determine the Spaces
            # for the single items in *args and set them under "param[0]", "param[1]", etc..
            elif param.kind == inspect.Parameter.VAR_POSITIONAL:
                component.api_method_inputs[param.name] = "*flex"
            # This param is a **kwargs param. Store as "**flex". Then with upcoming API calls, we determine the Spaces
            # for the single items in **kwargs and set them under "param[some-key]", "param[some-other-key]", etc..
            elif param.kind == inspect.Parameter.VAR_KEYWORD:
                component.api_method_inputs[param.name] = "**flex"
            # Normal positional parameter without a default. Store as None (Space still needed) for now.
            else:
                component.api_method_inputs[param.name] = None
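The parameter scan at the end of define_api_method follows plain inspect.signature semantics. Below is a hedged, standalone re-enactment of that classification on a toy function; the function name, the skip_args default, and the stand-in for get_space_from_op are illustrative assumptions, not taken from a real Component.

import inspect

def classify_params(func, skip_args=1):
    spaces = {}
    params = list(inspect.signature(func).parameters.values())[skip_args:]
    for param in params:
        if param.kind == inspect.Parameter.VAR_POSITIONAL:
            spaces[param.name] = "*flex"
        elif param.kind == inspect.Parameter.VAR_KEYWORD:
            spaces[param.name] = "**flex"
        elif param.default is inspect.Parameter.empty:
            spaces[param.name] = None          # Required: Space still unknown.
        elif param.default is None:
            spaces[param.name] = "flex"        # Optional: not needed for input-completeness.
        else:
            spaces[param.name] = type(param.default).__name__  # Stand-in for get_space_from_op.
    return spaces

def api_method(self, states, deterministic=None, time_percentage=0.5, *extra, **kwargs):
    pass

print(classify_params(api_method))
# -> {'states': None, 'deterministic': 'flex', 'time_percentage': 'float', 'extra': '*flex', 'kwargs': '**flex'}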
Example #27
    def __init__(self, *input_names, **kwargs):
        raise RLGraphError(
            "DictMerger component is no longer supported! Please use ContainerMerger (same API) instead."
        )