Esempio n. 1
0
    def get_number_and_flatten_records(self, records, single):
        """
        Returns the number of records (even if a single, non-batched record is provided) and the flattened records.

        If `self.next_record_setup` is set, the configured next-value fields are removed from `records` (in place),
        flattened and appended to `self.next_records`. In that case the batch size is also pinned on first use
        (`self.batch_size`) and enforced on every later call.

        Args:
            records (any): The records to insert. Must support item access/deletion by field name if
                `self.next_record_setup` is set.

            single (bool): Optional flag to indicate that we are being passed a single record. This will avoid a
                shape comparison against our record_space, but is otherwise ok to leave as False, even if the
                incoming record is single/non-batched.

        Returns:
            Tuple:
                - int: The number of records.
                - list: The flattened records.

        Raises:
            SurrealError: If `self.next_record_setup` is set and the incoming batch size differs from the
                batch size seen on the first call.
            AssertionError: If `self.capacity` is not a multiple of the batch size, or if the number of flattened
                records does not match `self.flat_record_space`.
        """
        # Extract next-values from records before flattening.
        flat_next_records = None
        if self.next_record_setup:
            next_records = {}
            for field, (next_field, _bins) in self.next_record_setup.items():
                next_value = records[next_field]
                del records[next_field]
                next_records[field] = next_value
            flat_next_records = tf.nest.flatten(next_records)

        flat_records = tf.nest.flatten(records)
        # Single (non-batched) record.
        # Fixed keyword: `include_main_axes` (was misspelled `include_main_Axes`).
        if single is True or self.flat_record_space[0].get_shape(include_main_axes=True) == \
                (self.capacity,) + flat_records[0].shape:
            num_records = 0
        else:
            num_records = get_batch_size(flat_records[0])
        # Non batched, single entry -> Add batch rank.
        if num_records == 0:
            flat_records = [np.array([r]) for r in flat_records]
            num_records = 1

        # Check for correct batch size.
        if self.next_record_setup:
            if self.batch_size is None:
                self.batch_size = num_records
                assert self.capacity % self.batch_size == 0, \
                    "ERROR: `batch_size` set to {}. But `capacity` must be a multiple of memory's `batch_size`!".\
                    format(self.batch_size)
            elif num_records != self.batch_size:
                raise SurrealError(
                    "Incoming batch has wrong size ({}). Must always be {}!".
                    format(num_records, self.batch_size))

        # Make sure `records` roughly matches our record_space.
        assert len(flat_records) == len(self.flat_record_space), \
            "ERROR: Structure of `records` does not seem to match `self.record_space`!"

        # We have a `next_record_setup`.
        if self.next_record_setup:
            # Add the next-values to our "reserve" area.
            self.next_records.append(flat_next_records)

        return num_records, flat_records
Esempio n. 2
0
def keras_from_spec(spec):
    """
    Builds a Keras Sequential model from a list/tuple of layer config dicts.

    Each config dict must contain a "name" key (case-insensitive) selecting one of Dense, Conv2D, Flatten, LSTM
    (built via `from_config` with the remaining keys) or "onehot" (a Lambda layer wrapping `tf.one_hot`, with the
    remaining keys passed as keyword arguments). The given config dicts are not modified.

    Args:
        spec (any): A list/tuple of layer config dicts, or anything else.

    Returns:
        The built `tf.keras.models.Sequential` if `spec` is a list/tuple, otherwise `spec` unchanged.

    Raises:
        SurrealError: If a layer name is not one of the supported ones.
        KeyError: If a layer config has no "name" key.
    """
    def _one_hot_layer(one_hot_kwargs):
        # Build the lambda inside its own scope so each layer keeps its own kwargs. A lambda defined directly
        # in the loop would look up the loop variable late and all one-hot layers would use the last config.
        return tf.keras.layers.Lambda(
            lambda in_: tf.one_hot(in_, **one_hot_kwargs))

    # Anything that is not a list of layers is passed through untouched.
    if not isinstance(spec, (list, tuple)):
        return spec

    # Layers are given as list -> Build a simple Keras sequential model using Keras configs.
    sequential = tf.keras.models.Sequential()
    for layer in spec:
        layer_copy = copy.deepcopy(layer)  # protect original config
        name = layer_copy.pop("name").lower()
        class_ = None
        for match in [Dense, Conv2D, Flatten, LSTM]:
            if match.__name__.lower() == name:
                class_ = match
                break
        if class_:
            sequential.add(class_.from_config(layer_copy))
        elif name == "onehot":
            sequential.add(_one_hot_layer(layer_copy))
        else:
            raise SurrealError(
                "Unknown layer/tf-op '{}'!".format(name))
    return sequential
Esempio n. 3
0
def convert_dtype(dtype, to="tf"):
    """
    Translates any type (tf, numpy, python, etc..) into the respective tensorflow/numpy data type.

    The match is done on the string representation of `dtype` by substring, checking the more specific
    names first (e.g. "float64" before "float", "int64"/"uint8"/"int16" before "int").

    Args:
        dtype (any): String describing a numerical type (e.g. 'float'), numpy data type, tf dtype,
            or python numerical type.
        to (str): 'np' to get a numpy type; any other value (default: 'tf') yields the tensorflow type.

    Returns:
        The numpy type (if `to` == "np") or the tensorflow dtype matching `dtype`.

    Raises:
        SurrealError: If `dtype` matches none of the supported type names.
    """
    dtype = str(dtype)
    if "bool" in dtype:
        return np.bool_ if to == "np" else tf.bool
    elif "float64" in dtype:
        return np.float64 if to == "np" else tf.float64
    elif "float" in dtype:
        return np.float32 if to == "np" else tf.float32
    elif "int64" in dtype:
        return np.int64 if to == "np" else tf.int64
    elif "uint8" in dtype:
        return np.uint8 if to == "np" else tf.uint8
    elif "int16" in dtype:
        return np.int16 if to == "np" else tf.int16
    elif "int" in dtype:
        return np.int32 if to == "np" else tf.int32
    elif "str" in dtype:
        # `np.str_` is the same type the old `np.unicode_` alias pointed to; the alias was removed in NumPy 2.0.
        return np.str_ if to == "np" else tf.string

    raise SurrealError(
        "Error: Type conversion to '{}' for type '{}' not supported.".format(
            to, str(dtype)))
Esempio n. 4
0
    def __init__(self, spec=None, **kwargs):
        """
        Builds a Dict space from a spec mapping (or a generator of key/value pairs).

        Args:
            spec (Optional[Union[dict,generator]]): Maps str keys to sub-space definitions. Each value may be
                a Space, a list/tuple (becomes a Tuple), a plain dict without "type" key (becomes a nested Dict)
                or anything else (handed to `Space.make`). If None, the remaining `kwargs` are used as spec.

        Keyword Args:
            main_axes: Passed on to the sub-spaces and to the ContainerSpace constructor.
            value: Passed on to the ContainerSpace constructor.
            do_not_overwrite_items_extra_ranks (bool): If True, Space values are taken as-is and other
                sub-spaces are built with this flag instead of `main_axes`.

        Raises:
            SurrealError: If a key is not a str.
        """
        space_dict = {}
        main_axes = kwargs.pop("main_axes", None)
        value = kwargs.pop("value", None)

        self.do_not_overwrite_items_extra_ranks = kwargs.pop(
            "do_not_overwrite_items_extra_ranks", False)
        keep_item_ranks = self.do_not_overwrite_items_extra_ranks is True

        # Keyword arguments used whenever a sub-space has to be constructed from a spec.
        if keep_item_ranks:
            sub_space_kwargs = dict(do_not_overwrite_items_extra_ranks=True)
        else:
            sub_space_kwargs = dict(main_axes=main_axes)

        # Spec may be part of kwargs.
        if spec is None:
            spec = kwargs

        # `spec` could be a dict or a generator (when using tf.nest to map over a Dict).
        if type(spec).__name__ == "generator":
            entries = spec
        else:
            entries = spec.items()

        for key, val in entries:
            # Keys must be strings.
            if not isinstance(key, str):
                raise SurrealError("No non-str keys allowed in a Dict-Space!")

            if isinstance(val, Space):
                # Already a Space: reset its value, then use it directly or a re-axed version of it.
                val.value = None
                if keep_item_ranks:
                    sub_space = val
                else:
                    sub_space = val.strip_axes().with_axes(main_axes=main_axes)
            elif isinstance(val, (list, tuple)):
                # List/tuple -> Tuple space.
                sub_space = Tuple(*val, **sub_space_kwargs)
            elif isinstance(val, dict) and "type" not in val:
                # Plain dict (no "type" field) -> nested Dict space.
                sub_space = Dict(val, **sub_space_kwargs)
            else:
                # Spec-dict with "type" field or any non-dict spec -> generic factory.
                sub_space = Space.make(val, **sub_space_kwargs)

            space_dict[key] = sub_space
            # Set the parent of the added Space to `self`.
            sub_space.parent = self

        # NOTE(review): `self` is only filled by `dict.__init__` below, so `self.keys()` is evaluated
        # before the sub-spaces are stored -- confirm that this ordering is intended.
        ContainerSpace.__init__(
            self,
            shape=tuple(self[key].shape for key in sorted(self.keys())),
            main_axes=main_axes,
            value=value)
        dict.__init__(self, space_dict)
Esempio n. 5
0
def get_distribution_spec_from_adapter(distribution_adapter):
    """
    Returns the Distribution spec that belongs to the given adapter object.

    The decision is made purely on the class name of `distribution_adapter`.

    Args:
        distribution_adapter: The adapter object. Must expose `num_mixtures` if it is a
            MixtureDistributionAdapter.

    Returns:
        Optional[dict]: The distribution spec dict, or None for a PlainOutputAdapter.

    Raises:
        SurrealError: If the adapter's class name is not known.
    """
    adapter_name = type(distribution_adapter).__name__

    # Adapters whose distribution spec consists of nothing but a type.
    # TODO: What about multi-variate normal with non-trivial co-var matrices?
    plain_types = {
        "CategoricalDistributionAdapter": "categorical",
        "GumbelSoftmaxDistributionAdapter": "gumbel-softmax",
        "BernoulliDistributionAdapter": "bernoulli",
        "NormalDistributionAdapter": "normal",
        "BetaDistributionAdapter": "beta",
        "SquashedNormalDistributionAdapter": "squashed-normal",
    }
    if adapter_name in plain_types:
        return {"type": plain_types[adapter_name]}

    if adapter_name == "MixtureDistributionAdapter":
        # TODO: MixtureDistribution is generic (any sub-distributions, but its AA is not (only supports mixture-Normal))
        sub_specs = ["multivariate-normal"] * distribution_adapter.num_mixtures
        return {"type": "mixture", "_args": sub_specs}

    if adapter_name == "PlainOutputAdapter":
        return None

    raise SurrealError(
        "'{}' is an unknown DistributionAdapter type!".format(adapter_name))
Esempio n. 6
0
    def from_file(cls, filename, *args, **kwargs):
        """
        Create object from spec saved in filename. Expects json or yaml format.

        Files ending in ".yaml"/".yml" are parsed as YAML, everything else as JSON. The parsed spec gets the
        given `*args` stored under its "_args" key and is then handed to `cls.make`.

        Args:
            filename: File containing the spec (json or yaml), resolved against the current working directory.
            *args: Positional arguments to store under the spec's "_args" key.

        Keyword Args:
            Used as additional parameters for the call to `cls.make`.

        Returns:
            object: Whatever `cls.make` returns for the loaded spec.

        Raises:
            SurrealError: If `filename` does not point to an existing file.
        """
        path = os.path.join(os.getcwd(), filename)
        if not os.path.isfile(path):
            raise SurrealError('No such file: {}'.format(filename))

        with open(path, 'rt') as fp:
            if path.endswith(('.yaml', '.yml')):
                # `safe_load` only builds plain python data. `yaml.load` without an explicit Loader can
                # construct arbitrary objects and is rejected by recent PyYAML versions.
                spec = yaml.safe_load(fp)
            else:
                spec = json.load(fp)

        # Add possible *args.
        spec["_args"] = args
        return cls.make(spec=spec, **kwargs)
Esempio n. 7
0
    def _get_np_shape(self, size=None):
        """
        Helper to determine, which shape one should pass to the numpy random funcs for sampling from a Space.
        Depends on `size`, the `shape` of this Space and the `self.has_batch_rank/has_time_rank` settings.

        Args:
            size: See `self.sample()`.

        Returns:
            Tuple[int]: Shape to use for numpy random sampling.
        """
        # Default dims according to self.main_axes (use one for undefined dimensions).
        if size is None:
            return tuple([i if i is not None else 1 for i in self.get_shape(include_main_axes=True)])

        # With one axis.
        if isinstance(size, int):
            assert len(self.main_axes) == 1,\
                "ERROR: `size` must be a tuple of len {} (number of main-axes)!".format(len(self.main_axes))
            return (size,) + self.shape

        # With one or more axes (given as tuple).
        elif isinstance(size, (tuple, list)):
            assert len(size) == len(self.main_axes),\
                "ERROR: `size` must be of len {} (number of main-axes)!".format(len(self.main_axes))
            return tuple([i if i is not None else 1 for i in self.get_shape(include_main_axes=True)])

        raise SurrealError("`size` must be int or tuple/list!")
Esempio n. 8
0
    def inject_next_values_if_necessary(self, indices, records):
        """
        Adds the next-values to `records` if `self.next_record_setup` is defined; otherwise returns `records`
        untouched.

        For every requested index, the next-value is either read from memory `batch_size * n_step` slots ahead
        or, if the index lies within the most recently written slots, from the `self.next_records` reserve area.

        Args:
            indices (List[int]): The indices of the records to pull.
            records (List[any]): The actual records (already pulled) that now need to be extended by the next-values.

        Returns:
            The given `records`, extended in place by one entry per configured next-field.

        Raises:
            SurrealError: If the reserve area does not yet hold enough entries to serve a requested index.
        """
        if not self.next_record_setup:
            return records

        look_ahead = self.batch_size * self.n_step

        # The critical range holds the slots for which the record `look_ahead` steps further on is unrelated
        # (it stems from a much earlier insertion). For those, the reserve area has to be used instead.
        critical_range = [
            slot % self.capacity
            for slot in range(self.index - look_ahead, self.index)
        ]

        # Loop through all next-record setups.
        for field, (next_field, memory_bins) in self.next_record_setup.items():
            values_per_bin = []
            for bin_pos, memory_bin in enumerate(memory_bins):
                collected = []
                for idx in indices:
                    # Outside of the critical range -> read straight from memory, n-steps ahead.
                    if idx not in critical_range:
                        collected.append(
                            self.memory[memory_bin][(idx + look_ahead) % self.capacity])
                        continue

                    # Inside the critical range -> locate the matching entry of the reserve area.
                    reserve_pos = critical_range.index(idx)
                    reserve_batch, batch_offset = divmod(reserve_pos, self.batch_size)
                    # Not enough records in memory yet to produce an n-step sample.
                    if len(self.next_records) <= reserve_batch:
                        raise SurrealError(
                            "Memory with n-step={} not ready yet to pull records from. Insert enough samples "
                            "first to reach n-step capability. Current size={}."
                            .format(self.n_step, self.size))
                    collected.append(
                        self.next_records[reserve_batch][bin_pos][batch_offset])
                values_per_bin.append(np.array(collected))

            records[next_field] = tf.nest.pack_sequence_as(
                self.record_space[field].structure, values_per_bin)

        return records
Esempio n. 9
0
def is_bounded_space(box_space):
    """
    Tells whether the given space is a Float space with finite bounds on both sides.

    Args:
        box_space: The space to check.

    Returns:
        bool: False if `box_space` is not a Float or is unbounded on both sides, True if it is bounded on
            both sides.

    Raises:
        SurrealError: If exactly one side is unbounded (semi-bounded spaces are not supported yet).
    """
    if not isinstance(box_space, Float):
        return False

    neg_inf, pos_inf = float("-inf"), float("inf")

    # Unbounded.
    if box_space.low == neg_inf and box_space.high == pos_inf:
        return False
    # Bounded.
    if box_space.low != neg_inf and box_space.high != pos_inf:
        return True

    # TODO: Semi-bounded -> Exponential distribution.
    raise SurrealError(
        "Semi-bounded core for distribution-generation are not supported yet! You passed in low={} high={}."
        .format(box_space.low, box_space.high))
Esempio n. 10
0
 def _auto_input_lambda(self, input_component):
     """
     Returns a function that reshapes an input for the given space into
     (-1 per main axis) + (product of the space's shape incl. category rank,).

     Int inputs are one-hot encoded first if their dtype is tf.int32/tf.int64; Bool inputs are cast to
     tf.float32; Float inputs are only reshaped.

     Args:
         input_component: The input space (Int, Float or Bool).

     Returns:
         callable: Takes a single tensor and returns the reshaped tensor.

     Raises:
         SurrealError: If `input_component` is none of Int, Float, Bool.
     """
     leading_dims = (-1,) * len(input_component.main_axes)
     flat_size = int(tf.reduce_prod(input_component.get_shape(with_category_rank=True)))
     new_shape = leading_dims + (flat_size,)

     if isinstance(input_component, Int):
         return lambda in_: tf.reshape(
             in_ if in_.dtype not in [tf.int32, tf.int64]
             else tf.one_hot(in_, input_component.num_categories),
             new_shape)
     if isinstance(input_component, Float):
         return lambda in_: tf.reshape(in_, new_shape)
     if isinstance(input_component, Bool):
         return lambda in_: tf.reshape(tf.cast(in_, tf.float32), new_shape)

     raise SurrealError("Unsupported input-space type: {}!".format(
         type(input_component).__name__))
Esempio n. 11
0
    def get_records_with_indices(self, num_records=1):
        """
        Pulls `num_records` randomly chosen records from the buffer.

        Args:
            num_records (int): How many records to pull.

        Returns:
            Tuple:
                - The records returned by `self.get_records_at_indices`.
                - The memory indices these records were read from.

        Raises:
            SurrealError: If the buffer holds no records.
        """
        if self.size <= 0:
            raise SurrealError("ReplayBuffer is empty.")

        # Draw offsets (counted backwards from the latest insertion). Duplicates are only allowed
        # if more records are requested than the buffer currently holds.
        offsets = np.random.choice(
            np.arange(0, self.size),
            size=int(num_records),
            replace=num_records > self.size)
        # Translate the offsets into actual memory positions.
        indices = (self.index - 1 - offsets) % self.capacity
        records = self.get_records_at_indices(indices)

        if KeepLastMemoryBatch is True:
            self.last_records_pulled = records

        return records, indices
    def get_records_with_indices(self, num_records=1):
        """
        Pulls `num_records` records, sampling each index via the sum segment tree's prefix-sum lookup.

        Args:
            num_records (int): How many records to pull.

        Returns:
            Tuple:
                - The records returned by `self.get_records_at_indices`.
                - np.ndarray: The indices these records were read from.

        Raises:
            SurrealError: If the buffer holds no records.
        """
        if self.size <= 0:
            raise SurrealError("PrioritizedReplayBuffer is empty.")

        sum_tree = self.merged_segment_tree.sum_segment_tree

        # Scale uniform random numbers by the tree's sum over [0, size) and look up the index per draw.
        prob_sum = sum_tree.get_sum(0, self.size)  # -1?
        # TODO: check: available_records instead or num_records?
        samples = np.random.random(size=(num_records,)) * prob_sum
        indices = np.asarray([
            sum_tree.index_of_prefixsum(prefix_sum=sample)
            for sample in samples
        ])
        records = self.get_records_at_indices(indices)

        if KeepLastMemoryBatch is True:
            self.last_records_pulled = records

        return records, indices
Esempio n. 13
0
    def translate_space(space, force_float32=False):
        """
        Translates an openAI gym space into the corresponding Space class of this library.

        Container spaces (gym Tuple/Dict) are translated recursively.

        Args:
            space (gym.core.Space): The openAI Space to be translated.
            force_float32 (bool): If True, float Box spaces are translated with dtype "float32" instead of
                the Box's own dtype. Applies to all nested spaces as well.

        Returns:
            Space: The translated Space.

        Raises:
            SurrealError: If the gym space class (or the dtype of a Box) is not supported.
        """
        if isinstance(space, gym.spaces.Discrete):
            return Int(space.n)
        elif isinstance(space, gym.spaces.MultiBinary):
            return Bool(shape=(space.n, ))
        elif isinstance(space, gym.spaces.MultiDiscrete):
            # NOTE(review): `low` gets one entry per dimension of `nvec` (`nvec.ndim`), not per element of
            # `nvec` -- confirm that this is what Int expects.
            return Int(low=np.zeros((space.nvec.ndim, ), dtype=np.uint8),
                       high=space.nvec)
        elif isinstance(space, gym.spaces.Box):
            # Decide by dtype:
            box_dtype = str(space.low.dtype)
            if "int" in box_dtype:
                return Int(low=space.low, high=space.high, dtype=box_dtype)
            elif "float" in box_dtype:
                return Float(
                    low=space.low,
                    high=space.high,
                    dtype="float32" if force_float32 is True else box_dtype)
            elif "bool" in box_dtype:
                return Bool(shape=space.shape)
            # Any other Box dtype falls through to the error below.
        elif isinstance(space, gym.spaces.Tuple):
            # Forward `force_float32` just like the Dict branch does (it used to be dropped here).
            return Tuple(*[
                OpenAIGymEnv.translate_space(s, force_float32=force_float32)
                for s in space.spaces
            ])
        elif isinstance(space, gym.spaces.Dict):
            return Dict({
                key: OpenAIGymEnv.translate_space(value,
                                                  force_float32=force_float32)
                for key, value in space.spaces.items()
            })

        raise SurrealError(
            "Unknown openAI gym Space class ({}) for state_space!".format(
                space))
Esempio n. 14
0
    def reduce(self, start, limit, reduce_op=operator.add):
        """
        Folds `reduce_op` over the tree values belonging to the segment [start, limit).

        Args:
            start (int): Start index to apply reduction to.
            limit (int): End index (exclusive) to apply reduction to. None means `self.capacity`; negative
                values are counted from `self.capacity`.
            reduce_op (Union(operator.add,min,max)): Reduce op to apply.

        Returns:
            Number: Result of reduce operation (the op's neutral element for an empty segment).

        Raises:
            SurrealError: If `reduce_op` is none of operator.add, min, max.
        """
        if limit is None:
            limit = self.capacity
        if limit < 0:
            limit += self.capacity

        # Start from the neutral element of the op (all supported ops are commutative).
        supported_ops = (
            (operator.add, 0.0),
            (min, float("inf")),
            (max, float("-inf")),
        )
        for candidate_op, neutral_element in supported_ops:
            if reduce_op == candidate_op:
                result = neutral_element
                break
        else:
            raise SurrealError(
                "Unsupported reduce OP. Support ops are [add, min, max].")

        # Shift both bounds by `capacity` to get positions within `self.values`.
        left = start + self.capacity
        right = limit + self.capacity

        # Walk upwards, folding in every node that sticks out of the shrinking [left, right) window.
        while left < right:
            if left & 1:
                result = reduce_op(result, self.values[left])
                left += 1
            if right & 1:
                right -= 1
                result = reduce_op(result, self.values[right])
            left >>= 1
            right >>= 1
        return result
Esempio n. 15
0
def get_adapter_spec_from_distribution_spec(distribution_spec):
    """
    Returns the DistributionAdapter spec that matches a given Distribution spec.

    A Distribution object is created from `distribution_spec`; its lower-cased class name (stripped of
    non-word characters and of a trailing "distribution") decides on the adapter type. For mixtures, the same
    lookup is repeated for each of the sub-distributions.

    Args:
        distribution_spec (Union[dict,Distribution]): The spec of the Distribution object, for which to return an
            appropriate DistributionAdapter spec dict.

    Returns:
        dict: The spec-dict to make a DistributionAdapter.

    Raises:
        SurrealError: If the distribution type is not known.
    """
    def _type_key(obj):
        # Lower-cased class name w/o non-word characters and w/o a trailing "distribution".
        return re.sub(r'[\W]|distribution$', "", type(obj).__name__.lower())

    # Create a dummy-distribution to get features from it.
    distribution = Distribution.make(distribution_spec)
    distribution_type_str = _type_key(distribution)

    # Distribution types that translate 1:1 into an adapter type.
    adapter_types = {
        "categorical": "categorical-distribution-adapter",
        "gumbelsoftmax": "gumbel-softmax-distribution-adapter",
        "bernoulli": "bernoulli-distribution-adapter",
        "normal": "normal-distribution-adapter",
        "multivariatenormal": "multivariate-normal-distribution-adapter",
        "beta": "beta-distribution-adapter",
        "squashednormal": "squashed-normal-distribution-adapter",
    }
    if distribution_type_str in adapter_types:
        return {"type": adapter_types[distribution_type_str]}

    if distribution_type_str == "mixture":
        # One adapter spec per sub-distribution, passed on as positional args.
        sub_adapter_specs = [
            get_adapter_spec_from_distribution_spec(_type_key(sub_distribution))
            for sub_distribution in distribution.sub_distributions
        ]
        return {"type": "mixture-distribution-adapter", "_args": sub_adapter_specs}

    raise SurrealError("'{}' is an unknown Distribution type!".format(
        distribution_type_str))
Esempio n. 16
0
    def make(cls, spec=None, **kwargs):
        """
        Uses the given spec to create an object.
        If `spec` is a dict, an optional "type" key can be used as a "constructor hint" to specify a certain class
        of the object.
        If `spec` is not a dict, `spec`'s value is used directly as the "constructor hint".

        The rest of `spec` (if it's a dict) will be used as kwargs for the (to-be-determined) constructor.
        Additional keys in **kwargs will always have precedence (overwrite keys in `spec` (if a dict)).
        Also, if the spec-dict or **kwargs contains the special key "_args", it will be popped from the dict
        and used as *args list to be passed separately to the constructor.

        The following constructor hints are valid:
        - None: Use `cls.__default_constructor__` if `cls` itself defines one and no `_args` are given;
            otherwise use `cls` as constructor.
        - An already instantiated object of `cls`: Will be returned as is; no constructor call.
        - Anything for which `cls.lookup_class()` returns a constructor: That constructor will be used.
        - A python callable: Use that as constructor.
        - A string: Either a filename ending in .yaml/.yml/.json (the object is then created via
            `cls.from_file`) or the dotted name of a python module+class (e.g. "rlgraph.components.Component"),
            which will be imported and used as the constructor.

        Args:
            spec (Optional[any]): The specification dict or a constructor hint (see above).

        Keyword Args:
            kwargs (any): Optional possibility to pass the c'tor arguments in here and use spec as the type-only info.
                Then we can call this like: make([type]?, [**kwargs for ctor])
                If `spec` is already a dict, then `kwargs` will be merged with spec (overwriting keys in `spec`) after
                "type" has been popped out of `spec`.
                If a constructor of a Makeable needs an *args list of items, the special key `_args` can be passed
                inside `kwargs` with a list type value (e.g. kwargs={"_args": [arg1, arg2, arg3]}).

        Returns:
            The object generated from the spec.

        Raises:
            SurrealError: If a string hint is neither a filename, a dotted module+class name nor known to
                `cls.lookup_class()`, or if no constructor could be determined.
        """
        # `spec` is already a created object of this class -> Take it as is.
        if isinstance(spec, cls):
            return spec

        # `specifiable_type`: Indicator for the Makeable's constructor.
        # `ctor_args`: *args arguments for the constructor.
        # `ctor_kwargs`: **kwargs arguments for the constructor.
        # Try to copy so caller can reuse safely.
        # Best effort only: if `spec` cannot be deep-copied, carry on with the original object.
        try:
            spec = deepcopy(spec)
        except Exception as e:
            pass
        if isinstance(spec, dict):
            specifiable_type = spec.pop("type", None)
            ctor_kwargs = spec
            # Give kwargs priority over things defined in spec dict. This way, one can pass a generic `spec` and then
            # override single c'tor parameters via the kwargs in the call to `make`.
            ctor_kwargs.update(kwargs)
        else:
            specifiable_type = spec
            if specifiable_type is None and "type" in kwargs:
                specifiable_type = kwargs.pop("type")
            ctor_kwargs = kwargs
        # Special `_args` field in kwargs for *args-utilizing constructors.
        ctor_args = force_list(ctor_kwargs.pop("_args", []))

        # Figure out the actual constructor (class) from `specifiable_type`.
        # None: Try __default_constructor__ (if no args), only then constructor of cls (using args/kwargs).
        if specifiable_type is None:
            # We have a default constructor that was defined directly by cls (not by its children).
            if cls.__default_constructor__ is not None and ctor_args == [] and \
                    (not hasattr(cls.__bases__[0], "__default_constructor__") or
                     cls.__bases__[0].__default_constructor__ is None or
                     cls.__bases__[0].__default_constructor__ is not cls.__default_constructor__
                    ):
                constructor = cls.__default_constructor__
                # Default partial's keywords into ctor_kwargs.
                if isinstance(constructor, partial):
                    kwargs = default_dict(ctor_kwargs, constructor.keywords)
                    constructor = partial(constructor.func, **kwargs)
                    ctor_kwargs = {}  # erase to avoid duplicate kwarg error
            # Try our luck with this class itself.
            else:
                constructor = cls
        # Try the __lookup_classes__ of this class.
        else:
            constructor = cls.lookup_class(specifiable_type)

            # Found in cls.__lookup_classes__.
            if constructor is not None:
                pass
            # Python callable.
            elif callable(specifiable_type):
                constructor = specifiable_type
            # A string: Filename or a python module+class.
            elif isinstance(specifiable_type, str):
                if re.search(r'\.(yaml|yml|json)$', specifiable_type):
                    return cls.from_file(specifiable_type, *ctor_args,
                                         **ctor_kwargs)
                elif specifiable_type.find('.') != -1:
                    module_name, function_name = specifiable_type.rsplit(
                        ".", 1)
                    module = importlib.import_module(module_name)
                    constructor = getattr(module, function_name)
                else:
                    raise SurrealError(
                        "String specifier ({}) in `make` must be a filename, a module+class, or a key "
                        "into {}.__lookup_classes__!".format(
                            specifiable_type, cls.__name__))

        # Nothing usable found (e.g. hint is neither known, callable nor a string).
        if not constructor:
            raise SurrealError(
                "Invalid type '{}'. Cannot `make`.".format(specifiable_type))

        # Create object with inferred constructor.
        specifiable_object = constructor(*ctor_args, **ctor_kwargs)
        # No sanity check for fake (lambda)-"constructors".
        if type(constructor).__name__ != "function":
            assert isinstance(
                specifiable_object, constructor.func if isinstance(
                    constructor, partial) else constructor)

        return specifiable_object
Esempio n. 17
0
    def _create_adapters_and_distributions(self, output_space, adapters,
                                           distributions):
        """
        Sets `self.output_space`/`self.flat_output_space` and appends one DistributionAdapter per flattened
        output component to `self.adapters`, along with the matching entries in `self.distributions`.

        Args:
            output_space (Optional[any]): Spec for the output Space. If None, the output space is taken from
                the adapter made from `adapters` (and must then be a PrimitiveSpace).
            adapters (any): Either one generic adapter spec dict (used for all output components), a dict
                keyed like the output space (may be incomplete), a DistributionAdapter object, or a simple
                type spec.
            distributions (any): Either one generic distribution spec (used for all output components),
                a dict keyed like the output space (may be incomplete), a falsy value (no distributions),
                or True/"default" (default distribution per output component).

        Raises:
            AssertionError: If no `output_space` is given and the adapter's output space is not a
                PrimitiveSpace.
            SurrealError: If no Distribution could be made for an output component.
        """
        if output_space is None:
            adapter = DistributionAdapter.make(adapters)
            self.output_space = adapter.output_space
            # Assert single component output space.
            assert isinstance(self.output_space, PrimitiveSpace), \
                "ERROR: Output space must not be ContainerSpace if no `output_space` is given in Network constructor!"
        else:
            self.output_space = Space.make(output_space)
        self.flat_output_space = tf.nest.flatten(self.output_space)

        # Find out whether we have a generic adapter-spec (one for all output components).
        generic_adapter_spec = None
        if isinstance(adapters, dict) and \
                not any(key in adapters for key in self.output_space):
            generic_adapter_spec = adapters
        # adapters may be incomplete (add Nones to non-defined leafs).
        elif isinstance(adapters, dict):
            adapters = complement_struct(adapters,
                                         reference_struct=self.output_space)
        flat_output_adapter_spec = flatten_alongside(
            adapters, alongside=self.output_space)

        # Find out whether we have a generic distribution-spec (one for all output components).
        generic_distribution_spec = None
        if isinstance(self.output_space, PrimitiveSpace) or \
                (isinstance(distributions, dict) and not any(key in distributions for key in self.output_space)):
            generic_distribution_spec = distributions
            flat_distribution_spec = tf.nest.map_structure(
                lambda s: distributions, self.flat_output_space)
        else:
            # distributions may be incomplete (add Nones to non-defined leafs).
            if isinstance(distributions, dict):
                distributions = complement_struct(
                    distributions, reference_struct=self.output_space)
            # No distributions whatsoever.
            elif not distributions:
                distributions = complement_struct(
                    {}, reference_struct=self.output_space)
            # Use default distributions (depending on output-space(s)).
            elif distributions is True or distributions == "default":
                distributions = complement_struct(
                    {}, reference_struct=self.output_space, value=True)
            flat_distribution_spec = tf.nest.flatten(distributions)

        # Figure out our Distributions.
        for i, output_component in enumerate(self.flat_output_space):
            # Generic spec -> Use it.
            if generic_adapter_spec:
                da_spec = copy.deepcopy(generic_adapter_spec)
                da_spec["output_space"] = output_component
            # Spec dict -> find setting in possibly incomplete spec.
            elif isinstance(adapters, dict):
                # If not specified in dict -> auto-generate AA-spec.
                da_spec = flat_output_adapter_spec[i]
                da_spec["output_space"] = output_component
            # Simple type spec.
            elif not isinstance(adapters, DistributionAdapter):
                da_spec = dict(output_space=output_component)
            # Direct object.
            else:
                da_spec = adapters

            # We have to get the type of the adapter from a distribution.
            if isinstance(da_spec, dict) and "type" not in da_spec:
                # Single distribution settings for all output components.
                if generic_distribution_spec is not None:
                    settings = {} if generic_distribution_spec in [
                        "default", True, False
                    ] else (generic_distribution_spec or {})
                else:
                    settings = flat_distribution_spec[i] if isinstance(
                        flat_distribution_spec[i], dict) else {}
                # `distributions` could be simply a direct spec dict.
                if (isinstance(settings, dict) and "type" in settings) or \
                        isinstance(settings, Distribution):
                    dist_spec = settings
                else:
                    dist_spec = get_default_distribution_from_space(
                        output_component, **settings)

                # No distribution.
                if not generic_distribution_spec and not flat_distribution_spec[i]:
                    self.distributions.append(None)
                # Some distribution.
                else:
                    self.distributions.append(Distribution.make(dist_spec))
                    if self.distributions[-1] is None:
                        # Report the type of the offending component (the message used to show the type
                        # of the `output_space` argument instead).
                        raise SurrealError(
                            "`output_component` is of type {} and not allowed in {} Component!"
                            .format(
                                type(output_component).__name__,
                                type(self).__name__))
                # Special case: No distribution AND float -> plain output adapter.
                if not generic_distribution_spec and \
                        (not flat_distribution_spec[i] and isinstance(da_spec["output_space"], Float)):
                    da_spec["type"] = "plain-output-adapter"
                # All other cases: Get adapter type from distribution spec
                # (even if we don't use a distribution in the end).
                else:
                    default_dict(
                        da_spec,
                        get_adapter_spec_from_distribution_spec(dist_spec))

                self.adapters.append(DistributionAdapter.make(da_spec))

            # da_spec is completely defined  -> Use it to get distribution.
            else:
                self.adapters.append(DistributionAdapter.make(da_spec))
                # NOTE(review): this indexes `distributions` (not `flat_distribution_spec`) by the flat
                # position and appends nothing to `self.distributions` if the entry is falsy -- confirm.
                if distributions[i]:
                    dist_spec = get_distribution_spec_from_adapter(
                        self.adapters[-1])
                    self.distributions.append(Distribution.make(dist_spec))
Esempio n. 18
0
def get_space_from_data(data, num_categories=None, main_axes=None):
    """
    Tries to re-create a Space object given some DataOp (e.g. a tf op).
    This is useful for shape inference on returned ops after having run through a graph_fn.

    Args:
        data (any): The data to create a corresponding Space for.

        num_categories (Optional[Union[int,dict]]): An optional indicator, what the `num_categories` property for
            an Int should be. If `data` is a dict, this may also be a dict, in which case the value for each
            sub-component is looked up under that sub-component's key (missing keys yield None).

        main_axes (Optional[any]): Passed on to all created Spaces. For numpy arrays/tf Tensors,
            `len(main_axes)` leading dimensions are stripped from the data's shape to get the Space's shape.

    Returns:
        Space: The inferred Space object. The int 0 is returned instead if `data` (or any component of a
            dict/tuple `data`) has no `dtype` or no `get_shape` attribute and is none of the handled types.

    Raises:
        SurrealError: If `data` is a str or of some other type, from which no Space can be derived.
    """
    # Dict.
    if isinstance(data, dict):
        spec = {}
        for key, value in data.items():
            # Resolve the setting for this key into its own variable. Overwriting `num_categories`
            # here would lose the per-key dict after the first iteration.
            if isinstance(num_categories, dict):
                key_num_categories = num_categories.get(key, None)
            else:
                key_num_categories = num_categories
            space = get_space_from_data(value,
                                        num_categories=key_num_categories,
                                        main_axes=main_axes)
            # One component has no Space -> The entire container has no Space.
            if space == 0:
                return 0
            spec[key] = space
        return Dict(spec, main_axes=main_axes)
    # Tuple.
    elif isinstance(data, tuple):
        spec = []
        for item in data:
            space = get_space_from_data(item, main_axes=main_axes)
            if space == 0:
                return 0
            spec.append(space)
        return Tuple(spec, main_axes=main_axes)
    # Primitive Space -> Infer from data dtype and shape.
    else:
        # `data` itself is a single value, simple python type.
        # Note: bool must be checked before int as bool is a subclass of int (and must never get a `high`).
        if isinstance(data, (bool, float)):
            return PrimitiveSpace.make(spec=type(data), shape=())
        elif isinstance(data, int):
            int_high = {
                "high": num_categories
            } if num_categories is not None else {}
            return PrimitiveSpace.make(spec=type(data), shape=(), **int_high)
        elif isinstance(data, str):
            raise SurrealError(
                "Cannot derive Space from str data ({})!".format(data))
        # A single numpy array (or tf Tensor).
        elif isinstance(data, (np.ndarray, tf.Tensor)):
            dtype = convert_dtype(data.dtype, "np")
            # Only integer dtypes may carry a `high` (number of categories).
            int_high = {"high": num_categories} if num_categories is not None and \
                dtype in [np.uint8, np.int16, np.int32, np.int64] else {}
            # Must subtract main_axes from beginning of data.shape.
            shape = tuple(data.shape[len(main_axes or []):])
            return PrimitiveSpace.make(spec=dtype,
                                       shape=shape,
                                       main_axes=main_axes,
                                       **int_high)
        # Try inferring the Space from a python list.
        elif isinstance(data, list):
            return try_space_inference_from_list(data)
        # No Space: e.g. the tf.no_op, a distribution (anything that's not a tensor).
        # PyTorch Tensors do not have get_shape so must check backend.
        elif hasattr(data, "dtype") is False or not hasattr(data, "get_shape"):
            return 0

    raise SurrealError(
        "ERROR: Cannot derive Space from data '{}' (unknown type?)!".format(
            data))
Esempio n. 19
0
    def call(self,
             inputs,
             values=None,
             *,
             deterministic=None,
             likelihood=False,
             log_likelihood=False):
        """
        Computes Q(s) -> a by passing the inputs through our model.

        Args:
            inputs (any): The inputs to the network. If `self.pre_concat_networks` is non-empty, the inputs are
                flattened, each component is passed through its pre-concat network (if that is not None) and
                the results are concatenated along the last axis before being passed into `self.network`.

            values (Optional[any]): If None, an output is sampled (or the plain adapter output is returned for
                components without a distribution). Otherwise, a (possibly incomplete) struct of values, which
                is complemented to match `self.output_space.structure` and for which likelihoods (components
                with a distribution) or adapter outputs (components without a distribution) are returned.

            deterministic (Optional[bool]): Passed on to the distributions' sampling. If None, uses
                `self.deterministic`.

            likelihood (bool): Only used if `values` is None: Whether to also return the likelihood of the sample.

            log_likelihood (bool): Whether to return log-likelihoods instead of likelihoods. If `values` is None,
                setting this to True also causes the (log-)likelihood to be returned along with the sample.

        Returns:
            any:
                - If the network returns a tuple/dict: The adapter outputs packed into the network's output
                  structure.
                - If `values` is None: The packed sample, or the tuple (packed sample, (log-)likelihood) if
                  `likelihood` or `log_likelihood` is True.
                - If `values` is given: Only the combined (log-)likelihood (may be None) if no component produced
                  a plain output. Otherwise the packed outputs, or the tuple (packed outputs, (log-)likelihood)
                  if some likelihood was computed.

        Raises:
            SurrealError: If a value is given for a component that has no distribution and is not an Int.
        """
        def _is_undef(value):
            # Compare by type and equality. Identity checks against a str literal are implementation-dependent
            # and a plain `!=` would be ambiguous for array/tensor values.
            return isinstance(value, str) and value == "_undef_"

        deterministic = deterministic if deterministic is not None else self.deterministic

        # If complex input -> pass through pre_concat_nns, then concat, then move on through core nn.
        if len(self.pre_concat_networks) > 0:
            inputs = tf.nest.flatten(inputs)
            inputs = tf.concat([
                self.pre_concat_networks[i](in_)
                if self.pre_concat_networks[i] is not None else in_
                for i, in_ in enumerate(inputs)
            ], axis=-1)

        # Return struct according to output Space.
        nn_out = self.network(inputs)

        # Simple output -> Push through each of our output-adapters.
        if not isinstance(nn_out, (tuple, dict)):
            # No values given -> Sample from distribution or return plain adapter-output (if no distribution given).
            if values is None:
                adapter_outputs = [a(nn_out) for a in self.adapters]
                tfp_distributions = [
                    distribution.parameterize_distribution(adapter_outputs[i])
                    if distribution is not None else None
                    for i, distribution in enumerate(self.distributions)
                ]
                sample = [
                    distribution._sample(tfp_distributions[i],
                                         deterministic=deterministic)
                    if distribution is not None else adapter_outputs[i]
                    for i, distribution in enumerate(self.distributions)
                ]
                packed_sample = tf.nest.pack_sequence_as(
                    self.output_space.structure, sample)
                # Return (combined?) likelihood values for each sample along with sample.
                if likelihood is True or log_likelihood is True:
                    # Calculate probs/likelihoods (all in log-space for increased accuracy (for very small probs)).
                    log_llhs_components = [
                        # Reduce all axes that are not main_axes, so that we get only one
                        # (log)?-prob/likelihood per (composite)-action.
                        tf.reduce_sum(
                            distribution._log_prob(tfp_distributions[i],
                                                   sample[i]),
                            axis=self.flat_output_space[i].reduction_axes)
                        if distribution is not None else 0.0
                        for i, distribution in enumerate(self.distributions)
                    ]
                    # Combine all probs/likelihoods by summing up in log-space.
                    log_llh_sum = 0.0
                    for log_llh_component in log_llhs_components:
                        log_llh_sum += log_llh_component
                    llh_return = log_llh_sum if log_likelihood else tf.exp(
                        log_llh_sum)
                    return packed_sample, llh_return

                else:
                    return packed_sample
            # Values given -> Return probabilities/likelihoods or plain outputs for given values (if no distribution).
            else:
                values = complement_struct(values, self.output_space.structure,
                                           "_undef_")
                flat_values = tf.nest.flatten(values)
                combined_likelihood_return = None
                for i, distribution in enumerate(self.distributions):
                    if distribution is not None and \
                            not _is_undef(flat_values[i]) and flat_values[i] is not None:
                        log_llhs = distribution.log_prob(
                            self.adapters[i](nn_out), flat_values[i])
                        log_llh_sum = tf.math.reduce_sum(
                            log_llhs,
                            axis=self.flat_output_space[i].reduction_axes)
                        combined_likelihood_return = (
                            combined_likelihood_return
                            if combined_likelihood_return is not None else
                            0.0) + log_llh_sum
                # Everything was accumulated in log-space -> Convert back if plain likelihoods are wanted.
                if combined_likelihood_return is not None and not log_likelihood:
                    combined_likelihood_return = tf.math.exp(
                        combined_likelihood_return)

                outputs = []
                for i, distribution in enumerate(self.distributions):
                    # No distribution.
                    if distribution is None and not _is_undef(flat_values[i]):
                        # Some value for this component was given.
                        if flat_values[i] is not None and flat_values[
                                i] is not False:
                            # Make sure it's an Int space.
                            if not isinstance(self.flat_output_space[i], Int):
                                raise SurrealError(
                                    "Component {} of output space does not have a distribution and is not an Int. "
                                    "Hence, values for this component (to get outputs of likelihoods for) are not "
                                    "allowed in `call`.".format(i))
                            # Return outputs for the discrete values by doing the sum-over-Hadamard-trick.
                            outputs.append(
                                tf.math.reduce_sum(
                                    self.adapters[i](nn_out) *
                                    tf.one_hot(flat_values[i],
                                               depth=self.flat_output_space[i].
                                               num_categories),
                                    axis=-1))
                        # No value given, return plain adapter output.
                        else:
                            outputs.append(self.adapters[i](nn_out))
                    # Distribution: Already handled by likelihood block above.
                    else:
                        outputs.append(None)

                # Only likelihood expected (there are no non-distribution components in our output space).
                if all(o is None for o in outputs):
                    return combined_likelihood_return

                packed_out = tf.nest.pack_sequence_as(
                    self.output_space.structure, outputs)
                if combined_likelihood_return is not None:
                    return packed_out, combined_likelihood_return
                else:
                    return packed_out

        # NN already outputs containers.
        else:
            # Must match self.output_space.
            tf.nest.assert_same_structure(self.adapters, nn_out)
            adapter_outputs = [
                adapter(out) for out, adapter in zip(
                    tf.nest.flatten(nn_out), tf.nest.flatten(self.adapters))
            ]
            # `pack_sequence_as` takes (structure, flat_sequence): Pack the flat adapter outputs back into
            # the network's output structure.
            return tf.nest.pack_sequence_as(nn_out, adapter_outputs)
Esempio n. 20
0
def get_default_distribution_from_space(
        space,
        *,
        num_mixture_experts=0,
        bounded_distribution_type="beta",
        discrete_distribution_type="categorical",
        gumbel_softmax_temperature=1.0):
    """
    Derives a default distribution spec for the given Space.

    Args:
        space (Space): The Space for which to derive a default distribution spec. For a ContainerSpace, a
            "joint-cumulative" spec holding the specs of all flattened components is returned.

        num_mixture_experts (int): If > 0, use a mixture distribution over the determined "base"-distribution using n
            experts. TODO: So far, this only works for continuous distributions.

        bounded_distribution_type (str): The lookup class string for a bounded Float distribution.
            One of "beta" or "squashed-normal". Default: "beta".

        discrete_distribution_type(str): The class of distributions to use for discrete action core. For options
            check the components.distributions package. Default: categorical. Agents requiring reparameterization
            may require a GumbelSoftmax distribution instead.

        gumbel_softmax_temperature (float): Temperature parameter for the Gumbel-Softmax distribution used
            for discrete actions.

    Returns:
        Dict: A Spec dict, from which a valid default distribution object can be created.

    Raises:
        SurrealError: If `space` is none of Int, Bool, Float or ContainerSpace.
    """
    # Int: Categorical.
    if isinstance(space, Int):
        assert discrete_distribution_type in ["gumbel-softmax", "categorical"]
        if discrete_distribution_type == "gumbel-softmax":
            return dict(type="gumbel-softmax",
                        temperature=gumbel_softmax_temperature)
        else:
            return dict(type=discrete_distribution_type)

    # Bool: Bernoulli.
    elif isinstance(space, Bool):
        return dict(type="bernoulli")

    # Continuous action space: Normal/Beta/etc. distribution.
    elif isinstance(space, Float):
        # Unbounded -> Normal distribution.
        if not is_bounded_space(space):
            single = dict(type="normal")
        # Bounded -> according to the bounded_distribution parameter.
        else:
            assert bounded_distribution_type in ["beta", "squashed-normal"]
            single = dict(type=bounded_distribution_type,
                          low=space.low,
                          high=space.high)

        # Use a mixture distribution?
        if num_mixture_experts > 0:
            return dict(type="mixture",
                        _args=single,
                        num_experts=num_mixture_experts)
        else:
            return single

    # Container Space.
    elif isinstance(space, ContainerSpace):
        # Forward all settings to the components. Otherwise, nested components would silently
        # fall back to the defaults, no matter what the caller requested.
        component_specs = [
            get_default_distribution_from_space(
                component,
                num_mixture_experts=num_mixture_experts,
                bounded_distribution_type=bounded_distribution_type,
                discrete_distribution_type=discrete_distribution_type,
                gumbel_softmax_temperature=gumbel_softmax_temperature)
            for component in tf.nest.flatten(space)
        ]
        return dict(type="joint-cumulative",
                    distributions=tf.nest.pack_sequence_as(
                        space.structure, component_specs))
    else:
        raise SurrealError(
            "No distribution defined for space {}!".format(space))