def get_number_and_flatten_records(self, records, single):
    """
    Returns the number of records (even if a single, non-batched record is provided) and the flattened records.

    Args:
        records (any): The records to insert.

        single (bool): Optional flag to indicate that we are being passed a single record. This will avoid a
            `Space.contains()` check on our record_space, but is otherwise ok to leave as False, even if the
            incoming record is single/non-batched.

    Returns:
        Tuple:
            - int: The number of records.
            - list: The flattened records.
    """
    # Extract next-values from records before flattening.
    flat_next_records = None
    if self.next_record_setup:
        next_records = {}
        for field, (next_field, bins) in self.next_record_setup.items():
            next_value = records[next_field]
            del records[next_field]
            next_records[field] = next_value
        flat_next_records = tf.nest.flatten(next_records)

    flat_records = tf.nest.flatten(records)

    # Single (non-batched) record.
    if single is True or self.flat_record_space[0].get_shape(include_main_axes=True) == \
            (self.capacity,) + flat_records[0].shape:
        num_records = 0
    else:
        num_records = get_batch_size(flat_records[0])

    # Non-batched, single entry -> Add batch rank.
    if num_records == 0:
        flat_records = [np.array([r]) for r in flat_records]
        num_records = 1

    # Check for correct batch size.
    if self.next_record_setup:
        if self.batch_size is None:
            self.batch_size = num_records
            assert self.capacity % self.batch_size == 0, \
                "ERROR: `batch_size` was set to {}, but `capacity` ({}) must be a multiple of the memory's " \
                "`batch_size`!".format(self.batch_size, self.capacity)
        elif num_records != self.batch_size:
            raise SurrealError(
                "Incoming batch has wrong size ({}). Must always be {}!".format(num_records, self.batch_size)
            )

    # Make sure `records` roughly matches our record_space.
    assert len(flat_records) == len(self.flat_record_space), \
        "ERROR: Structure of `records` does not seem to match `self.record_space`!"

    # We have a `next_record_setup`: Add the next-values to our "reserve" area.
    if self.next_record_setup:
        self.next_records.append(flat_next_records)

    return num_records, flat_records
def keras_from_spec(spec):
    # Layers are given as list -> Build a simple Keras sequential model using Keras configs.
    if isinstance(spec, (list, tuple)):
        sequential = tf.keras.models.Sequential()
        for layer in spec:
            layer_copy = copy.deepcopy(layer)  # Protect the original config.
            name = layer_copy.pop("name").lower()
            class_ = None
            for match in [Dense, Conv2D, Flatten, LSTM]:
                if match.__name__.lower() == name:
                    class_ = match
                    break
            if class_:
                sequential.add(class_.from_config(layer_copy))
            elif name == "onehot":
                # Bind `layer_copy` at definition time (default arg) to avoid the
                # late-binding-in-a-loop problem of closures.
                sequential.add(tf.keras.layers.Lambda(
                    lambda in_, kwargs=layer_copy: tf.one_hot(in_, **kwargs)
                ))
            else:
                raise SurrealError("Unknown layer/tf-op '{}'!".format(name))
        return sequential

    return spec
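# Minimal usage sketch (hypothetical layer configs; each dict carries a "name" identifying the
# layer type plus the kwargs for that Keras layer's `from_config`):
#
#   model = keras_from_spec([
#       {"name": "dense", "units": 256, "activation": "relu"},
#       {"name": "dense", "units": 2},
#   ])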
def convert_dtype(dtype, to="tf"): """ Translates any type (tf, numpy, python, etc..) into the respective tensorflow/numpy data type. Args: dtype (any): String describing a numerical type (e.g. 'float'), numpy data type, tf dtype, or python numerical type. to (str): Either one of 'tf' (tensorflow), 'np' (numpy), 'str' (string). Default="tf". Returns: TensorFlow, Numpy, string, representing a data type (depending on `to` parameter). """ dtype = str(dtype) if "bool" in dtype: return np.bool_ if to == "np" else tf.bool elif "float64" in dtype: return np.float64 if to == "np" else tf.float64 elif "float" in dtype: return np.float32 if to == "np" else tf.float32 elif "int64" in dtype: return np.int64 if to == "np" else tf.int64 elif "uint8" in dtype: return np.uint8 if to == "np" else tf.uint8 elif "int16" in dtype: return np.int16 if to == "np" else tf.int16 elif "int" in dtype: return np.int32 if to == "np" else tf.int32 elif "str" in dtype: return np.unicode_ if to == "np" else tf.string raise SurrealError( "Error: Type conversion to '{}' for type '{}' not supported.".format( to, str(dtype)))
def __init__(self, spec=None, **kwargs):
    space_dict = {}
    main_axes = kwargs.pop("main_axes", None)
    value = kwargs.pop("value", None)
    self.do_not_overwrite_items_extra_ranks = kwargs.pop("do_not_overwrite_items_extra_ranks", False)

    # Allow for any spec or already constructed Space to be passed in as values in the python-dict.
    # Spec may be part of kwargs.
    if spec is None:
        spec = kwargs

    # `spec` could be a dict or a generator (when using tf.nest to map over a Dict).
    is_generator = type(spec).__name__ == "generator"

    for key, val in (spec.items() if not is_generator else spec):
        # Keys must be strings.
        if not isinstance(key, str):
            raise SurrealError("No non-str keys allowed in a Dict-Space!")

        # Value is already a Space: Keep as-is or strip and re-add main-axes.
        if isinstance(val, Space):
            val.value = None
            if self.do_not_overwrite_items_extra_ranks is True:
                space_dict[key] = val
            else:
                space_dict[key] = val.strip_axes().with_axes(main_axes=main_axes)
        # Value is a list/tuple -> Treat as Tuple space.
        elif isinstance(val, (list, tuple)):
            if self.do_not_overwrite_items_extra_ranks is True:
                space_dict[key] = Tuple(*val, do_not_overwrite_items_extra_ranks=True)
            else:
                space_dict[key] = Tuple(*val, main_axes=main_axes)
        # Value is a spec (or a spec-dict with "type" field) -> Produce via `Space.make`.
        elif (isinstance(val, dict) and "type" in val) or not isinstance(val, dict):
            if self.do_not_overwrite_items_extra_ranks is True:
                space_dict[key] = Space.make(val, do_not_overwrite_items_extra_ranks=True)
            else:
                space_dict[key] = Space.make(val, main_axes=main_axes)
        # Value is a simple dict -> Recursively construct another Dict Space as a sub-space of this one.
        else:
            if self.do_not_overwrite_items_extra_ranks is True:
                space_dict[key] = Dict(val, do_not_overwrite_items_extra_ranks=True)
            else:
                space_dict[key] = Dict(val, main_axes=main_axes)

        # Set the parent of the added Space to `self`.
        space_dict[key].parent = self

    # Compute the shape from `space_dict` (not from `self`: `dict.__init__` has not run yet,
    # so `self` has no items at this point).
    ContainerSpace.__init__(
        self, shape=tuple(space_dict[key].shape for key in sorted(space_dict.keys())),
        main_axes=main_axes, value=value
    )
    dict.__init__(self, space_dict)
def get_distribution_spec_from_adapter(distribution_adapter):
    distribution_adapter_type_str = type(distribution_adapter).__name__
    if distribution_adapter_type_str == "CategoricalDistributionAdapter":
        return dict(type="categorical")
    elif distribution_adapter_type_str == "GumbelSoftmaxDistributionAdapter":
        return dict(type="gumbel-softmax")
    elif distribution_adapter_type_str == "BernoulliDistributionAdapter":
        return dict(type="bernoulli")
    # TODO: What about multi-variate normal with non-trivial co-var matrices?
    elif distribution_adapter_type_str == "NormalDistributionAdapter":
        return dict(type="normal")
    elif distribution_adapter_type_str == "BetaDistributionAdapter":
        return dict(type="beta")
    elif distribution_adapter_type_str == "SquashedNormalDistributionAdapter":
        return dict(type="squashed-normal")
    elif distribution_adapter_type_str == "MixtureDistributionAdapter":
        # TODO: MixtureDistribution is generic (any sub-distributions), but its adapter is not
        # (it only supports mixture-Normal).
        return dict(
            type="mixture",
            _args=["multivariate-normal" for _ in range(distribution_adapter.num_mixtures)]
        )
    elif distribution_adapter_type_str == "PlainOutputAdapter":
        return None
    else:
        raise SurrealError(
            "'{}' is an unknown DistributionAdapter type!".format(distribution_adapter_type_str)
        )
def from_file(cls, filename, *args, **kwargs):
    """
    Creates an object from a spec saved in `filename`. Expects json or yaml format.

    Args:
        filename (str): File containing the spec (json or yaml).

    Keyword Args:
        Used as additional parameters for the call to the constructor.

    Returns:
        The object generated from the file's spec.
    """
    path = os.path.join(os.getcwd(), filename)
    if not os.path.isfile(path):
        raise SurrealError('No such file: {}'.format(filename))

    with open(path, 'rt') as fp:
        if path.endswith('.yaml') or path.endswith('.yml'):
            # Use `safe_load` (plain `load` without a Loader is deprecated and unsafe).
            spec = yaml.safe_load(fp)
        else:
            spec = json.load(fp)

    # Add possible *args.
    spec["_args"] = args
    return cls.make(spec=spec, **kwargs)
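# Usage sketch (hypothetical spec file and Makeable subclass `MyComponent`): given
#
#   # my_component.yaml
#   type: my-component
#   learning_rate: 0.001
#
# one could create the object, overriding single c'tor args via kwargs:
#
#   component = MyComponent.from_file("my_component.yaml", batch_size=32)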
def _get_np_shape(self, size=None):
    """
    Helper to determine which shape one should pass to the numpy random funcs for sampling from a Space.
    Depends on `size`, the `shape` of this Space, and the `self.main_axes` settings.

    Args:
        size: See `self.sample()`.

    Returns:
        Tuple[int]: Shape to use for numpy random sampling.
    """
    # Default dims according to self.main_axes (use one for undefined dimensions).
    if size is None:
        return tuple(i if i is not None else 1 for i in self.get_shape(include_main_axes=True))

    # With one axis.
    if isinstance(size, int):
        assert len(self.main_axes) == 1, \
            "ERROR: `size` must be a tuple of len {} (number of main-axes)!".format(len(self.main_axes))
        return (size,) + self.shape

    # With one or more axes (given as tuple/list).
    elif isinstance(size, (tuple, list)):
        assert len(size) == len(self.main_axes), \
            "ERROR: `size` must be of len {} (number of main-axes)!".format(len(self.main_axes))
        # Use the given sizes for the main-axes (1 for any axis given as None), then append this
        # Space's own shape.
        return tuple(s if s is not None else 1 for s in size) + self.shape

    raise SurrealError("`size` must be int or tuple/list!")
def inject_next_values_if_necessary(self, indices, records):
    """
    If required (`self.next_record_setup` is defined), injects the necessary next-values into `records`.
    Either pulls next-values from records n-steps ahead or from `self.next_records`, depending on
    `self.index` and the `indices` of the records.

    Args:
        indices (List[int]): The indices of the records to pull.
        records (List[any]): The actual records (already pulled) that now need to be extended by the next-values.
    """
    if self.next_record_setup:
        # The critical range is the index range for which we cannot simply go ahead n-steps to get the
        # next-values, as the records n-steps ahead are unrelated (from a much earlier insertion) to the
        # records at `indices`. Therefore, we must use the `self.next_records` area to get the correct
        # next-values.
        critical_range = [
            i % self.capacity for i in range(self.index - self.batch_size * self.n_step, self.index)
        ]

        # Loop through all next-record setups.
        for field, (next_field, memory_bins) in self.next_record_setup.items():
            next_values = []
            for next_var, var in enumerate(memory_bins):
                a = []
                for i in indices:
                    # i is within the last batch -> Take next-values from the reserve area.
                    if i in critical_range:
                        pos_in_critical_range = critical_range.index(i)
                        # Not enough records in memory yet to produce an n-step sample.
                        if len(self.next_records) <= pos_in_critical_range // self.batch_size:
                            raise SurrealError(
                                "Memory with n-step={} not ready yet to pull records from. Insert enough "
                                "samples first to reach n-step capability. Current size={}.".format(
                                    self.n_step, self.size
                                )
                            )
                        a.append(
                            self.next_records[pos_in_critical_range // self.batch_size][next_var][
                                pos_in_critical_range % self.batch_size
                            ]
                        )
                    # i is not within the last batch -> Take next-values from the records n-steps ahead
                    # in memory.
                    else:
                        a.append(self.memory[var][(i + self.batch_size * self.n_step) % self.capacity])
                next_values.append(np.array(a))

            records[next_field] = tf.nest.pack_sequence_as(self.record_space[field].structure, next_values)

    return records
def is_bounded_space(box_space):
    if not isinstance(box_space, Float):
        return False
    # Unbounded.
    if box_space.low == float("-inf") and box_space.high == float("inf"):
        return False
    # Bounded.
    elif box_space.low != float("-inf") and box_space.high != float("inf"):
        return True
    # TODO: Semi-bounded -> Exponential distribution.
    else:
        raise SurrealError(
            "Semi-bounded spaces for distribution-generation are not supported yet! You passed in low={} "
            "high={}.".format(box_space.low, box_space.high)
        )
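# E.g. (assuming the `Float` Space constructor accepts `low`/`high` and defaults to unbounded):
#
#   is_bounded_space(Float())                    # (-inf, inf) -> False
#   is_bounded_space(Float(low=-1.0, high=1.0))  # bounded -> True
#   is_bounded_space(Float(low=0.0))             # semi-bounded -> raises SurrealError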
def _auto_input_lambda(self, input_component):
    # Keep one -1 (wildcard) per main axis, then flatten everything else into a single dimension
    # (one-hot'ing Int inputs first).
    new_shape = tuple(-1 for _ in range(len(input_component.main_axes))) + \
        (int(tf.reduce_prod(input_component.get_shape(with_category_rank=True))),)
    if isinstance(input_component, Int):
        return lambda i_: tf.reshape(
            tf.one_hot(i_, input_component.num_categories) if i_.dtype in [tf.int32, tf.int64] else i_,
            new_shape
        )
    elif isinstance(input_component, Float):
        return lambda i_: tf.reshape(i_, new_shape)
    elif isinstance(input_component, Bool):
        return lambda i_: tf.reshape(tf.cast(i_, tf.float32), new_shape)
    else:
        raise SurrealError("Unsupported input-space type: {}!".format(type(input_component).__name__))
def get_records_with_indices(self, num_records=1):
    if self.size <= 0:
        raise SurrealError("ReplayBuffer is empty.")

    # Calculate the indices to pull from the memory.
    # If `num_records` <= our size, sample without replacement (no duplicates); otherwise, allow duplicates.
    indices = np.random.choice(
        np.arange(0, self.size), size=int(num_records), replace=True if num_records > self.size else False
    )
    indices = (self.index - 1 - indices) % self.capacity
    records = self.get_records_at_indices(indices)

    if KeepLastMemoryBatch is True:
        self.last_records_pulled = records

    return records, indices
def get_records_with_indices(self, num_records=1):
    if self.size <= 0:
        raise SurrealError("PrioritizedReplayBuffer is empty.")

    # Calculate the indices to pull from the memory: Sample proportionally to the records' priorities
    # by drawing uniform prefix-sums and looking up their indices in the sum-segment-tree.
    indices = []
    prob_sum = self.merged_segment_tree.sum_segment_tree.get_sum(0, self.size)  # -1?
    samples = np.random.random(size=(num_records,)) * prob_sum
    # TODO: check: available_records instead of num_records?
    for sample in samples:
        indices.append(self.merged_segment_tree.sum_segment_tree.index_of_prefixsum(prefix_sum=sample))
    indices = np.asarray(indices)
    records = self.get_records_at_indices(indices)

    if KeepLastMemoryBatch is True:
        self.last_records_pulled = records

    return records, indices
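# Standalone sketch of the proportional (inverse-CDF) sampling idea above: the sum-segment-tree
# answers `index_of_prefixsum` in O(log N); numpy's cumsum/searchsorted do the same in O(N),
# which is fine for illustration:
#
#   priorities = np.array([1.0, 4.0, 3.0, 2.0])
#   samples = np.random.random(size=(5,)) * priorities.sum()
#   indices = np.searchsorted(np.cumsum(priorities), samples, side="right")
#   # Record 1 (priority 4.0) is now drawn ~4x as often as record 0 (priority 1.0).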
def translate_space(space, force_float32=False):
    """
    Translates openAI gym Spaces into our Space classes.

    Args:
        space (gym.core.Space): The openAI gym Space to be translated.
        force_float32 (bool): Whether to force float Box spaces to float32 (ignoring the gym space's dtype).

    Returns:
        Space: The translated Space.
    """
    if isinstance(space, gym.spaces.Discrete):
        return Int(space.n)
    elif isinstance(space, gym.spaces.MultiBinary):
        return Bool(shape=(space.n,))
    elif isinstance(space, gym.spaces.MultiDiscrete):
        # `low` is all zeros with the same shape as `nvec` (the per-component `high` values).
        return Int(low=np.zeros(space.nvec.shape, dtype=np.uint8), high=space.nvec)
    elif isinstance(space, gym.spaces.Box):
        # Decide by dtype:
        box_dtype = str(space.low.dtype)
        if "int" in box_dtype:
            return Int(low=space.low, high=space.high, dtype=box_dtype)
        elif "float" in box_dtype:
            return Float(low=space.low, high=space.high, dtype="float32" if force_float32 is True else box_dtype)
        elif "bool" in box_dtype:
            return Bool(shape=space.shape)
    elif isinstance(space, gym.spaces.Tuple):
        return Tuple(*[OpenAIGymEnv.translate_space(s) for s in space.spaces])
    elif isinstance(space, gym.spaces.Dict):
        return Dict({
            key: OpenAIGymEnv.translate_space(value, force_float32=force_float32)
            for key, value in space.spaces.items()
        })

    raise SurrealError("Unknown openAI gym Space class ({}) for state_space!".format(space))
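# Usage sketch (assuming `gym` is importable and the usual classic-control envs are registered):
#
#   env = gym.make("CartPole-v1")
#   state_space = OpenAIGymEnv.translate_space(env.observation_space)  # -> Float(shape=(4,))
#   action_space = OpenAIGymEnv.translate_space(env.action_space)      # -> Int(2)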
def reduce(self, start, limit, reduce_op=operator.add):
    """
    Applies an operation to the specified segment.

    Args:
        start (int): Start index to apply reduction to.
        limit (int): End index to apply reduction to.
        reduce_op (Union[operator.add, min, max]): Reduce op to apply.

    Returns:
        Number: Result of the reduce operation.
    """
    if limit is None:
        limit = self.capacity
    if limit < 0:
        limit += self.capacity

    # Init result with the neutral element of the reduce op.
    # Note that all of these are commutative reduce ops.
    if reduce_op == operator.add:
        result = 0.0
    elif reduce_op == min:
        result = float("inf")
    elif reduce_op == max:
        result = float("-inf")
    else:
        raise SurrealError("Unsupported reduce OP. Supported ops are [add, min, max].")

    # Shift to the leaf positions, then traverse the binary tree bottom-up, folding in every node
    # that lies completely inside the [start, limit) segment.
    start += self.capacity
    limit += self.capacity

    while start < limit:
        if start & 1:
            result = reduce_op(result, self.values[start])
            start += 1
        if limit & 1:
            limit -= 1
            result = reduce_op(result, self.values[limit])
        start = start >> 1
        limit = limit >> 1

    return result
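# Standalone sketch of the bottom-up traversal above (assumptions: `capacity` is a power of two,
# leaves live at `values[capacity:2 * capacity]`, and inner node i holds
# reduce_op(values[2 * i], values[2 * i + 1])):
#
#   capacity = 4
#   values = [0.0] * capacity + [1.0, 2.0, 3.0, 4.0]  # leaves: 1, 2, 3, 4
#   for i in range(capacity - 1, 0, -1):              # build inner nodes
#       values[i] = values[2 * i] + values[2 * i + 1]
#
#   # reduce(start=1, limit=3) with operator.add then folds leaf 2.0 (tree index 5) and
#   # leaf 3.0 (tree index 6) into the result, never touching a larger subtree: 5.0.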
def get_adapter_spec_from_distribution_spec(distribution_spec):
    """
    Args:
        distribution_spec (Union[dict,Distribution]): The spec of the Distribution object, for which to return
            an appropriate DistributionAdapter spec dict.

    Returns:
        dict: The spec-dict to make a DistributionAdapter.
    """
    # Create a dummy-distribution to get features from it.
    distribution = Distribution.make(distribution_spec)
    distribution_type_str = re.sub(r'[\W]|distribution$', "", type(distribution).__name__.lower())

    if distribution_type_str == "categorical":
        return dict(type="categorical-distribution-adapter")
    elif distribution_type_str == "gumbelsoftmax":
        return dict(type="gumbel-softmax-distribution-adapter")
    elif distribution_type_str == "bernoulli":
        return dict(type="bernoulli-distribution-adapter")
    elif distribution_type_str == "normal":
        return dict(type="normal-distribution-adapter")
    elif distribution_type_str == "multivariatenormal":
        return dict(type="multivariate-normal-distribution-adapter")
    elif distribution_type_str == "beta":
        return dict(type="beta-distribution-adapter")
    elif distribution_type_str == "squashednormal":
        return dict(type="squashed-normal-distribution-adapter")
    elif distribution_type_str == "mixture":
        return dict(type="mixture-distribution-adapter", _args=[
            get_adapter_spec_from_distribution_spec(
                re.sub(r'[\W]|distribution$', "", type(s).__name__.lower())
            ) for s in distribution.sub_distributions
        ])
    else:
        raise SurrealError("'{}' is an unknown Distribution type!".format(distribution_type_str))
def make(cls, spec=None, **kwargs):
    """
    Uses the given spec to create an object.
    If `spec` is a dict, an optional "type" key can be used as a "constructor hint" to specify a certain class
    of the object. If `spec` is not a dict, `spec`'s value is used directly as the "constructor hint".

    The rest of `spec` (if it's a dict) will be used as kwargs for the (to-be-determined) constructor.
    Additional keys in **kwargs will always have precedence (overwrite keys in `spec` (if a dict)).
    Also, if the spec-dict or **kwargs contains the special key "_args", it will be popped from the dict
    and used as *args list to be passed separately to the constructor.

    The following constructor hints are valid:
    - None: Use `cls` as constructor.
    - An already instantiated object: Will be returned as is; no constructor call.
    - A string or an object that is a key in `cls`'s `__lookup_classes__` dict: The value in
      `__lookup_classes__` for that key will be used as the constructor.
    - A python callable: Use that as constructor.
    - A string: Either a json filename or the name of a python module+class
      (e.g. "rlgraph.components.Component"), which will be imported and used as the constructor.

    Args:
        spec (Optional[dict]): The specification dict.

    Keyword Args:
        kwargs (any): Optional possibility to pass the c'tor arguments in here and use `spec` as the type-only
            info. Then we can call this like: `make([type]?, [**kwargs for ctor])`.
            If `spec` is already a dict, then `kwargs` will be merged with `spec` (overwriting keys in `spec`)
            after "type" has been popped out of `spec`.
            If a constructor of a Makeable needs an *args list of items, the special key `_args` can be passed
            inside `kwargs` with a list value (e.g. kwargs={"_args": [arg1, arg2, arg3]}).

    Returns:
        The object generated from the spec.
    """
    # `spec` is already a created object of this class -> Take it as is.
    if isinstance(spec, cls):
        return spec

    # `specifiable_type`: Indicator for the Makeable's constructor.
    # `ctor_args`: *args arguments for the constructor.
    # `ctor_kwargs`: **kwargs arguments for the constructor.
    # Try to copy, so the caller can reuse `spec` safely.
    try:
        spec = deepcopy(spec)
    except Exception:
        pass

    if isinstance(spec, dict):
        specifiable_type = spec.pop("type", None)
        ctor_kwargs = spec
        # Give kwargs priority over things defined in the spec dict. This way, one can pass a generic
        # `spec` and then override single c'tor parameters via the kwargs in the call to `make`.
        ctor_kwargs.update(kwargs)
    else:
        specifiable_type = spec
        if specifiable_type is None and "type" in kwargs:
            specifiable_type = kwargs.pop("type")
        ctor_kwargs = kwargs

    # Special `_args` field in kwargs for *args-utilizing constructors.
    ctor_args = force_list(ctor_kwargs.pop("_args", []))

    # Figure out the actual constructor (class) from `specifiable_type`.
    # None: Try `__default_constructor__` (if no args/kwargs), only then the constructor of `cls` itself.
    if specifiable_type is None:
        # We have a default constructor that was defined directly by cls (not by its children).
        if cls.__default_constructor__ is not None and ctor_args == [] and \
                (not hasattr(cls.__bases__[0], "__default_constructor__") or
                 cls.__bases__[0].__default_constructor__ is None or
                 cls.__bases__[0].__default_constructor__ is not cls.__default_constructor__):
            constructor = cls.__default_constructor__
            # Default the partial's keywords into ctor_kwargs.
            if isinstance(constructor, partial):
                kwargs = default_dict(ctor_kwargs, constructor.keywords)
                constructor = partial(constructor.func, **kwargs)
                ctor_kwargs = {}  # Erase to avoid duplicate-kwarg error.
        # Try our luck with this class itself.
        else:
            constructor = cls
    # Try the __lookup_classes__ of this class.
    else:
        constructor = cls.lookup_class(specifiable_type)

        # Found in cls.__lookup_classes__.
        if constructor is not None:
            pass
        # Python callable.
        elif callable(specifiable_type):
            constructor = specifiable_type
        # A string: Filename or a python module+class.
        elif isinstance(specifiable_type, str):
            if re.search(r'\.(yaml|yml|json)$', specifiable_type):
                return cls.from_file(specifiable_type, *ctor_args, **ctor_kwargs)
            elif specifiable_type.find('.') != -1:
                module_name, function_name = specifiable_type.rsplit(".", 1)
                module = importlib.import_module(module_name)
                constructor = getattr(module, function_name)
            else:
                raise SurrealError(
                    "String specifier ({}) in `make` must be a filename, a module+class, or a key "
                    "into {}.__lookup_classes__!".format(specifiable_type, cls.__name__)
                )

    if not constructor:
        raise SurrealError("Invalid type '{}'. Cannot `make`.".format(specifiable_type))

    # Create the object with the inferred constructor.
    specifiable_object = constructor(*ctor_args, **ctor_kwargs)

    # No sanity check for fake (lambda)-"constructors".
    if type(constructor).__name__ != "function":
        assert isinstance(
            specifiable_object,
            constructor.func if isinstance(constructor, partial) else constructor
        )

    return specifiable_object
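# Usage sketch (hypothetical `Memory` subclass of Makeable, with "replay-buffer" registered in
# its `__lookup_classes__`); all three calls construct the same object:
#
#   memory = Memory.make({"type": "replay-buffer", "capacity": 10000})
#   memory = Memory.make("replay-buffer", capacity=10000)
#   memory = Memory.make({"type": "replay-buffer"}, capacity=10000)  # kwargs override spec keys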
def _create_adapters_and_distributions(self, output_space, adapters, distributions):
    if output_space is None:
        adapter = DistributionAdapter.make(adapters)
        self.output_space = adapter.output_space
        # Assert single component output space.
        assert isinstance(self.output_space, PrimitiveSpace), \
            "ERROR: Output space must not be a ContainerSpace if no `output_space` is given in the Network " \
            "constructor!"
    else:
        self.output_space = Space.make(output_space)

    self.flat_output_space = tf.nest.flatten(self.output_space)

    # Find out whether we have a generic adapter-spec (one for all output components).
    generic_adapter_spec = None
    if isinstance(adapters, dict) and not any(key in adapters for key in self.output_space):
        generic_adapter_spec = adapters
    # `adapters` may be incomplete (add Nones to non-defined leafs).
    elif isinstance(adapters, dict):
        adapters = complement_struct(adapters, reference_struct=self.output_space)
        flat_output_adapter_spec = flatten_alongside(adapters, alongside=self.output_space)

    # Find out whether we have a generic distribution-spec (one for all output components).
    generic_distribution_spec = None
    if isinstance(self.output_space, PrimitiveSpace) or \
            (isinstance(distributions, dict) and not any(key in distributions for key in self.output_space)):
        generic_distribution_spec = distributions
        flat_distribution_spec = tf.nest.map_structure(lambda s: distributions, self.flat_output_space)
    else:
        # `distributions` may be incomplete (add Nones to non-defined leafs).
        if isinstance(distributions, dict):
            distributions = complement_struct(distributions, reference_struct=self.output_space)
        # No distributions whatsoever.
        elif not distributions:
            distributions = complement_struct({}, reference_struct=self.output_space)
        # Use default distributions (depending on output-space(s)).
        elif distributions is True or distributions == "default":
            distributions = complement_struct({}, reference_struct=self.output_space, value=True)
        flat_distribution_spec = tf.nest.flatten(distributions)

    # Figure out our Distributions.
    for i, output_component in enumerate(self.flat_output_space):
        # Generic spec -> Use it.
        if generic_adapter_spec:
            da_spec = copy.deepcopy(generic_adapter_spec)
            da_spec["output_space"] = output_component
        # Spec dict -> Find the setting in the possibly incomplete spec.
        elif isinstance(adapters, dict):
            # If not specified in the dict -> auto-generate the adapter-spec.
            da_spec = flat_output_adapter_spec[i]
            da_spec["output_space"] = output_component
        # Simple type spec.
        elif not isinstance(adapters, DistributionAdapter):
            da_spec = dict(output_space=output_component)
        # Direct object.
        else:
            da_spec = adapters

        # We have to get the type of the adapter from a distribution.
        if isinstance(da_spec, dict) and "type" not in da_spec:
            # Single distribution settings for all output components.
            if generic_distribution_spec is not None:
                settings = {} if generic_distribution_spec in ["default", True, False] else \
                    (generic_distribution_spec or {})
            else:
                settings = flat_distribution_spec[i] if isinstance(flat_distribution_spec[i], dict) else {}

            # `distributions` could be simply a direct spec dict.
            if (isinstance(settings, dict) and "type" in settings) or isinstance(settings, Distribution):
                dist_spec = settings
            else:
                dist_spec = get_default_distribution_from_space(output_component, **settings)

            # No distribution.
            if not generic_distribution_spec and not flat_distribution_spec[i]:
                self.distributions.append(None)
            # Some distribution.
            else:
                self.distributions.append(Distribution.make(dist_spec))
                if self.distributions[-1] is None:
                    raise SurrealError(
                        "`output_component` is of type {} and not allowed in {} Component!".format(
                            type(output_space).__name__, type(self).__name__
                        )
                    )

            # Special case: No distribution AND float -> Plain output adapter.
            if not generic_distribution_spec and \
                    (not flat_distribution_spec[i] and isinstance(da_spec["output_space"], Float)):
                da_spec["type"] = "plain-output-adapter"
            # All other cases: Get the adapter type from the distribution spec
            # (even if we don't use a distribution in the end).
            else:
                default_dict(da_spec, get_adapter_spec_from_distribution_spec(dist_spec))

            self.adapters.append(DistributionAdapter.make(da_spec))

        # da_spec is completely defined -> Use it to get the distribution.
        else:
            self.adapters.append(DistributionAdapter.make(da_spec))
            # Use the flattened spec here (`distributions` itself may be a dict keyed by space components).
            if flat_distribution_spec[i]:
                dist_spec = get_distribution_spec_from_adapter(self.adapters[-1])
                self.distributions.append(Distribution.make(dist_spec))
def get_space_from_data(data, num_categories=None, main_axes=None):
    """
    Tries to re-create a Space object given some DataOp (e.g. a tf op).
    This is useful for shape inference on returned ops after having run through a graph_fn.

    Args:
        data (any): The data to create a corresponding Space for.

        num_categories (Optional[int]): An optional indicator, what the `num_categories` property for
            an Int should be.

    Returns:
        Space: The inferred Space object.
    """
    # Dict.
    if isinstance(data, dict):
        spec = {}
        for key, value in data.items():
            # Use a per-key local (don't overwrite `num_categories` for subsequent keys).
            nc = num_categories.get(key, None) if isinstance(num_categories, dict) else num_categories
            spec[key] = get_space_from_data(value, num_categories=nc, main_axes=main_axes)
            # Could not infer a Space for this value -> Propagate the failure (0).
            if spec[key] == 0:
                return 0
        return Dict(spec, main_axes=main_axes)

    # Tuple.
    elif isinstance(data, tuple):
        spec = []
        for i in data:
            space = get_space_from_data(i, main_axes=main_axes)
            if space == 0:
                return 0
            spec.append(space)
        return Tuple(spec, main_axes=main_axes)

    # Primitive Space -> Infer from data dtype and shape.
    else:
        # `data` itself is a single value, simple python type.
        # Check bool before int (bool is a subclass of int).
        if isinstance(data, (bool, float)):
            return PrimitiveSpace.make(spec=type(data), shape=())
        elif isinstance(data, int):
            int_high = {"high": num_categories} if num_categories is not None else {}
            return PrimitiveSpace.make(spec=type(data), shape=(), **int_high)
        elif isinstance(data, str):
            raise SurrealError("Cannot derive Space from str data ({})!".format(data))
        # A single numpy array or tf Tensor.
        elif isinstance(data, (np.ndarray, tf.Tensor)):
            dtype = convert_dtype(data.dtype, "np")
            int_high = {"high": num_categories} if num_categories is not None and \
                dtype in [np.uint8, np.int16, np.int32, np.int64] else {}
            # Must subtract main_axes from the beginning of data.shape.
            shape = tuple(data.shape[len(main_axes or []):])
            return PrimitiveSpace.make(spec=dtype, shape=shape, main_axes=main_axes, **int_high)
        # Try inferring the Space from a python list.
        elif isinstance(data, list):
            return try_space_inference_from_list(data)
        # No Space: e.g. the tf.no_op, a distribution (anything that's not a tensor).
        # PyTorch Tensors do not have get_shape, so must check the backend.
        elif hasattr(data, "dtype") is False or not hasattr(data, "get_shape"):
            return 0

    raise SurrealError("ERROR: Cannot derive Space from data '{}' (unknown type?)!".format(data))
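# Usage sketch (assuming a batch main-axis "B" as used elsewhere in this module): the leading
# `main_axes` dims are stripped from the data's shape before the Space is built.
#
#   data = np.zeros((32, 84, 84, 3), dtype=np.float32)
#   space = get_space_from_data(data, main_axes=["B"])  # -> Float(shape=(84, 84, 3)) with batch axis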
def call(self, inputs, values=None, *, deterministic=None, likelihood=False, log_likelihood=False):
    """
    Computes the network's output (e.g. Q(s) -> a) by passing the inputs through our model.
    """
    deterministic = deterministic if deterministic is not None else self.deterministic

    # If complex input -> Pass through pre_concat_nns, then concat, then move on through the core nn.
    if len(self.pre_concat_networks) > 0:
        inputs = tf.nest.flatten(inputs)
        inputs = tf.concat([
            self.pre_concat_networks[i](in_) if self.pre_concat_networks[i] is not None else in_
            for i, in_ in enumerate(inputs)
        ], axis=-1)

    # Return struct according to output Space.
    nn_out = self.network(inputs)

    # Simple output -> Push through each of our output-adapters.
    if not isinstance(nn_out, (tuple, dict)):
        # No values given -> Sample from distribution or return plain adapter-output (if no distribution given).
        if values is None:
            adapter_outputs = [a(nn_out) for a in self.adapters]
            tfp_distributions = [
                distribution.parameterize_distribution(adapter_outputs[i])
                if distribution is not None else None
                for i, distribution in enumerate(self.distributions)
            ]
            sample = [
                distribution._sample(tfp_distributions[i], deterministic=deterministic)
                if distribution is not None else adapter_outputs[i]
                for i, distribution in enumerate(self.distributions)
            ]
            packed_sample = tf.nest.pack_sequence_as(self.output_space.structure, sample)

            # Return (combined?) likelihood values for each sample along with the sample.
            if likelihood is True or log_likelihood is True:
                # Calculate probs/likelihoods (all in log-space for increased accuracy (for very small probs)).
                log_llhs_components = [
                    # Reduce all axes that are not main_axes, so that we get only one
                    # (log)?-prob/likelihood per (composite)-action.
                    tf.reduce_sum(
                        distribution._log_prob(tfp_distributions[i], sample[i]),
                        axis=self.flat_output_space[i].reduction_axes
                    ) if distribution is not None else 0.0
                    for i, distribution in enumerate(self.distributions)
                ]
                # Combine all probs/likelihoods by multiplying up (adding in log-space).
                log_llh_sum = 0.0
                for log_llh_component in log_llhs_components:
                    log_llh_sum += log_llh_component
                return packed_sample, log_llh_sum if log_likelihood else tf.exp(log_llh_sum)
            else:
                return packed_sample

        # Values given -> Return probabilities/likelihoods or plain outputs for the given values
        # (if no distribution).
        else:
            values = complement_struct(values, self.output_space.structure, "_undef_")
            flat_values = tf.nest.flatten(values)

            # `complement_struct` fills non-given components with the sentinel string "_undef_".
            # Check via isinstance/== (`is`-comparisons against str literals are
            # implementation-specific and raise a SyntaxWarning on py3.8+).
            def is_undef(v):
                return isinstance(v, str) and v == "_undef_"

            combined_likelihood_return = None
            for i, distribution in enumerate(self.distributions):
                if distribution is not None and not is_undef(flat_values[i]) and flat_values[i] is not None:
                    log_llhs = distribution.log_prob(self.adapters[i](nn_out), flat_values[i])
                    log_llh_sum = tf.math.reduce_sum(
                        log_llhs, axis=self.flat_output_space[i].reduction_axes
                    )
                    combined_likelihood_return = (
                        combined_likelihood_return if combined_likelihood_return is not None else 0.0
                    ) + log_llh_sum
            if combined_likelihood_return is not None and not log_likelihood:
                combined_likelihood_return = tf.math.exp(combined_likelihood_return)

            outputs = []
            for i, distribution in enumerate(self.distributions):
                # No distribution.
                if distribution is None and not is_undef(flat_values[i]):
                    # Some value for this component was given.
                    if flat_values[i] is not None and flat_values[i] is not False:
                        # Make sure it's an Int space.
                        if not isinstance(self.flat_output_space[i], Int):
                            raise SurrealError(
                                "Component {} of output space does not have a distribution and is not "
" "Hence, values for this component (to get outputs of likelihoodsfor) are not " "allowed in `call`.") # Return outputs for the discrete values by doing the sum-over-Hadamard-trick. outputs.append( tf.math.reduce_sum( self.adapters[i](nn_out) * tf.one_hot(flat_values[i], depth=self.flat_output_space[i]. num_categories), axis=-1)) # No value given, return plain adapter output. else: outputs.append(self.adapters[i](nn_out)) # Distribution: Already handled by likelihood block above. else: outputs.append(None) # Only likelihood expected (there are no non-distribution components in our output space). if all(o is None for o in outputs): return combined_likelihood_return packed_out = tf.nest.pack_sequence_as( self.output_space.structure, outputs) if combined_likelihood_return is not None: return packed_out, combined_likelihood_return else: return packed_out # NN already outputs containers. else: # Must match self.output_space. tf.nest.assert_same_structure(self.adapters, nn_out) adapter_outputs = [ adapter(out) for out, adapter in zip( tf.nest.flatten(nn_out), tf.nest.flatten(self.adapters)) ] return tf.nest.pack_sequence_as(adapter_outputs, self.adapters)
def get_default_distribution_from_space(
        space, *, num_mixture_experts=0, bounded_distribution_type="beta",
        discrete_distribution_type="categorical", gumbel_softmax_temperature=1.0
):
    """
    Args:
        space (Space): The primitive Space for which to derive a default distribution spec.

        num_mixture_experts (int): If > 0, use a mixture distribution over the determined "base"-distribution
            using n experts. TODO: So far, this only works for continuous distributions.

        bounded_distribution_type (str): The lookup class string for a bounded Float distribution.
            Default: "beta".

        discrete_distribution_type (str): The class of distributions to use for discrete action spaces.
            For options, check the components.distributions package. Default: "categorical". Agents requiring
            reparameterization may require a GumbelSoftmax distribution instead.

        gumbel_softmax_temperature (float): Temperature parameter for the Gumbel-Softmax distribution used
            for discrete actions.

    Returns:
        Dict: A spec dict, from which a valid default distribution object can be created.
    """
    # Int: Categorical (or Gumbel-Softmax).
    if isinstance(space, Int):
        assert discrete_distribution_type in ["gumbel-softmax", "categorical"]
        if discrete_distribution_type == "gumbel-softmax":
            return dict(type="gumbel-softmax", temperature=gumbel_softmax_temperature)
        else:
            return dict(type=discrete_distribution_type)
    # Bool: Bernoulli.
    elif isinstance(space, Bool):
        return dict(type="bernoulli")
    # Continuous action space: Normal/Beta/etc. distribution.
    elif isinstance(space, Float):
        # Unbounded -> Normal distribution.
        if not is_bounded_space(space):
            single = dict(type="normal")
        # Bounded -> According to the `bounded_distribution_type` parameter.
        else:
            assert bounded_distribution_type in ["beta", "squashed-normal"]
            single = dict(type=bounded_distribution_type, low=space.low, high=space.high)
        # Use a mixture distribution?
        if num_mixture_experts > 0:
            return dict(type="mixture", _args=single, num_experts=num_mixture_experts)
        else:
            return single
    # Container Space.
    elif isinstance(space, ContainerSpace):
        return dict(type="joint-cumulative", distributions=tf.nest.pack_sequence_as(
            space.structure,
            tf.nest.map_structure(
                lambda s: get_default_distribution_from_space(s), tf.nest.flatten(space)
            )
        ))
    else:
        raise SurrealError("No distribution defined for space {}!".format(space))
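# E.g. (assuming the usual `Int`/`Float` Space constructors of this module, with `Float()`
# defaulting to unbounded):
#
#   get_default_distribution_from_space(Int(4))                     # -> {"type": "categorical"}
#   get_default_distribution_from_space(Float())                    # -> {"type": "normal"}
#   get_default_distribution_from_space(Float(low=-1.0, high=1.0))  # -> {"type": "beta", "low": -1.0, "high": 1.0}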