Esempio n. 1
0
    def sanity_check_component_tree(self, root_component):
        """
        Verifies the parent/child nesting of all Components returned by the root-component.

        Only the root-component may have a `parent_component` of None; every other Component
        must have a parent.

        Args:
            root_component (Component): The Component whose `get_all_sub_components()` result is checked.

        Raises:
            RLGraphError: If a non-root Component has no parent, if the root-component has a parent or
                if the root-component is not among the returned Components.
        """
        all_components = root_component.get_all_sub_components()
        self.logger.info("Components created: {}".format(len(all_components)))

        root_seen = False
        for comp in all_components:
            is_root = comp is root_component
            has_parent = comp.parent_component is not None

            # The root must sit at the very top of the tree.
            if is_root and has_parent:
                raise RLGraphError(
                    "ERROR: Root-Component '{}' has a parent Component ({}), but is not allowed to!"
                    .format(comp, comp.parent_component))
            # Everything else must hang off some parent.
            if not is_root and not has_parent:
                raise RLGraphError(
                    "ERROR: Component '{}' has no parent Component but is not the root-component! Only the "
                    "root-component has a `parent_component` of None.".
                    format(comp))
            if is_root:
                root_seen = True

        if not root_seen:
            raise RLGraphError(
                "ERROR: Root-component '{}' was not found in meta-graph!".
                format(root_component))
Esempio n. 2
0
 def _is_action_bounded(action_component):
     """
     Tells whether an action component has finite bounds on both sides.

     Args:
         action_component: The action (sub-)space to inspect. Anything that is not a FloatBox counts
             as not bounded.

     Returns:
         bool: True if both `low` and `high` are finite. False if the component is not a FloatBox or
             if it is unbounded in both directions.

     Raises:
         RLGraphError: If only some of the dimensions are unbounded, or if only one of the two sides
             (low/high) is bounded.
     """
     if not isinstance(action_component, FloatBox):
         return False

     low = action_component.low
     high = action_component.high
     # `np.rank` has been removed from NumPy; `np.ndim` is its replacement.
     if isinstance(low, (list, np.ndarray)) and np.ndim(low) > 0:
         # TODO: we need to properly split the action space in case *some* of the dimensions are bounded!
         low_is_inf = np.isinf(low)
         high_is_inf = np.isinf(high)
         if (np.any(low_is_inf) and not np.all(low_is_inf)) or \
                 (np.any(high_is_inf) and not np.all(high_is_inf)):
             raise RLGraphError("Actions with mix of unbounded and bounded dimensions are not supported yet! "
                                "The boundaries are low={} high={}.".format(low, high))
         # Entries are now either all infinite or all finite, so the first one stands for all of them.
         # Raveling first keeps this a scalar for bounds of rank > 1 as well.
         low = np.ravel(low)[0]
         high = np.ravel(high)[0]

     # Unbounded.
     if low == float("-inf") and high == float("inf"):
         return False
     # Bounded.
     elif low != float("-inf") and high != float("inf"):
         return True
     # TODO: Semi-bounded -> Exponential distribution.
     else:
         raise RLGraphError(
             "Semi-bounded action spaces are not supported yet! You passed in low={} high={}.".
             format(action_component.low, action_component.high)
         )
Esempio n. 3
0
def define_graph_fn(component, graph_fn_record, copy_record=True):
    """
    Attaches a graph function (described by a GraphFnRecord) to a Component instance.

    The record's `wrapper_func` is bound to the Component and stored as an attribute under the record's
    name; the record itself is stored in `component.graph_fns`.

    Args:
        component (Component): The Component object that receives the graph function.
        graph_fn_record (GraphFnRecord): The record describing the graph function to register.
        copy_record (bool): If True, a deepcopy of the record is registered so that the passed-in
            record stays untouched.

    Raises:
        RLGraphError: If the name is already used by another graph_fn of the Component or (for records
            that are not class methods) by any other non-None attribute of the Component.
    """
    # Work on a private copy if requested (e.g. when the record stems from a decorated class method).
    record = copy.deepcopy(graph_fn_record) if copy_record is True else graph_fn_record
    record.component = component
    fn_name = record.name

    # Name clash with an already registered graph_fn.
    if fn_name in component.graph_fns:
        raise RLGraphError("Graph-Fn with name '{}' already defined!".format(fn_name))

    # Name clash with some other attribute of the Component (avoid accidental overriding).
    if not record.is_class_method and getattr(component, fn_name, None) is not None:
        raise RLGraphError(
            "Component '{}' already has a property called '{}'. Cannot define a Graph-Fn with "
            "the same name!".format(component.name, fn_name))

    # Bind the wrapper to this Component instance and expose it under the graph_fn's name.
    bound_wrapper = record.wrapper_func.__get__(component, component.__class__)
    setattr(component, fn_name, bound_wrapper)
    setattr(record.func, "__self__", component)

    component.graph_fns[fn_name] = record
Esempio n. 4
0
def parse_observe_spec(observe_spec):
    """
    Completes the settings for `Agent.observe()` calls with defaults and validates the n-step setup.

    Args:
        observe_spec (Optional[dict]): Observe spec dict.

    Returns:
        dict: The observe_spec as returned by `default_dict` (given values merged with the defaults).

    Raises:
        RLGraphError: If `n_step` > 1 while buffering is switched off or while the buffer is smaller
            than 3x `n_step`.
    """
    defaults = {
        # Do we buffer observations in python before sending them through the graph?
        "buffer_enabled": True,
        # Fill buffer with n records before sending them through the graph (only if buffer_enabled=True).
        "buffer_size": 100,
        # Set to > 1 if we want to post-process buffered values for n-step learning.
        # Values > 1 are only allowed if buffer_enabled is True and buffer_size >> n.
        "n_step": 1,
    }
    observe_spec = default_dict(observe_spec, defaults)

    n_step = observe_spec["n_step"]
    if n_step > 1:
        # n-step post-processing needs the python-side buffer.
        if observe_spec["buffer_enabled"] is False:
            raise RLGraphError(
                "Cannot setup observations with n-step (n={}), while buffering is switched "
                "off".format(n_step))
        min_buffer = 3 * n_step
        if observe_spec["buffer_size"] < min_buffer:
            raise RLGraphError(
                "Buffer must be at least 3x as large as n-step (n={}, min-buffer={})!"
                .format(n_step, min_buffer))

    return observe_spec
Esempio n. 5
0
    def sanity_check_meta_graph(self, root_component):
        """
        Checks the Components of the constructed meta-graph for a consistent parent/child setup.

        Args:
            root_component (Component): The root Component; its `get_all_sub_components()` result is
                the set of Components that gets checked.

        Raises:
            RLGraphError: If some non-root Component has no parent, if the root-component has a parent
                or if the root-component does not show up among the checked Components.
        """
        found_components = root_component.get_all_sub_components()
        self.logger.info("Components created: {}".format(len(found_components)))

        root_seen = False
        for comp in found_components:
            parent = comp.parent_component
            if comp is root_component:
                # The root is the only Component that must not have a parent.
                if parent is not None:
                    raise RLGraphError(
                        "ERROR: Root-Component '{}' has a parent Component ({}), but is not allowed to!"
                        .format(comp, parent))
                root_seen = True
            elif parent is None:
                raise RLGraphError(
                    "ERROR: Component '{}' has no parent Component but is not the root-component! Only the "
                    "root-component has a `parent_component` of None.".
                    format(comp))

        if not root_seen:
            raise RLGraphError(
                "ERROR: Root-component '{}' was not found in meta-graph!".
                format(root_component))
Esempio n. 6
0
    def check_input_spaces(self, input_spaces, action_space=None):
        """
        Validates the "inputs" Space against `self.output_order` and stores the Space's type in `self.type`.

        If `self.output_order` is None, it is auto-generated as 0..len(input)-1, which is only accepted
        for Tuple inputs.

        Args:
            input_spaces (dict): Must hold the incoming Space under the key "inputs".
            action_space: Not used by this check.

        Raises:
            AssertionError: If the input is neither a Dict nor a Tuple, or if auto-ordering is requested
                for a non-Tuple input.
            RLGraphError: If an entry of `self.output_order` is not a key of the Dict input or is an
                index >= the length of the Tuple input.
        """
        in_space = input_spaces["inputs"]
        self.type = type(in_space)

        # No explicit order given: Derive it from the positions (Tuples only).
        if self.output_order is None:
            assert self.type == Tuple, \
                "ERROR: Cannot use auto-ordering in ContainerSplitter for input Dict spaces! Only ok for Tuples."
            self.output_order = list(range(len(in_space)))

        # Only container Spaces can be split.
        assert self.type in (Dict, Tuple),\
            "ERROR: Input Space for ContainerSplitter ({}) must be Dict or Tuple (but is " \
            "{})!".format(self.global_scope, in_space)

        # Every entry of `self.output_order` must address something inside the input Space.
        input_is_dict = self.type == Dict
        input_is_tuple = self.type == Tuple
        for position, entry in enumerate(self.output_order):
            if input_is_dict:
                if entry not in in_space:
                    raise RLGraphError(
                        "Name #{} in `output_order` (value={}) of ContainerSplitter '{}'"
                        " is not part of the input Space "
                        "({})!".format(position, entry, self.scope, in_space))
            elif input_is_tuple and entry >= len(in_space):
                raise RLGraphError(
                    "Index #{} in `output_order` (value={}) of ContainerSplitter '{}'"
                    " is outside the length of the input "
                    "Space ({})!".format(position, entry, self.scope, in_space))
Esempio n. 7
0
    def __init__(self, spec=None, **kwargs):
        """
        Builds this Dict Space from a mapping of str keys to sub-space definitions.

        Args:
            spec (Optional[dict]): Maps keys to already constructed Spaces, lists/tuples (become Tuple
                Spaces), plain dicts (become nested Dict Spaces) or any other spec understood by
                `Space.from_spec`. If None, the remaining `kwargs` are used as the spec.

        Keyword Args:
            add_batch_rank: Popped from kwargs (default: False); passed to ContainerSpace and all sub-spaces.
            add_time_rank: Popped from kwargs (default: False); passed to ContainerSpace and all sub-spaces.
            time_major: Popped from kwargs (default: False); passed to ContainerSpace and all sub-spaces.

        Raises:
            RLGraphError: If a key is not a str or contains characters reserved for the flattened syntax.
        """
        add_batch_rank = kwargs.pop("add_batch_rank", False)
        add_time_rank = kwargs.pop("add_time_rank", False)
        time_major = kwargs.pop("time_major", False)

        ContainerSpace.__init__(self,
                                add_batch_rank=add_batch_rank,
                                add_time_rank=add_time_rank,
                                time_major=time_major)

        # Allow for any spec or already constructed Space to be passed in as values in the python-dict.
        # Spec may be part of kwargs.
        if spec is None:
            spec = kwargs

        space_dict = {}
        # Keys are processed in sorted order.
        for key in sorted(spec.keys()):
            # Keys must be strings.
            if not isinstance(key, str):
                raise RLGraphError(
                    "ERROR: No non-str keys allowed in a Dict-Space!")
            # Prohibit reserved characters (for flattened syntax).
            if re.search(
                    r'/|{}\d+{}'.format(FLAT_TUPLE_OPEN, FLAT_TUPLE_CLOSE),
                    key):
                # The backslash is escaped explicitly: a bare "\d" in a non-raw string is an invalid
                # escape sequence. The emitted message text is unchanged.
                raise RLGraphError(
                    "ERROR: Key to Dict must not contain '/' or '{}\\d+{}'! Is {}."
                    .format(FLAT_TUPLE_OPEN, FLAT_TUPLE_CLOSE, key))
            value = spec[key]
            # Value is already a Space: Copy it (to not affect original Space) and maybe add/remove batch/time-ranks.
            if isinstance(value, Space):
                w_batch_w_time = value.with_extra_ranks(
                    add_batch_rank, add_time_rank, time_major)
                space_dict[key] = w_batch_w_time
            # Value is a list/tuple -> treat as Tuple space.
            elif isinstance(value, (list, tuple)):
                space_dict[key] = Tuple(*value,
                                        add_batch_rank=add_batch_rank,
                                        add_time_rank=add_time_rank,
                                        time_major=time_major)
            # Value is a spec (or a spec-dict with "type" field) -> produce via `from_spec`.
            elif (isinstance(value, dict)
                  and "type" in value) or not isinstance(value, dict):
                space_dict[key] = Space.from_spec(
                    value,
                    add_batch_rank=add_batch_rank,
                    add_time_rank=add_time_rank,
                    time_major=time_major)
            # Value is a simple dict -> recursively construct another Dict Space as a sub-space of this one.
            else:
                space_dict[key] = Dict(value,
                                       add_batch_rank=add_batch_rank,
                                       add_time_rank=add_time_rank,
                                       time_major=time_major)

        dict.__init__(self, space_dict)
Esempio n. 8
0
    def get_preprocessed_space(self, space):
        """
        Returns the Space that results from converting `space` to `self.to_dtype`.

        Handled conversions are IntBox -> FloatBox/BoolBox, BoolBox -> FloatBox/IntBox and
        FloatBox -> IntBox. For these three box types, any other target dtype leaves the Space as is.

        Args:
            space (Space): The incoming Space.

        Returns:
            Space: The converted Space, or `space` itself if no conversion applies.

        Raises:
            RLGraphError: If an IntBox should become a BoolBox but its bounds are not low=0/high=1, or
                if `space` is neither an IntBox, a BoolBox nor a FloatBox.
        """
        # TODO map of allowed conversions in utils?
        float_names = ("float", "float32", "np.float", "tf.float32", "torch.float32")
        int_names = ("int", "int32", "np.int32", "tf.int32", "torch.int32")
        target = self.to_dtype

        if isinstance(space, IntBox):
            if target in float_names:
                return FloatBox(shape=space.shape,
                                low=space.low,
                                high=space.high,
                                add_batch_rank=space.has_batch_rank,
                                add_time_rank=space.has_time_rank)
            if target == "bool":
                # Only a 0/1 IntBox can be read as a BoolBox.
                if not (space.low == 0 and space.high == 1):
                    raise RLGraphError(
                        "ERROR: Conversion from IntBox to BoolBox not allowed if low is not 0 and "
                        "high is not 1.")
                return BoolBox(shape=space.shape,
                               add_batch_rank=space.has_batch_rank,
                               add_time_rank=space.has_time_rank)
            # No conversion.
            return space

        if isinstance(space, BoolBox):
            if target in float_names:
                return FloatBox(shape=space.shape,
                                low=0.0,
                                high=1.0,
                                add_batch_rank=space.has_batch_rank,
                                add_time_rank=space.has_time_rank)
            if target in int_names:
                return IntBox(shape=space.shape,
                              low=0,
                              high=1,
                              add_batch_rank=space.has_batch_rank,
                              add_time_rank=space.has_time_rank)
            # No conversion.
            return space

        if isinstance(space, FloatBox):
            if target in int_names:
                return IntBox(shape=space.shape,
                              low=space.low,
                              high=space.high,
                              add_batch_rank=space.has_batch_rank,
                              add_time_rank=space.has_time_rank)
            # No conversion.
            return space

        # Wrong conversion: Not one of the supported box types.
        raise RLGraphError(
            "ERROR: Space conversion from: {} to type {} not supported".
            format(space, self.to_dtype))
Esempio n. 9
0
    def __init__(self, width, height, interpolation="area", scope="image-resize", **kwargs):
        """
        Args:
            width (int): The new width.
            height (int): The new height.
            interpolation (str): One of "bilinear", "area". Default: "area".
            scope (str): Passed on to the parent constructor. Default: "image-resize".

        Raises:
            RLGraphError: If `interpolation` is neither "bilinear" nor "area".
        """
        super(ImageResize, self).__init__(scope=scope, **kwargs)
        self.width = width
        self.height = height

        if interpolation != "bilinear" and interpolation != "area":
            raise RLGraphError("Invalid interpolation algorithm {}!. Allowed are 'bilinear' and "
                               "'area'.".format(interpolation))
        use_bilinear = interpolation == "bilinear"

        # The tf resize method is only stored when running on the tf backend.
        if get_backend() == "tf":
            self.tf_interpolation = ResizeMethod.BILINEAR if use_bilinear else ResizeMethod.AREA
        # Sometimes we mix python preprocessor stack with tf backend -> always need the cv2 setting.
        self.cv2_interpolation = cv2.INTER_LINEAR if use_bilinear else cv2.INTER_AREA

        # The output spaces after preprocessing (per flat-key).
        self.output_spaces = None
Esempio n. 10
0
def get_action_adapter_type_from_distribution_type(distribution_type_str):
    """
    Maps the type name of a Distribution onto the lookup-class string of its ActionAdapter.

    Args:
        distribution_type_str (str): The type (str) of the Distribution object, for which to return an
            appropriate ActionAdapter lookup-class string.

    Returns:
        str: The lookup-class string for an action-adapter.

    Raises:
        RLGraphError: If `distribution_type_str` is none of the known Distribution types.
    """
    known_adapters = (
        # IntBox: Categorical (or GumbelSoftmax).
        ("Categorical", "categorical-distribution-adapter"),
        ("GumbelSoftmax", "gumbel-softmax-distribution-adapter"),
        # BoolBox: Bernoulli.
        ("Bernoulli", "bernoulli-distribution-adapter"),
        # Continuous action space, unbounded -> Normal distribution.
        ("Normal", "normal-distribution-adapter"),
        # Continuous action space, bounded -> Beta or Squashed Normal.
        ("Beta", "beta-distribution-adapter"),
        ("SquashedNormal", "squashed-normal-distribution-adapter"),
    )
    for distribution_name, adapter_name in known_adapters:
        if distribution_type_str == distribution_name:
            return adapter_name

    raise RLGraphError("'{}' is an unknown Distribution type!".format(
        distribution_type_str))
Esempio n. 11
0
def get_distribution_spec_from_action_adapter(action_adapter):
    """
    Derives a Distribution spec dict from the class name of an ActionAdapter.

    Args:
        action_adapter: The ActionAdapter object; only its class name (and `size_mixture` for the
            normal-mixture adapter) is looked at.

    Returns:
        dict: A new spec dict with a "type" entry (plus "_args" for the mixture case).

    Raises:
        RLGraphError: If the adapter's class name is not one of the known ActionAdapter types.
    """
    adapter_name = type(action_adapter).__name__

    # TODO: MixtureDistribution is generic (any sub-distributions, but its AA is not (only supports mixture-Normal))
    if adapter_name == "NormalMixtureDistributionAdapter":
        sub_distributions = ["multivariate-normal" for _ in range(action_adapter.size_mixture)]
        return {"type": "mixture", "_args": sub_distributions}

    # TODO: What about multi-variate normal with non-trivial co-var matrices?
    distribution_types = {
        "CategoricalDistributionAdapter": "categorical",
        "GumbelSoftmaxDistributionAdapter": "gumbel-softmax",
        "BernoulliDistributionAdapter": "bernoulli",
        "NormalDistributionAdapter": "normal",
        "BetaDistributionAdapter": "beta",
        "SquashedNormalDistributionAdapter": "squashed-normal",
    }
    if adapter_name in distribution_types:
        return {"type": distribution_types[adapter_name]}

    raise RLGraphError("'{}' is an unknown ActionAdapter type!".format(
        adapter_name))
Esempio n. 12
0
    def from_file(cls, filename, *args, **kwargs):
        """
        Create object from spec saved in filename. Expects json or yaml format.

        Files ending in ".yaml" or ".yml" are parsed as yaml, everything else as json.

        Args:
            filename: file containing the spec (json or yaml). Resolved against the current working
                directory (absolute paths stay as they are).
            *args: Stored in the loaded spec under the key "_args".

        Keyword Args:
            Used as additional parameters for call to constructor.

        Returns:
            object: Whatever `cls.from_spec` returns for the loaded spec.

        Raises:
            RLGraphError: If `filename` does not point to an existing file.
        """
        path = os.path.join(os.getcwd(), filename)
        if not os.path.isfile(path):
            raise RLGraphError('No such file: {}'.format(filename))

        with open(path, 'rt') as fp:
            if path.endswith(('.yaml', '.yml')):
                # `yaml.load` without an explicit Loader can construct arbitrary python objects and
                # is rejected by PyYAML >= 6. Spec files are plain data, so the safe loader is used.
                spec = yaml.safe_load(fp)
            else:
                spec = json.load(fp)

        # Add possible *args.
        spec["_args"] = args
        return cls.from_spec(spec=spec, **kwargs)
 def test_ppo_agent_faulty_op_visualization(self):
     """
     Builds a PPOAgent from a config whose network feeds a dense layer into an embedding layer and
     expects the build to fail with an RLGraphSpaceError.
     """
     agent_config = config_from_path("configs/ppo_agent_for_2x2_gridworld.json")

     # Sabotage the NN.
     dense_layer = {"type": "dense", "units": 10}
     embedding_layer = {"type": "embedding", "embed_dim": 3, "vocab_size": 4}
     agent_config["network_spec"] = [dense_layer, embedding_layer]

     env = GridWorld(world="2x2")

     # Build Agent and hence trigger the Space error.
     try:
         PPOAgent.from_spec(
             agent_config,
             state_space=GridWorld.grid_world_2x2_flattened_state_space,
             action_space=env.action_space)
     except RLGraphSpaceError as e:
         print("Seeing expected RLGraphSpaceError ({}). Test ok.".format(e))
         return

     # The build went through although it should not have.
     raise RLGraphError(
         "Not seeing expected RLGraphSpaceError with faulty input Space to embed layer of PPO!"
     )
Esempio n. 14
0
 def get_id():
     """
     Increments the `DataOpRecord._ID` counter by one and returns the new value.

     Returns:
         The value of `DataOpRecord._ID` after the increment.

     Raises:
         RLGraphError: If the incremented counter has reached `DataOpRecord.MAX_ID` (the counter stays
             incremented in that case).
     """
     new_id = DataOpRecord._ID + 1
     DataOpRecord._ID = new_id
     if new_id >= DataOpRecord.MAX_ID:
         raise RLGraphError(
             "Maximum number of op-rec IDs reached! Simply hard-increase `DataOpRecord.MAX_ID`."
         )
     return new_id
Esempio n. 15
0
    def translate_space(space):
        """
        Translates an openAI space into an RLGraph Space object.

        Container spaces (gym Tuple/Dict) are translated recursively.

        Args:
            space (gym.spaces.Space): The openAI Space to be translated.

        Returns:
            Space: The translated Rlgraph Space.

        Raises:
            RLGraphError: If `space` is none of the handled gym Space classes.
        """
        if isinstance(space, gym.spaces.Discrete):
            # A two-valued discrete space is represented as a single bool.
            if space.n == 2:
                return BoolBox()
            else:
                return IntBox(space.n)
        elif isinstance(space, gym.spaces.MultiBinary):
            return BoolBox(shape=(space.n, ))
        elif isinstance(space, gym.spaces.MultiDiscrete):
            # The lower bound needs one zero per entry of `nvec`. `nvec.ndim` is only the number of
            # axes (1 for a plain vector), so the bound is built from `nvec.shape` to match `high`.
            return IntBox(low=np.zeros(space.nvec.shape,
                                       dtype_("uint8", "np")),
                          high=space.nvec)
        elif isinstance(space, gym.spaces.Box):
            return FloatBox(low=space.low, high=space.high)
        elif isinstance(space, gym.spaces.Tuple):
            return Tuple(
                *[OpenAIGymEnv.translate_space(s) for s in space.spaces])
        elif isinstance(space, gym.spaces.Dict):
            return Dict({
                k: OpenAIGymEnv.translate_space(v)
                for k, v in space.spaces.items()
            })
        else:
            raise RLGraphError(
                "Unknown openAI gym Space class for state_space!")
Esempio n. 16
0
def parse_update_spec(update_spec):
    """
    Parses update/learning parameters and inserts default values where necessary.

    Args:
        update_spec (Optional[dict]): Update/Learning spec dict.

    Returns:
        dict: The update_spec as returned by `default_dict` (given values merged with the defaults).

    Raises:
        RLGraphError: If `sync_interval` is not a multiple of `update_interval`.
    """
    # If no spec given.
    default_spec = dict(
        # Whether to perform calls to `Agent.update()` at all.
        do_updates=True,
        # The unit in which we measure frequency: one of "timesteps", "episodes", "sec".
        # unit="timesteps", # TODO: not supporting any other than timesteps
        # The number of 'units' to wait before we do any updating at all.
        steps_before_update=0,
        # The frequency with which we update (given in `unit`).
        update_interval=4,
        # The number of consecutive `Agent.update()` calls per update.
        update_steps=1,
        # The batch size with which to update (e.g. when pulling records from a memory).
        batch_size=64,
        sync_interval=128)
    update_spec = default_dict(update_spec, default_spec)

    # The sync interval must be a multiple of the update_interval. A modulo test is exact, whereas
    # comparing true division against floor division goes through floats and can never fail where
    # `/` is integer division.
    if update_spec["sync_interval"] % update_spec["update_interval"] != 0:
        raise RLGraphError(
            "ERROR: sync_interval ({}) must be multiple of update_interval "
            "({})!".format(update_spec["sync_interval"],
                           update_spec["update_interval"]))

    return update_spec
Esempio n. 17
0
def _sanity_check_call_parameters(self, params, method, method_type,
                                  add_auto_key_as_first_param):
    raw_signature_parameters = inspect.signature(method).parameters
    actual_params = list(raw_signature_parameters.values())
    if add_auto_key_as_first_param is True:
        actual_params = actual_params[1:]
    if len(params) != len(actual_params):
        # Check whether the last arg is var_positional (e.g. *inputs; in that case it's ok if the number of params
        # is larger than that of the actual graph_fn params or its one smaller).
        if actual_params[-1].kind == inspect.Parameter.VAR_POSITIONAL and (
                len(params) > len(actual_params) > 0
                or len(params) == len(actual_params) - 1):
            pass
        # Some actual params have default values: Number of given params must be at least as large as the number
        # of non-default actual params but maximally as large as the number of actual_parameters.
        elif len(actual_params) >= len(params) >= sum(
            [p.default is inspect.Parameter.empty for p in actual_params]):
            pass
        else:
            raise RLGraphError(
                "ERROR: {} '{}/{}' has {} input-parameters, but {} ({}) were being provided in the "
                "`Component.call` method!".format(method_type, self.name,
                                                  method.__name__,
                                                  len(actual_params),
                                                  len(params), params))
Esempio n. 18
0
    def _create_action_adapters_and_distributions(self, action_space, action_adapter_spec):
        """
        Sets `self.action_space` and creates one Distribution and one ActionAdapter per flat action component.

        Results are stored in `self.distributions` and `self.action_adapters` under the flat keys of
        the action space.

        Args:
            action_space: Spec for the action Space. If None, the Space is taken from an ActionAdapter
                built from `action_adapter_spec` (and must then have exactly one flat component).
            action_adapter_spec: A spec dict (optionally holding per-flat-key sub-specs), an already
                constructed ActionAdapter, or anything else (in which case a spec is generated from
                the Distribution's adapter type and the action component).

        Raises:
            RLGraphError: If no Distribution could be determined for one of the action components.
        """
        if action_space is None:
            adapter = ActionAdapter.from_spec(action_adapter_spec)
            self.action_space = adapter.action_space
            # Assert single component action space.
            assert len(self.action_space.flatten()) == 1,\
                "ERROR: Action space must not be ContainerSpace if no `action_space` is given in Policy c'tor!"
        else:
            self.action_space = Space.from_spec(action_space)

        # Figure out our Distributions.
        for i, (flat_key, action_component) in enumerate(self.action_space.flatten().items()):
            distribution = self.distributions[flat_key] = self._get_distribution(i, action_component)
            if distribution is None:
                # Report the offending component itself (not the `action_space` argument, which may
                # be None or a plain spec).
                raise RLGraphError("ERROR: `action_component` is of type {} and not allowed in {} Component!".
                                   format(type(action_component).__name__, self.name))
            action_adapter_type = distribution.get_action_adapter_type()
            # Spec dict.
            if isinstance(action_adapter_spec, dict):
                # Work on a copy: The settings written below are specific to this flat key and must
                # neither leak into the caller's dict nor into the specs of the other components.
                aa_spec = dict(action_adapter_spec.get(flat_key, action_adapter_spec))
                aa_spec["type"] = action_adapter_type
                aa_spec["action_space"] = action_component
            # Simple type spec.
            elif not isinstance(action_adapter_spec, ActionAdapter):
                aa_spec = dict(type=action_adapter_type, action_space=action_component)
            # Direct object.
            else:
                aa_spec = action_adapter_spec
            self.action_adapters[flat_key] = ActionAdapter.from_spec(aa_spec, scope="action-adapter-{}".format(i))
Esempio n. 19
0
def create_colocated_ray_actors(cls, config, num_agents, max_attempts=10):
    """
    Creates a specified number of co-located RayActors.

    In every attempt, `attempt * num_agents` actors are requested (so later attempts request
    larger batches) and only those reported as local by `split_local_non_local_agents` are kept.

    Args:
        cls (class): Actor class to create
        config (dict): Config for actor.
        num_agents (int): Number of worker agents to create.
        max_attempts (Optional[int]): Max number of attempts to create colocated agents, will raise
            an error if creation was not successful within this number.

    Returns:
        list: List of created agents (exactly `num_agents` entries).

    Raises:
        RLGraphError: If not enough agents could be created within the specified number of attempts.
    """
    agents = []
    attempt = 1

    while len(agents) < num_agents and attempt <= max_attempts:
        ray_agents = [cls.remote(config) for _ in range(attempt * num_agents)]
        local_agents, _ = split_local_non_local_agents(ray_agents)
        agents.extend(local_agents)
        # Advance the attempt counter: without this, the loop would never terminate when too few
        # local agents are found and `max_attempts` would have no effect.
        attempt += 1

    if len(agents) < num_agents:
        raise RLGraphError(
            "Could not create the specified number ({}) of agents.".format(
                num_agents))

    # More local agents than needed may have been collected: Only hand out the requested number.
    return agents[:num_agents]
Esempio n. 20
0
    def _graph_fn_sync(self, values_):
        """
        Builds the operation that copies incoming variable values into the variables of this
        Synchronizable's parent Component.

        Args:
            values_ (DataOpDict): Variable values (e.g. another Component's variables) to be assigned to
                our parent's variables. Incoming values and parent variables are paired up after sorting
                both by key.

        Returns:
            DataOp: For the tf backend: A no-op that depends on all single assign ops.
                For the pytorch backend: None (values are set directly on the variable refs).

        Raises:
            RLGraphError: (tf backend only) If the number of variables or any pair of shapes differ.
        """
        backend = get_backend()

        if backend == "tf":
            own_vars = self.parent_component.get_variables(collections=self.collections, custom_scope_separator="-")
            incoming = sorted(values_.items())
            targets = sorted(own_vars.items())

            # Sanity checking: Both sides must hold the same number of variables.
            num_incoming, num_targets = len(incoming), len(targets)
            if num_incoming != num_targets:
                raise RLGraphError("ERROR: Number of Variables to sync must match! "
                                   "We have {} syncs_from and {} syncs_to.".format(num_incoming, num_targets))

            # Pair up incoming values with our own variables and collect the assign ops.
            assign_ops = []
            for (name_in, value_in), (name_out, var_out) in zip(incoming, targets):
                # TODO: Check the names' ends? Without the global scope? (Names are currently not compared.)
                shape_in = get_shape(value_in)
                shape_out = get_shape(var_out)
                if shape_in != shape_out:
                    raise RLGraphError("ERROR: Variable shapes for syncing must match! "
                                       "Shape mismatch between from={} ({}) and to={} ({}).".
                                       format(name_in, shape_in, name_out, shape_out))
                assign_ops.append(self.assign_variable(var_out, value_in))

            # Bundle everything into one "sync"-op.
            with tf.control_dependencies(assign_ops):
                return tf.no_op(name="sync-to-{}".format(self.parent_component.name))

        elif backend == "pytorch":
            # Get refs(!) so that the values can be set in place.
            own_refs = self.parent_component.get_variables(collections=self.collections,
                                                           custom_scope_separator="-", get_ref=True)
            incoming = sorted(values_.items())
            targets = sorted(own_refs.items())

            # Assign parameters of layers.
            for (_, value_in), (_, ref_out) in zip(incoming, targets):
                ref_out.set_value(value_in)
            return None
Esempio n. 21
0
    def split_flattened_input_ops(self, *ops, **kwarg_ops):
        """
        Splits any FlattenedDataOp in *ops and **kwarg_ops into its SingleDataOps and collects them to be passed
        one by one through some graph_fn. If more than one FlattenedDataOp exists in *ops, these must agree
        (in order) on the keys of the first one.
        If `add_auto_key_as_first_param` is True: Add auto-key as very first parameter in each
        returned parameter tuple.

        An op is treated as a container (to be split) if it has more than one entry or if it does not
        have the root key "". All other ops are single ops stored under the key "".

        Args:
            *ops (op): The primitive ops to split.
            **kwarg_ops (op): More primitive ops to split (but by named key).

        Returns:
            Union[FlattenedDataOp,Tuple[tuple,dict]]:
                If at least one container op is in `ops`: A FlattenedDataOp mapping each flat-key of the
                first container op to the list of call parameters for that key (positional values first,
                followed by one `(kwarg_name, value)` tuple per kwarg).
                If no container op is in `ops`: A tuple of (positional params tuple, kwargs dict).

        Raises:
            RLGraphError: If there are more than 1 flattened ops in ops and their keys don't match the
                keys of the first one (this includes a later op having fewer keys than the first one).
        """
        assert all(op is not None for op in ops)  # just make sure

        # Collect all container ops for checking their keys (must match).
        container_ops = [op for op in ops if len(op) > 1 or "" not in op]

        # If it's more than 1, make sure they match. If they don't match: raise Error.
        if len(container_ops) > 1:
            reference_keys = list(container_ops[0].keys())
            # Loop through the non-first ones and make sure all keys match vs the first one.
            for other in container_ops[1:]:
                other_keys = list(other.keys())
                for idx, key in enumerate(reference_keys):
                    # A missing key (other op is shorter) is reported as a mismatch against None instead of
                    # leaking a StopIteration out of this method.
                    k_other = other_keys[idx] if idx < len(other_keys) else None
                    if key != k_other:
                        raise RLGraphError("ERROR: Flattened ops have a key mismatch ({} vs {})!".format(key, k_other))

        # We don't have any container ops: No splitting possible. Return args and kwargs as is.
        if not container_ops:
            return tuple(([""] if self.add_auto_key_as_first_param is True else []) + [op[""] for op in ops]),\
                   {key: value[""] for key, value in kwarg_ops.items()}

        # We have one or many (matching) container ops: Split the calls along the keys of the first one.
        guide_op = container_ops[0]
        collected_call_params = FlattenedDataOp()
        for key in guide_op.keys():
            # Prep input params for a single call.
            params = [key] if self.add_auto_key_as_first_param is True else []
            # Single (non-container) ops are repeated for every key via their root entry "".
            params.extend(op[key] if key in op else op[""] for op in ops)
            # Add kwarg_ops as (name, value) tuples.
            for kwarg_key, kwarg_op in kwarg_ops.items():
                params.append((kwarg_key, kwarg_op[key] if key in kwarg_op else kwarg_op[""]))
            collected_call_params[key] = params
        return collected_call_params
    def init_device_strategy(self):
        """
        Initializes default device and loads available devices.

        Reads `device_strategy` from `self.execution_spec` and, depending on its value
        ("default", "multi_gpu_sync" or "custom"), sets `self.default_device` and `self.device_map`.

        Raises:
            RLGraphError: If the configured `device_strategy` is not one of the supported values.
        """
        self.device_strategy = self.execution_spec["device_strategy"]
        # Configures available GPUs.
        self.init_gpus()

        if self.device_strategy == "default":
            if self.execution_spec["device_map"] is not None:
                self.logger.warning(
                    "`device_map` given for device-strategy=`default`. Map will be ignored. Use "
                    "device-strategy=`custom` together with a `device_map`.")
            self.logger.info(
                "Initializing graph executor with default device strategy. "
                "Backend will assign all visible devices.")
            self.logger.info("GPUs enabled: {}. Usable GPUs: {}".format(
                self.gpus_enabled, self.gpu_names))
        elif self.device_strategy == 'multi_gpu_sync':
            assert self.gpus_enabled, "ERROR: device_strategy is 'multi_gpu_sync' but GPUs are not enabled. Please " \
                                      "check your gpu_spec and set gpus_enabled to True."
            # Default device is the user provided device or (if the key is missing) the first CPU.
            self.default_device = self.execution_spec.get(
                "default_device", [
                    x.name for x in self.local_device_protos
                    if x.device_type == 'CPU'
                ][0])
            self.logger.info(
                "Initializing graph executor with synchronized multi-gpu device strategy. "
                "Default device: {}. Available gpus are: {}.".format(
                    self.default_device, self.gpu_names))
        elif self.device_strategy == "custom":
            # Default device is user provided device or first CPU.
            default_device = self.execution_spec.get("default_device", None)
            if default_device is None:
                self.default_device = [
                    x.name for x in self.local_device_protos
                    if x.device_type == 'CPU'
                ][0]
            else:
                self.default_device = default_device
                # Sanity check, whether given default device exists.
                # if self.default_device not in self.available_devices:
                #    raise RLGraphError("Provided `default_device` ('{}') is not in `available_devices` ({})".
                #                       format(self.default_device, self.available_devices))
            # Clean up device map so it only contains devices that are actually available (otherwise,
            # use the default device). A `device_map` of None is treated like an empty map.
            requested_map = self.execution_spec["device_map"] or {}
            self.device_map = {
                component_name: device for component_name, device in requested_map.items()
                if device in self.available_devices
            }
            self.logger.info(
                "Initializing graph executor with custom device strategy (default device: {})."
                .format(self.default_device))
        else:
            raise RLGraphError(
                "Invalid device_strategy ('{}') for TensorFlowExecutor!".
                format(self.device_strategy))
Esempio n. 23
0
 def mapping_func(key, space):
     """
     Maps a single (flat) Space to its number of categories.

     Args:
         key: The flat-key of `space` (only used for the error message).
         space: The Space to map.

     Returns:
         The Space's `num_categories` for an IntBox, 1 for any other Space.

     Raises:
         RLGraphError: If an IntBox has `num_categories` set to False.
     """
     # No categories. Keep as is.
     if not isinstance(space, IntBox):
         return 1

     # Must have global bounds (bounds valid for all axes).
     categories = space.num_categories
     if categories is False:
         raise RLGraphError("ERROR: Cannot flatten categories if one of the IntBox spaces ({}={}) does "
                            "not have global bounds (its `num_categories` is False)!".format(key, space))
     return categories
Esempio n. 24
0
    def __init__(self, epsilon_spec=None, noise_spec=None, scope="exploration", **kwargs):
        """
        Sets up the Exploration Component with either an epsilon-exploration sub-component, a noise
        sub-component, or neither of them (plain pass-through), and registers the matching
        `get_action` API method.

        Args:
            epsilon_spec (any): The spec or Component object itself to construct an EpsilonExploration Component.
            noise_spec (dict): The specification dict for a noise generator that adds noise to the NN's output.
            scope (str): The scope passed on to the parent class' constructor. Default: "exploration".
            **kwargs: Further arguments passed on to the parent class' constructor.

        Raises:
            RLGraphError: If both `epsilon_spec` and `noise_spec` are given (truthy).
        """
        super(Exploration, self).__init__(scope=scope, **kwargs)

        self.action_space = None  # The actual action space (may not have batch-rank, just the plain space)
        self.flat_action_space = None

        # At most one of the following two sub-components is created below.
        self.epsilon_exploration = None
        self.noise_component = None

        # For define-by-run sampling.
        self.sample_obj = None

        # Don't allow both epsilon and noise component
        if epsilon_spec and noise_spec:
            raise RLGraphError("Cannot use both epsilon exploration and a noise component at the same time.")

        # Add epsilon component.
        if epsilon_spec:
            self.epsilon_exploration = EpsilonExploration.from_spec(epsilon_spec)
            self.add_components(self.epsilon_exploration)

            # Define our interface.
            # NOTE: `time_step` has no default in this variant (the other two variants default it to 0).
            @rlgraph_api(component=self)
            def get_action(self, actions, time_step, use_exploration=True):
                """
                Action depends on time-step (e.g. epsilon-decay).
                """
                # `_graph_fn_pick` is defined outside of this method; presumably it selects between the
                # given actions and exploratory ones based on the epsilon decisions - TODO confirm.
                epsilon_decisions = self.epsilon_exploration.do_explore(actions, time_step)
                return self._graph_fn_pick(use_exploration, epsilon_decisions, actions)

        # Add noise component.
        elif noise_spec:
            self.noise_component = NoiseComponent.from_spec(noise_spec)
            self.add_components(self.noise_component)

            # `time_step` is accepted, but not used, in this variant.
            @rlgraph_api(component=self)
            def get_action(self, actions, time_step=0, use_exploration=True):
                """
                Noise is added to the sampled action.
                """
                # `_graph_fn_add_noise` is defined outside of this method.
                noise = self.noise_component.get_noise()
                return self._graph_fn_add_noise(use_exploration, noise, actions)

        # Don't explore at all. Simple pass-through.
        else:
            # `time_step` and `use_exploration` are accepted, but not used, in this variant.
            @rlgraph_api(component=self)
            def get_action(self, actions, time_step=0, use_exploration=False):
                """
                Action is returned as is.
                """
                return actions
Esempio n. 25
0
def convert_dtype(dtype, to="tf"):
    """
    Translates any type (tf, numpy, python, etc..) into the respective data type of the active backend
    or into the respective numpy data type.

    Args:
        dtype (any): String describing a numerical type (e.g. 'float'), numpy data type, tf dtype,
            pytorch data-type, or python numerical type.
        to (str): 'np' returns the numpy type. Any other value returns the type of the currently
            active backend (tensorflow or pytorch). Default="tf".

    Returns:
        The numpy type (if `to` is 'np') or the backend's (tf or pytorch) type matching `dtype`.

    Raises:
        RLGraphError: If `dtype` is not supported for the active backend (or the backend is neither
            "tf" nor "pytorch").
    """
    backend = get_backend()
    as_numpy = to == "np"

    # The checks below are order-sensitive (first match wins).
    if backend == "tf":
        if dtype in ["bool", bool, np.bool_, tf.bool]:
            return np.bool_ if as_numpy else tf.bool
        if dtype in ["float", "float32", float, np.float32, tf.float32]:
            return np.float32 if as_numpy else tf.float32
        if dtype in ["float64", np.float64, tf.float64]:
            return np.float64 if as_numpy else tf.float64
        if dtype in ["int", "int32", int, np.int32, tf.int32]:
            return np.int32 if as_numpy else tf.int32
        if dtype in ["int64", np.int64]:
            return np.int64 if as_numpy else tf.int64
        if dtype in ["uint8", np.uint8]:
            return np.uint8 if as_numpy else tf.uint8
        if dtype in ["str", np.str_]:
            return np.unicode_ if as_numpy else tf.string
        if dtype in ["int16", np.int16]:
            return np.int16 if as_numpy else tf.int16

    elif backend == "pytorch":
        # N.b. this behaves differently than other bools, careful with Python bool comparisons.
        # torch.uint8 is matched here already (i.e. before the dedicated uint8 check further below).
        if dtype in ["bool", bool, np.bool_] or dtype is torch.uint8:
            return np.bool_ if as_numpy else torch.uint8
        if dtype in ["float", "float32", float, np.float32] or dtype is torch.float32:
            return np.float32 if as_numpy else torch.float32
        if dtype in ["float64", np.float64] or dtype is torch.float64:
            return np.float64 if as_numpy else torch.float64
        if dtype in ["int", "int32", int, np.int32] or dtype is torch.int32:
            return np.int32 if as_numpy else torch.int32
        if dtype in ["int64", np.int64] or dtype is torch.int64:
            return np.int64 if as_numpy else torch.int64
        if dtype in ["uint8", np.uint8] or dtype is torch.uint8:
            return np.uint8 if as_numpy else torch.uint8
        if dtype in ["int16", np.int16] or dtype is torch.int16:
            return np.int16 if as_numpy else torch.int16

        # N.b. no string tensor type.

    # Nothing matched (or unknown backend).
    raise RLGraphError(
        "Error: Type conversion to '{}' for type '{}' not supported.".format(
            to, str(dtype)))
Esempio n. 26
0
    def check_input_spaces(self, input_spaces, action_space=None):
        """
        Verifies that the "parameters" input space holds all keys needed by this (Mixed) Component:
        'categorical' plus one 'parameters[i]' entry per sub-distribution.

        Args:
            input_spaces (dict): The input spaces; only the entry under "parameters" is inspected.
            action_space: Not used by this check.

        Raises:
            RLGraphError: If the entry 'parameters[i]' is missing (or None) for some sub-distribution i.
        """
        # Must be a Dict with keys: 'categorical', 'parameters0', 'parameters1', etc...
        parameter_space = input_spaces["parameters"]

        assert "categorical" in parameter_space, "ERROR: in_space for Mixed needs parameter key: 'categorical'!"

        # One 'parameters[index]' entry is required for each of our sub-distributions.
        for index, _ in enumerate(self.sub_distributions):
            if parameter_space.get("parameters{}".format(index)) is None:
                raise RLGraphError(
                    "ERROR: in_space for Mixed needs parameter key: 'parameters{}'!".format(index)
                )
Esempio n. 27
0
def is_bounded_space(box_space):
    """
    Tells whether the given Space is a FloatBox with finite lower and upper bounds.

    Args:
        box_space: The Space to check.

    Returns:
        bool: True for a FloatBox bounded on both sides. False for an unbounded FloatBox (low=-inf and
            high=inf) or for anything that is not a FloatBox.

    Raises:
        RLGraphError: If the FloatBox is bounded on one side only.
    """
    if not isinstance(box_space, FloatBox):
        return False

    lower, upper = box_space.low, box_space.high
    lower_open = lower == float("-inf")
    upper_open = upper == float("inf")

    # Unbounded.
    if lower_open and upper_open:
        return False
    # Bounded.
    if not lower_open and not upper_open:
        return True
    # TODO: Semi-bounded -> Exponential distribution.
    raise RLGraphError(
        "Semi-bounded spaces for distribution-generation are not supported yet! You passed in low={} high={}."
        .format(lower, upper))
Esempio n. 28
0
    def _create_action_adapters_and_distributions(self, action_space,
                                                  action_adapter_spec):
        """
        Creates one ActionAdapter and one Distribution per flat component of the action space and stores
        them in `self.action_adapters` and `self.distributions` (keyed by flat-key).

        If the adapter spec of a component is a dict without a "type", the distribution is derived from
        the action component first and the adapter type then follows from the distribution. Otherwise,
        the adapter is built first and the distribution is derived from the adapter.

        Args:
            action_space (any): Spec for the action Space (passed to `Space.from_spec`). If None, the action
                space is taken from an ActionAdapter built from `action_adapter_spec` and must consist of
                exactly one component.
            action_adapter_spec (any): Either a spec dict (optionally holding per-flat-key sub-specs), an
                ActionAdapter object, or any other simple spec. Spec dicts are NOT modified by this method.

        Raises:
            RLGraphError: If no distribution could be created for one of the action components.
        """
        if action_space is None:
            adapter = ActionAdapter.from_spec(action_adapter_spec)
            self.action_space = adapter.action_space
            # Assert single component action space.
            assert len(self.action_space.flatten()) == 1, \
                "ERROR: Action space must not be ContainerSpace if no `action_space` is given in Policy constructor!"
        else:
            self.action_space = Space.from_spec(action_space)

        # Figure out our Distributions.
        for i, (flat_key, action_component) in enumerate(
                self.action_space.flatten().items()):
            # Spec dict.
            if isinstance(action_adapter_spec, dict):
                # Work on a shallow copy: The lookup may hand back the shared `action_adapter_spec` itself
                # (it is passed in as the default). Writing "action_space"/"type" into that shared dict
                # would alter the caller's spec and would make a "type" derived for one action component
                # show up in the spec used for the following components.
                aa_spec = dict(flat_key_lookup(action_adapter_spec, flat_key,
                                               action_adapter_spec))
                aa_spec["action_space"] = action_component
            # Simple type spec.
            elif not isinstance(action_adapter_spec, ActionAdapter):
                aa_spec = dict(action_space=action_component)
            # Direct object.
            else:
                aa_spec = action_adapter_spec

            adapter_scope = "action-adapter-{}".format(i)

            # No adapter type given: Distribution first, then derive the adapter type from it.
            if isinstance(aa_spec, dict) and "type" not in aa_spec:
                dist_spec = get_default_distribution_from_space(
                    action_component, self.bounded_distribution_type,
                    self.discrete_distribution_type,
                    self.gumbel_softmax_temperature)

                distribution = Distribution.from_spec(
                    dist_spec, scope="{}-{}".format(dist_spec["type"], i))
                if distribution is None:
                    # Report the offending component (not the - possibly None - `action_space` argument).
                    raise RLGraphError(
                        "ERROR: `action_component` is of type {} and not allowed in {} Component!"
                        .format(type(action_component).__name__, self.name))
                self.distributions[flat_key] = distribution
                aa_spec["type"] = get_action_adapter_type_from_distribution_type(
                    type(distribution).__name__)
                self.action_adapters[flat_key] = ActionAdapter.from_spec(
                    aa_spec, scope=adapter_scope)
            # Adapter (type) given: Adapter first, then derive the distribution from it.
            else:
                self.action_adapters[flat_key] = ActionAdapter.from_spec(
                    aa_spec, scope=adapter_scope)
                dist_spec = get_distribution_spec_from_action_adapter(
                    self.action_adapters[flat_key])
                self.distributions[flat_key] = Distribution.from_spec(
                    dist_spec, scope="{}-{}".format(dist_spec["type"], i))
Esempio n. 29
0
def get_default_distribution_from_space(
        space,
        bounded_distribution_type="beta",
        discrete_distribution_type="categorical",
        gumbel_softmax_temperature=1.0):
    """
    Derives a default distribution spec for the given Space.

    Args:
        space (Space): The Space for which to derive a default distribution spec.
        bounded_distribution_type (str): The lookup class string for a bounded FloatBox distribution.
            Default: "beta".
        discrete_distribution_type(str): The class of distributions to use for discrete action spaces. For options
            check the components.distributions package. Default: categorical. Agents requiring reparameterization
            may require a GumbelSoftmax distribution instead.
        gumbel_softmax_temperature (float): Temperature parameter for the Gumbel-Softmax distribution used
            for discrete actions.

    Returns:
        Dict: A Spec dict, from which a valid default distribution object can be created.

    Raises:
        RLGraphError: If no default distribution is defined for the given Space.
    """
    # IntBox: Categorical.
    if isinstance(space, IntBox):
        if discrete_distribution_type == "gumbel_softmax":
            return dict(type="gumbel-softmax",
                        temperature=gumbel_softmax_temperature)
        else:
            return dict(type=discrete_distribution_type)
    # BoolBox: Bernoulli.
    elif isinstance(space, BoolBox):
        return dict(type="bernoulli")
    # Continuous action space: Normal/Beta/etc. distribution.
    elif isinstance(space, FloatBox):
        # Unbounded -> Normal distribution.
        if not is_bounded_space(space):
            return dict(type="normal")
        # Bounded -> according to the bounded_distribution parameter.
        else:
            return dict(type=bounded_distribution_type,
                        low=space.low,
                        high=space.high)
    # Container Space.
    elif isinstance(space, ContainerSpace):
        # Hand our own settings down to the sub-spaces. Otherwise, the components of a container would
        # silently fall back to the default distribution types/temperature.
        return dict(type="joint-cumulative",
                    distribution_specs={
                        k: get_default_distribution_from_space(
                            s,
                            bounded_distribution_type=bounded_distribution_type,
                            discrete_distribution_type=discrete_distribution_type,
                            gumbel_softmax_temperature=gumbel_softmax_temperature)
                        for k, s in space.flatten().items()
                    })
    else:
        raise RLGraphError(
            "No distribution defined for space {}!".format(space))
Esempio n. 30
0
def get_distribution_spec_from_action_adapter_type(action_adapter_type_str):
    """
    Translates the class name of an ActionAdapter into the type string of its distribution.

    Args:
        action_adapter_type_str (str): The class name of the ActionAdapter
            (e.g. "NormalDistributionAdapter").

    Returns:
        str: The matching distribution type string (e.g. "normal").

    Raises:
        RLGraphError: If the given ActionAdapter class name is not known.
    """
    known_adapters = (
        ("CategoricalDistributionAdapter", "categorical"),
        ("GumbelSoftmaxDistributionAdapter", "gumbel-softmax"),
        ("BernoulliDistributionAdapter", "bernoulli"),
        ("NormalDistributionAdapter", "normal"),
        ("BetaDistributionAdapter", "beta"),
        ("SquashedNormalDistributionAdapter", "squashed-normal"),
    )
    for adapter_name, distribution_type in known_adapters:
        if action_adapter_type_str == adapter_name:
            return distribution_type

    raise RLGraphError("'{}' is an unknown ActionAdapter type!".format(
        action_adapter_type_str))