Example #1
 def _graph_fn_apply(self, preprocessing_inputs):
     if self.backend == "python" or get_backend() == "python":
         if isinstance(preprocessing_inputs, list):
             preprocessing_inputs = np.asarray(preprocessing_inputs)
         return preprocessing_inputs.astype(dtype=util.dtype(self.to_dtype, to="np"))
     elif get_backend() == "pytorch":
         return torch.tensor(preprocessing_inputs, dtype=util.dtype(self.to_dtype, to="pytorch"))
     elif get_backend() == "tf":
         to_dtype = util.dtype(self.to_dtype, to="tf")
         if preprocessing_inputs.dtype != to_dtype:
             return tf.cast(x=preprocessing_inputs, dtype=to_dtype)
         else:
             return preprocessing_inputs
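For reference, a minimal numpy-only sketch of what the python-backend branch does (a plain numpy dtype stands in for util.dtype here, an assumption for illustration):

    import numpy as np

    def cast_python(preprocessing_inputs, to_dtype=np.float32):
        # Lists are first converted to an ndarray, then cast to the target dtype
        # (stand-in for util.dtype(self.to_dtype, to="np")).
        if isinstance(preprocessing_inputs, list):
            preprocessing_inputs = np.asarray(preprocessing_inputs)
        return preprocessing_inputs.astype(dtype=to_dtype)

    cast_python([1, 2, 3]).dtype  # dtype('float32')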
Example #2
 def _graph_fn_get_noise(self):
     drift = self.theta * (self.mu - self.ou_state)
     if get_backend() == "tf":
         diffusion = self.sigma * tf.random_normal(
             shape=self.action_space.shape,
             dtype=dtype(self.action_space.dtype))
         delta = drift + diffusion
         return tf.assign_add(ref=self.ou_state, value=delta)
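The drift/diffusion pair above is one Euler step of an Ornstein-Uhlenbeck process. A backend-free numpy sketch of the same update (the theta/mu/sigma values are illustrative assumptions):

    import numpy as np

    def ou_step(ou_state, theta=0.15, mu=0.0, sigma=0.3):
        # delta = theta * (mu - x) + sigma * N(0, 1); the state accumulates each
        # delta, mirroring the tf.assign_add above.
        drift = theta * (mu - ou_state)
        diffusion = sigma * np.random.normal(size=np.shape(ou_state))
        return ou_state + drift + diffusion

    state = np.zeros(2)
    for _ in range(3):
        state = ou_step(state)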
Example #3
 def _graph_fn_get_distribution(self, parameters):
     """
     Args:
         parameters (DataOp): The p value (probability that distribution returns True).
     """
     if get_backend() == "tf":
         return tf.distributions.Bernoulli(probs=parameters, dtype=util.dtype("bool"))
     elif get_backend() == "pytorch":
         return torch.distributions.Bernoulli(probs=parameters)
Example #4
    def create_variables(self, input_spaces, action_space=None):
        # Create weights matrix and (maybe) biases vector.
        shape = (self.vocab_size, self.embed_dim)
        self.initializer = Initializer.from_spec(shape=shape, specification=self.initializer_spec)
        # TODO: For IMPALA, a partitioner is not needed. Do this later.
        self.embedding_matrix = self.get_variable(
            name="embedding-matrix", shape=shape, dtype=dtype("float"), initializer=self.initializer.initializer,
            #partitioner=self.partitioners, regularizer=self.regularizers,
            trainable=self.trainable
        )

        self.ids_space = input_spaces["ids"]
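Outside the component framework, the same variable plus the usual lookup would look roughly like this in raw TF1 (a sketch; the vocab_size and embed_dim values are assumed):

    import tensorflow as tf

    vocab_size, embed_dim = 1000, 32
    embedding_matrix = tf.get_variable(
        "embedding-matrix", shape=(vocab_size, embed_dim), dtype=tf.float32,
        trainable=True)
    ids = tf.placeholder(tf.int32, shape=(None,), name="ids")
    embedded = tf.nn.embedding_lookup(embedding_matrix, ids)  # (batch, embed_dim)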
Example #5
    def _graph_fn_pick(self, use_exploration, epsilon_decisions, sample):
        """
        Exploration for discrete action spaces.
        Either picks a random action (if `use_exploration` and the respective `epsilon_decisions`
            entry are True), or returns the non-exploratory action.

        Args:
            use_exploration (DataOp): The master switch determining whether to use exploration or not.
            epsilon_decisions (DataOp): The bool coming from the epsilon-exploration component specifying
                whether to use exploration or not (per batch item).
            sample (DataOp): The output from a distribution's "sample_deterministic" OR "sample_stochastic".

        Returns:
            DataOp: The DataOp representing the action. This will match the shape of self.action_space.
        """
        if get_backend() == "tf":
            random_actions = tf.random_uniform(
                shape=tf.shape(sample),
                maxval=self.action_space.num_categories,
                dtype=dtype("int"))

            if use_exploration is False:
                return sample
            else:
                return tf.where(
                    # `use_exploration` given as actual bool or as tensor?
                    condition=epsilon_decisions if use_exploration is True else
                    tf.logical_and(use_exploration, epsilon_decisions),
                    x=random_actions,
                    y=sample)
        elif get_backend() == "pytorch":
            # N.B.: Different order versus TF because we don't want to execute the sampling below.
            if use_exploration is False:
                return sample

            if self.sample_obj is None:
                # Don't create a new sample object every time.
                self.sample_obj = torch.distributions.Uniform(
                    0, self.action_space.num_categories)

            random_actions = self.sample_obj.sample(sample.shape).int()
            if use_exploration is True:
                return torch.where(epsilon_decisions, random_actions, sample)
            else:
                if not isinstance(use_exploration, torch.ByteTensor):
                    use_exploration = use_exploration.byte()
                if not isinstance(epsilon_decisions, torch.ByteTensor):
                    epsilon_decisions = epsilon_decisions.byte()
                return torch.where(use_exploration & epsilon_decisions,
                                   random_actions, sample)
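Both backend branches boil down to an element-wise select. A numpy sketch of the same epsilon-greedy pick (argument names mirror the method above):

    import numpy as np

    def pick(use_exploration, epsilon_decisions, sample, num_categories):
        random_actions = np.random.randint(num_categories, size=np.shape(sample))
        # Explore only where both the master switch and the per-item decision are True.
        return np.where(np.logical_and(use_exploration, epsilon_decisions),
                        random_actions, sample)

    pick(True, np.array([True, False]), np.array([3, 3]), num_categories=4)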
Example #6
    def test_specifiable_server(self):
        action_space = IntBox(2)
        state_space = FloatBox()
        env_spec = dict(type="random_env", state_space=state_space, action_space=action_space, deterministic=True)
        # Create the server, but don't start it yet. This will be done fully automatically by the tf-Session.
        specifiable_server = SpecifiableServer(Environment, env_spec, dict(
            step_for_env_stepper=[state_space, float, bool]
        ), "terminate")

        # `ret1` and `ret2` are now ops in the graph.
        ret1 = specifiable_server.step_for_env_stepper(action_space.sample())
        ret2 = specifiable_server.step_for_env_stepper(action_space.sample())

        # Check all 3 outputs of the Env step (next state, reward, terminal).
        self.assertEqual(ret1[0].shape, ())
        self.assertEqual(ret1[0].dtype, dtype("float32"))
        self.assertEqual(ret1[1].shape, ())
        self.assertEqual(ret1[1].dtype, dtype("float32"))
        self.assertEqual(ret1[2].shape, ())
        self.assertEqual(ret1[2].dtype, dtype("bool"))
        self.assertEqual(ret2[0].shape, ())
        self.assertEqual(ret2[0].dtype, dtype("float32"))
        self.assertEqual(ret2[1].shape, ())
        self.assertEqual(ret2[1].dtype, dtype("float32"))
        self.assertEqual(ret2[2].shape, ())
        self.assertEqual(ret2[2].dtype, dtype("bool"))

        # Start the session and run the op, then check its actual values.
        with tf.train.SingularMonitoredSession(hooks=[SpecifiableServerHook()]) as sess:
            out1 = sess.run(ret1)
            out2 = sess.run(ret2)

        # next state
        self.assertAlmostEqual(out1[0], 0.7713, places=4)
        self.assertAlmostEqual(out2[0], 0.7488, places=4)
        # reward
        self.assertAlmostEqual(out1[1], 0.0208, places=4)
        self.assertAlmostEqual(out2[1], 0.4985, places=4)
        # terminal
        self.assertTrue(out1[2] is np.bool_(False))
        self.assertTrue(out2[2] is np.bool_(False))
Example #7
 def _graph_fn_sample_deterministic(self, distribution):
     if get_backend() == "tf":
         return tf.argmax(input=distribution.probs, axis=-1, output_type=util.dtype("int"))
     elif get_backend() == "pytorch":
         return torch.argmax(distribution.probs, dim=-1).int()
Example #8
 def _graph_fn_get_distribution(self, parameters):
     if get_backend() == "tf":
         return tf.distributions.Categorical(probs=parameters, dtype=util.dtype("int"))
     elif get_backend() == "pytorch":
         return torch.distributions.Categorical(probs=parameters)
Example #9
    def get_variable(self,
                     name,
                     is_input_feed=False,
                     add_batch_rank=None,
                     add_time_rank=None,
                     time_major=None,
                     is_python=False,
                     local=False,
                     **kwargs):
        add_batch_rank = self.has_batch_rank if add_batch_rank is None else add_batch_rank
        batch_rank = () if add_batch_rank is False else (
            None, ) if add_batch_rank is True else (add_batch_rank, )

        add_time_rank = self.has_time_rank if add_time_rank is None else add_time_rank
        time_rank = () if add_time_rank is False else (
            None, ) if add_time_rank is True else (add_time_rank, )

        time_major = self.time_major if time_major is None else time_major

        if time_major is False:
            shape = batch_rank + time_rank + self.shape
        else:
            shape = time_rank + batch_rank + self.shape

        if is_python is True or get_backend() == "python":
            if isinstance(add_batch_rank, int):
                if isinstance(add_time_rank, int):
                    if time_major:
                        var = [[0 for _ in range_(add_batch_rank)]
                               for _ in range_(add_time_rank)]
                    else:
                        var = [[0 for _ in range_(add_time_rank)]
                               for _ in range_(add_batch_rank)]
                else:
                    var = [0 for _ in range_(add_batch_rank)]
            elif isinstance(add_time_rank, int):
                var = [0 for _ in range_(add_time_rank)]
            else:
                var = []

            # TODO: Un-indent and just directly construct the PyTorch tensor?
            if get_backend() == "pytorch" and is_input_feed:
                # Convert to a PyTorch tensor because PyTorch cannot use placeholders.
                return torch.zeros(shape)
            else:
                # TODO also convert?
                return var

        elif get_backend() == "tf":
            import tensorflow as tf
            # TODO: re-evaluate the cutting of a leading '/_?' (tf doesn't like it)
            name = re.sub(r'^/_?', "", name)
            if is_input_feed:
                return tf.placeholder(dtype=dtype(self.dtype),
                                      shape=shape,
                                      name=name)
            else:
                init_spec = kwargs.pop("initializer", None)
                # Bools should be initializable via 0 or not 0.
                if self.dtype == np.bool_ and isinstance(
                        init_spec, (int, float)):
                    init_spec = (init_spec != 0)

                if self.dtype == np.str_ and init_spec == 0:
                    initializer = None
                else:
                    initializer = Initializer.from_spec(
                        shape=shape, specification=init_spec).initializer

                return tf.get_variable(
                    name,
                    shape=shape,
                    dtype=dtype(self.dtype),
                    initializer=initializer,
                    collections=[
                        tf.GraphKeys.GLOBAL_VARIABLES
                        if local is False else tf.GraphKeys.LOCAL_VARIABLES
                    ],
                    **kwargs)
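The shape composition at the top of the method is worth spelling out. A plain-Python sketch of the resulting shapes (example values assumed):

    # batch/time ranks become leading dimensions; True -> unknown (None), an int -> fixed.
    value_shape = (3,)
    batch_rank, time_rank = (None,), (5,)  # add_batch_rank=True, add_time_rank=5

    shape_batch_major = batch_rank + time_rank + value_shape  # (None, 5, 3)
    shape_time_major = time_rank + batch_rank + value_shape   # (5, None, 3)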
Example #10
 def contains(self, sample):
     if self.shape == ():
         return isinstance(sample, (bool, np.bool_))
     else:
         return dtype(sample.dtype, "np") == np.bool_
Example #11
def get_space_from_op(op):
    """
    Tries to re-create a Space object given some DataOp.
    This is useful for shape inference when passing a Socket's ops through a GraphFunction and
    auto-inferring the resulting shape/Space.

    Args:
        op (DataOp): The op to create a corresponding Space for.

    Returns:
        Space: The inferred Space object.
    """
    # a Dict
    if isinstance(op, dict):  # DataOpDict
        spec = {}
        add_batch_rank = False
        add_time_rank = False
        for key, value in op.items():
            spec[key] = get_space_from_op(value)
            if spec[key].has_batch_rank:
                add_batch_rank = True
            if spec[key].has_time_rank:
                add_time_rank = True
        return Dict(spec,
                    add_batch_rank=add_batch_rank,
                    add_time_rank=add_time_rank)
    # a Tuple
    elif isinstance(op, tuple):  # DataOpTuple
        spec = []
        add_batch_rank = False
        add_time_rank = False
        for i in op:
            space = get_space_from_op(i)
            if space == 0:
                return 0
            spec.append(space)
            if spec[-1].has_batch_rank:
                add_batch_rank = True
            if spec[-1].has_time_rank:
                add_time_rank = True
        return Tuple(spec,
                     add_batch_rank=add_batch_rank,
                     add_time_rank=add_time_rank)
    # primitive Space -> infer from op dtype and shape
    else:
        # Simple constant value DataOp (python type or an np.ndarray).
        assert not hasattr(
            op, "constant_value")  # we should be done with this by now
        #if isinstance(op, SingleDataOp) and op.constant_value is not None:
        #    value = op.constant_value
        #    if isinstance(value, np.ndarray):
        #        return BoxSpace.from_spec(spec=dtype(str(value.dtype), "np"), shape=value.shape)
        # Op itself is a single value, simple python type.
        if isinstance(op, (bool, int, float)):
            return BoxSpace.from_spec(spec=type(op), shape=())
        # A single numpy array.
        elif isinstance(op, np.ndarray):
            return BoxSpace.from_spec(spec=dtype(str(op.dtype), "np"),
                                      shape=op.shape)
        # No Space: e.g. the tf.no_op, a distribution (anything that's not a tensor).
        # PyTorch Tensors do not have get_shape so must check backend.
        elif hasattr(op, "dtype") is False or (get_backend() == "tf" and
                                               not hasattr(op, "get_shape")):
            return 0
        # Some tensor: can be converted into a BoxSpace.
        else:
            shape = get_shape(op)
            # Unknown shape (e.g. a cond op).
            if shape is None:
                return 0
            add_batch_rank = False
            add_time_rank = False
            time_major = False
            new_shape = list(shape)

            # New way: Detect via op._batch_rank and op._time_rank properties where these ranks are.
            if hasattr(op, "_batch_rank") and isinstance(op._batch_rank, int):
                add_batch_rank = True
                new_shape[op._batch_rank] = -1

            # elif get_backend() == "pytorch":
            #     if isinstance(op, torch.Tensor):
            #         if op.dim() > 1 and shape[0] == 1:
            #             add_batch_rank = True
            #             new_shape[0] = 1
            if hasattr(op, "_time_rank") and isinstance(op._time_rank, int):
                add_time_rank = True
                if op._time_rank == 0:
                    time_major = True
                new_shape[op._time_rank] = -1
            shape = tuple(n for n in new_shape if n != -1)

            # Old way: Detect automatically whether the first rank(s) are batch and/or time rank.
            if add_batch_rank is False and add_time_rank is False and \
                    shape != () and shape[0] is None:
                if len(shape) > 1 and shape[1] is None:
                    #raise RLGraphError(
                    #    "ERROR: Cannot determine time-major flag if both batch- and time-ranks are in an op w/o saying "
                    #    "which rank goes to which position!"
                    #)
                    shape = shape[2:]
                    add_time_rank = True
                else:
                    shape = shape[1:]
                add_batch_rank = True

            base_dtype = op.dtype.base_dtype if hasattr(
                op.dtype, "base_dtype") else op.dtype
            # PyTorch does not have a bool type
            if get_backend() == "pytorch":
                if op.dtype is torch.uint8:
                    base_dtype = bool
            base_dtype_str = str(base_dtype)

            # FloatBox
            if "float" in base_dtype_str:
                return FloatBox(shape=shape,
                                add_batch_rank=add_batch_rank,
                                add_time_rank=add_time_rank,
                                time_major=time_major,
                                dtype=dtype(base_dtype, "np"))
            # IntBox
            elif "int" in base_dtype_str:
                return IntBox(shape=shape,
                              add_batch_rank=add_batch_rank,
                              add_time_rank=add_time_rank,
                              time_major=time_major,
                              dtype=dtype(base_dtype, "np"))
            # a BoolBox
            elif "bool" in base_dtype_str:
                return BoolBox(shape=shape,
                               add_batch_rank=add_batch_rank,
                               add_time_rank=add_time_rank,
                               time_major=time_major)
            # a TextBox
            elif "string" in base_dtype_str:
                return TextBox(shape=shape,
                               add_batch_rank=add_batch_rank,
                               add_time_rank=add_time_rank,
                               time_major=time_major)

    raise RLGraphError(
        "ERROR: Cannot derive Space from op '{}' (unknown type?)!".format(op))
        def call(*args):
            if isinstance(self.output_spaces, dict):
                assert method_name in self.output_spaces, "ERROR: Method '{}' not specified in output_spaces: {}!".\
                    format(method_name, self.output_spaces)
                specs = self.output_spaces[method_name]
            else:
                specs = self.output_spaces(method_name)

            if specs is None:
                raise RLGraphError(
                    "No Space information received for method '{}:{}'".format(self.class_.__name__, method_name)
                )

            dtypes = []
            shapes = []
            return_slots = []
            for i, space in enumerate(force_list(specs)):
                assert not isinstance(space, ContainerSpace)
                # Expecting an op (space 0).
                if space == 0:
                    dtypes.append(0)
                    shapes.append(0)
                    return_slots.append(i)
                # Expecting a tensor.
                elif space is not None:
                    dtypes.append(dtype(space.dtype))
                    shapes.append(space.shape)
                    return_slots.append(i)

            if get_backend() == "tf":
                # This function will send the method call via the out-pipe to the remote (server) Specifiable
                # object - all in-graph - and return the results to be used further by other graph ops.
                def py_call(*args_):
                    args_ = [arg.decode('UTF-8') if isinstance(arg, bytes) else arg for arg in args_]
                    try:
                        self.out_pipe.send(args_)
                        result_ = self.out_pipe.recv()

                        # If an error occurred, it'll be passed back through the pipe.
                        if isinstance(result_, Exception):
                            raise result_
                        elif result_ is not None:
                            return result_

                    except Exception as e:
                        if isinstance(e, IOError):
                            raise StopIteration()  # Clean exit.
                        else:
                            raise

                results = tf.py_func(py_call, (method_name,) + tuple(args), dtypes, name=method_name)

                # Force known shapes on the returned tensors.
                for i, (result, shape) in enumerate(zip(results, shapes)):
                    # Not an op (ops have shape=0).
                    if shape != 0:
                        result.set_shape(shape)
            else:
                raise NotImplementedError

            return results[0] if len(dtypes) == 1 else tuple(results)
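The core mechanism here is TF1's tf.py_func bridge: a Python callable is wrapped as a graph op with declared output dtypes, and output shapes must be forced afterwards because py_func cannot infer them. A minimal standalone sketch (py_step stands in for the pipe round-trip):

    import numpy as np
    import tensorflow as tf

    def py_step(action):
        # Stand-in for sending the call through the pipe and receiving results.
        return np.float32(action * 2.0), np.bool_(False)

    action = tf.constant(1.0)
    state, terminal = tf.py_func(py_step, [action], [tf.float32, tf.bool])
    state.set_shape(())     # py_func outputs have unknown shapes; force them.
    terminal.set_shape(())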
Example #13
    def _graph_fn_decayed_value(self, time_step):
        """
        Args:
            time_step (DataOp): The int-type DataOp that holds the current global time_step.

        Returns:
            DataOp: The decayed value depending on the current time step.
        """
        if get_backend() == "tf":
            smaller_than_start = time_step <= self.start_timestep

            shape = tf.shape(time_step)
            # time_step comes in as a time-sequence of time-steps.
            if shape.shape[0] > 0:
                return tf.where(
                    condition=smaller_than_start,
                    # We are still in pre-decay time.
                    x=tf.tile(tf.constant([self.from_]), multiples=shape),
                    # We are past pre-decay time.
                    y=tf.where(
                        condition=(time_step >=
                                   self.start_timestep + self.num_timesteps),
                        # We are in post-decay time.
                        x=tf.tile(tf.constant([self.to_]), multiples=shape),
                        # We are inside the decay time window.
                        y=self._graph_fn_decay(
                            tf.cast(x=time_step - self.start_timestep,
                                    dtype=util.dtype("float"))),
                        name="cond-past-end-time"),
                    name="cond-before-start-time")
            # Single 0D time step.
            else:
                return tf.cond(
                    pred=smaller_than_start,
                    # We are still in pre-decay time.
                    true_fn=lambda: self.from_,
                    # We are past pre-decay time.
                    false_fn=lambda: tf.cond(
                        pred=(time_step >=
                              self.start_timestep + self.num_timesteps),
                        # We are in post-decay time.
                        true_fn=lambda: self.to_,
                        # We are inside the decay time window.
                        false_fn=lambda: self._graph_fn_decay(
                            tf.cast(x=time_step - self.start_timestep,
                                    dtype=util.dtype("float"))),
                    ),
                )
        elif get_backend() == "pytorch":
            if time_step is None:
                time_step = torch.tensor([0])
            smaller_than_start = time_step <= self.start_timestep
            if time_step.dim() == 0:
                time_step = time_step.unsqueeze(-1)
            shape = time_step.shape
            # time_step comes in as a time-sequence of time-steps.
            # TODO: The tile shape is confusing -> the number of tiles should be shape[0], not shape?
            if shape[0] > 0:
                past_decay = torch.where(
                    (time_step >= self.start_timestep + self.num_timesteps),
                    # We are in post-decay time.
                    pytorch_tile(torch.tensor([self.to_]), shape),
                    # We are inside the decay time window.
                    torch.tensor(
                        self._graph_fn_decay(
                            torch.FloatTensor(
                                [time_step - self.start_timestep]))))
                return torch.where(
                    smaller_than_start,
                    # We are still in pre-decay time.
                    pytorch_tile(torch.tensor([self.from_]), shape),
                    # We are past pre-decay time.
                    past_decay)
            # Single 0D time step.
            else:
                if smaller_than_start:
                    return self.from_
                else:
                    if time_step >= self.start_timestep + self.num_timesteps:
                        return self.to_
                    else:
                        return self._graph_fn_decay(
                            torch.FloatTensor(
                                [time_step - self.start_timestep]))
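For intuition, both backend branches implement the same three-phase schedule: constant from_ before start_timestep, a decay inside the window, constant to_ afterwards. A plain-Python sketch with a linear rule substituted for _graph_fn_decay (the actual rule is a subclass detail, so the linear form is an assumption):

    def decayed_value(t, from_=1.0, to_=0.1, start_timestep=0, num_timesteps=100):
        if t <= start_timestep:
            return from_    # pre-decay time
        if t >= start_timestep + num_timesteps:
            return to_      # post-decay time
        frac = (t - start_timestep) / num_timesteps
        return from_ + frac * (to_ - from_)  # inside the decay window (linear)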
Example #14
 def _graph_fn_get_noise(self):
     if get_backend() == "tf":
         return tf.random_normal(shape=(1, ) + self.action_space.shape,
                                 mean=self.mean,
                                 stddev=self.stddev,
                                 dtype=dtype(self.action_space.dtype))
Example #15
    def __init__(self, shape, specification=None, **kwargs):
        """
        Args:
            shape (tuple): The shape of the Variables to initialize.
            specification (any): A spec that determines the nature of this initializer.

        Raises:
            RLGraphError: If a fixed shape in `specification` does not match `shape`.
        """
        super(Initializer, self).__init__()

        # The shape of the variable to be initialized.
        self.shape = shape
        # The actual underlying initializer object.
        self.initializer = None

        # Truncated Normal.
        if specification == "truncated_normal":
            if get_backend() == "tf":
                # Use the first dimension (num_rows or batch rank) to figure out the stddev.
                stddev = 1 / math.sqrt(shape[0] if isinstance(
                    shape, (tuple, list)) and len(shape) > 0 else 1.0)
                self.initializer = tf.truncated_normal_initializer(
                    stddev=stddev)
            elif get_backend() == "pytorch":
                stddev = 1 / math.sqrt(shape[0] if isinstance(
                    shape, (tuple, list)) and len(shape) > 0 else 1.0)
                self.initializer = lambda t: torch.nn.init.normal_(tensor=t,
                                                                   std=stddev)

        # No spec -> Leave initializer as None for TF (will then use default;
        #  e.g. for tf weights: Xavier uniform). For PyTorch, still have to set Xavier.
        # TODO: The `is None or is False` check is very unclean because TF and PT have different
        # defaults -> change to clean default values for weights and biases.
        elif specification is None or specification is False:
            if get_backend() == "tf":
                pass
            elif get_backend() == "pytorch":
                self.initializer = torch.nn.init.xavier_uniform_

        # Fixed values spec -> Use them, just do sanity checking.
        else:
            # Constant value across the variable.
            if isinstance(specification, (float, int)):
                pass
            # A 1D initializer (e.g. for biases).
            elif isinstance(specification, list):
                array = np.asarray(specification, dtype=dtype("float32", "np"))
                if array.shape != self.shape:
                    raise RLGraphError(
                        "ERROR: Number/shape of given items ({}) not identical with shape ({})!"
                        .format(array.shape, self.shape))
            # A nD initializer (numpy-array).
            elif isinstance(specification, np.ndarray):
                if specification.shape != self.shape:
                    raise RLGraphError(
                        "ERROR: Shape of given items ({}) not identical with shape ({})!"
                        .format(specification.shape, self.shape))
            # Unknown type.
            else:
                raise RLGraphError(
                    "ERROR: Bad specification given ({}) for Initializer object!"
                    .format(specification))

            # Create the backend initializer object.
            if get_backend() == "tf":
                self.initializer = tf.constant_initializer(
                    value=specification, dtype=dtype("float32"))
            elif get_backend() == "pytorch":
                self.initializer = lambda t: torch.nn.init.constant_(
                    tensor=t, val=specification)
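A short usage sketch of the constant-values path above (not self-contained; it uses the Initializer class as defined here):

    # 1D bias initializer from a list spec; a shape mismatch would raise RLGraphError.
    bias_init = Initializer(shape=(3,), specification=[0.1, 0.2, 0.3])
    # Under the TF backend, bias_init.initializer is now a tf.constant_initializer.

    # No spec: TF leaves the initializer as None (framework default), PyTorch gets xavier_uniform_.
    default_init = Initializer(shape=(4, 4))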